# Battle Royale Environment Trainer
This notebook is for training Battle Royale agents. MADDPG is used for training the agents.

## Setup Environment Dependencies

In [1]:
import sys
from gym_unity.envs import UnityEnv

print("Python version:")
print(sys.version)
print(sys.executable)

# check Python version
if (sys.version_info[0] < 3):
    raise Exception("ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3")

Python version:
3.7.6 (default, Jan  8 2020, 13:42:34) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
/Users/adhipradhana/anaconda3/envs/unity-battle-royale/bin/python


## Start Environment

In [19]:
# Environment name
# Remember to put battle royale environment configuration within the config folder
env_name = "environment/battle-royale-static"

env = UnityEnv(env_name, worker_id=3, use_visual=False, multiagent=True)

print(str(env))

INFO:mlagents_envs:Connected new brain:
PlayerBrain?team=0
INFO:gym_unity:3 agents within environment.


<UnityEnv instance>




## Testing Model

### Setup Algorithm Dependencies

In [8]:
import torch
import numpy as np

from utils.MADDPG import MADDPG

### Model Variables

In [25]:
random_seed = 6272727
n_states = env.observation_space.shape[0]
n_actions = env.action_space.shape[0]
n_agents = env.number_agents
n_episode = 100
max_steps = 2000
buffer_capacity = 1000000
batch_size = 1000
episodes_before_train = 100

In [26]:
# setup seed
torch.manual_seed(random_seed)
np.random.seed(random_seed)

maddpg = MADDPG(n_agents, n_states, n_actions, batch_size, buffer_capacity, episodes_before_train)

FloatTensor = torch.cuda.FloatTensor if maddpg.use_cuda else torch.FloatTensor

## Loading Model

In [27]:
import os

path = os.path.join(os.getcwd(), 'checkpoint', 'Time_2020-03-26_12.02.52.676306_NAgent_3', 'Time_2020-03-26_12.02.52.676306_NAgent_3_Episode_1500.pth')
maddpg.load(path, map_location='cpu')

## Run Model

In [28]:
print("Testing model...")
for i_episode in range(n_episode):
    # reset environment
    obs = env.reset()
    obs = np.stack(obs)
    
    # convert observation to tensor
    if isinstance(obs, np.ndarray):
        obs = torch.from_numpy(obs).float()
    
    total_reward = 0.0
    for i_step in range(max_steps):
        obs = obs.type(FloatTensor)
        actions = maddpg.select_action(obs).data.cpu()
        actions_list = actions.tolist()
        
        print(actions_list[0][2])
        
        obs_, reward, done, _ = env.step(actions_list)
        
        reward = torch.FloatTensor(reward).type(FloatTensor)
        obs_ = np.stack(obs_)
        obs_ = torch.from_numpy(obs_).float()
        if i_step != max_steps - 1:
            next_obs = obs_
        else:
            next_obs = None

        total_reward += reward.sum()     
        obs = next_obs

        # check if done
        if True in done:
            break

    maddpg.episode_done += 1
    print("Episode: {}, reward = {}".format(i_episode, total_reward))

Testing model...
0.8955457210540771
0.9497663378715515
0.4655416011810303
1.0
1.0
-0.967146635055542
-0.9147393107414246
0.9663916230201721
0.9230995178222656
0.8515515327453613
1.0
0.8615862131118774
1.0
1.0
1.0
1.0
0.9263032078742981
1.0
0.97507244348526
1.0
0.8830512762069702
0.9033214449882507
1.0
1.0
0.8064382076263428
0.9357988834381104
1.0
0.8923037052154541
1.0
0.6510396599769592
1.0
-0.9778864979743958
-0.9364740252494812
-1.0
-0.9508917331695557
-0.9742867946624756
-1.0
-1.0
-1.0
-0.9678910374641418
-0.9456737637519836
-0.933405876159668
-1.0
-0.9033561944961548
-1.0
-1.0
-0.9192863702774048
0.9724539518356323
1.0
-0.9498558640480042
-1.0
1.0
0.9782973527908325
0.9239950180053711
-1.0
-1.0
0.8520993590354919
0.9920393824577332
0.769230842590332
-0.9744117259979248
-0.9378653168678284
0.6474754214286804
1.0
0.6822178363800049
-1.0
-1.0
0.010371528565883636
0.9469529390335083
0.36109796166419983
-0.9852259755134583
-1.0
1.0
1.0
-0.6643657684326172
-0.961923360824585
-1.0
-0.943

-1.0
-1.0
-1.0
-0.9858958721160889
-1.0
-0.9926863312721252
-1.0
-1.0
-1.0
-1.0
-0.9892193078994751
-1.0
-1.0
-0.9586654305458069
-0.9342800974845886
-1.0
-0.893067479133606
-1.0
-1.0
-1.0
-1.0
-0.9430586695671082
-0.913823127746582
-0.9666717052459717
-0.9864831566810608
-1.0
-0.9683259129524231
-1.0
-1.0
-1.0
-0.8612971901893616
-1.0
-0.9815718531608582
-1.0
-1.0
-1.0
-0.9937057495117188
-0.9839379191398621
-0.9636118412017822
-0.9173116087913513
-1.0
-0.9477412700653076
-0.9420682787895203
-0.9472416043281555
-0.974312961101532
-1.0
-1.0
-1.0
-0.9931994676589966
-0.9912249445915222
-1.0
-0.991340160369873
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-0.841515064239502
-0.9567009806632996
-0.90728360414505
-0.8950555324554443
-0.9476830959320068
-1.0
-1.0
-0.9963922500610352
-1.0
-0.823550820350647
-1.0
-1.0
-1.0
-1.0
-1.0
-0.9176468849182129
-1.0
-0.9859710931777954
-0.9872267842292786
-1.0
-1.0
-0.9791394472122192
-0.9996951222419739
-1.0
-1.0
-1.0
-1.0
-1.0
-0.

-1.0
-0.9779236912727356
-1.0
-1.0
-0.9681978225708008
-0.9118800163269043
-0.9330923557281494
-1.0
-1.0
-1.0
-0.8884677886962891
-0.9503512978553772
-0.9706547260284424
-0.8899533748626709
1.0
-0.8986659049987793
-1.0
1.0
-0.9036774635314941
0.9187477827072144
-0.9641752243041992
-0.9841541647911072
-1.0
-0.9259243011474609
-1.0
-1.0
-0.9288363456726074
-1.0
-0.9919822812080383
-0.979007363319397
-0.9837707877159119
-0.9414693713188171
-0.9335199594497681
-0.9011801481246948
-1.0
-0.9498353004455566
-1.0
-1.0
-0.9578529596328735
-0.9439718127250671
-0.9590975046157837
-1.0
-0.9733408689498901
-1.0
-0.9750896692276001
-0.9897712469100952
-1.0
-0.977799117565155
-1.0
-0.9772717356681824
-0.9456889629364014
-0.9760700464248657
-1.0
-1.0
-0.9641290903091431
-0.8647916316986084
-1.0
0.9110540151596069
-1.0
0.9737873673439026
-0.9082230925559998
-1.0
-1.0
-1.0
-0.9932190179824829
-0.9213986396789551
-1.0
-1.0
-1.0
-0.9651662111282349
-1.0
-1.0
-0.9964051246643066
-0.9719884991645813
-0.9642

0.9888535141944885
-1.0
-1.0
-1.0
0.9788341522216797
0.9175946712493896
1.0
-1.0
-0.9312809705734253
-1.0
0.979651689529419
0.9212884902954102
0.8818663954734802
-0.9611261487007141
-0.8976626396179199
-1.0
1.0
0.8957435488700867
0.992242157459259
-1.0
-0.9593608975410461
-0.3074580430984497
1.0
1.0
-0.8832142949104309
-0.9912375211715698
-0.5074163675308228
0.9598045945167542
-0.9844732880592346
-0.983587920665741
-1.0
0.9663642644882202
0.9917415380477905
0.9154030084609985
-1.0
-0.9056621193885803
0.8573721051216125
1.0
1.0
-1.0
-1.0
0.2731197476387024
1.0
1.0
-0.9869223237037659
-1.0
0.28655755519866943
1.0
1.0
-1.0
-0.8850858211517334
0.11887333542108536
1.0
0.9979087114334106
-1.0
-0.9221075773239136
0.15559552609920502
1.0
1.0
-1.0
-0.9714813232421875
0.18703360855579376
0.9461751580238342
1.0
-0.9785865545272827
-1.0
0.729838490486145
1.0
1.0
-1.0
-1.0
0.1171928197145462
0.9720678925514221
1.0
-1.0
-0.9791065454483032
0.45788276195526123
1.0
0.9214658737182617
-0.97233730554580

0.9683406949043274
-0.984459400177002
1.0
-1.0
-0.9189928770065308
0.9476762413978577
0.9189671277999878
0.9081195592880249
-0.982373058795929
-0.9519516229629517
1.0
0.9606972932815552
-0.9476451277732849
-1.0
-0.9648477435112
1.0
0.8554126620292664
0.8757956027984619
-0.9563997983932495
-0.8563830852508545
1.0
0.9242749214172363
-0.7129802703857422
-0.8885015249252319
-1.0
1.0
-1.0
-1.0
1.0
1.0
-0.9124523997306824
-0.9551695585250854
1.0
0.9584977626800537
-0.9604697823524475
-0.9140603542327881
-1.0
-1.0
1.0
0.9048636555671692
0.985606849193573
-0.9548596143722534
-1.0
0.9483389258384705
-0.828136682510376
-1.0
-0.9516587257385254
-1.0
-1.0
0.9491528868675232
0.9705550670623779
-1.0
0.9128310680389404
-0.9570267200469971
-1.0
1.0
0.9781901240348816
-1.0
-1.0
-1.0
-1.0
-1.0
-0.63394695520401
0.8719057440757751
-1.0
-0.986626923084259
0.9103977680206299
0.9536461234092712
1.0
-1.0
-0.9526780843734741
-1.0
1.0
1.0
0.9468234181404114
-0.9794410467147827
-0.9654290676116943
-0.8952928781

KeyboardInterrupt: 

## Close Environment

In [29]:
env.close()

INFO:mlagents_envs:Environment shut down with return code 0.
