# Battle Royale Environment Trainer
This notebook is for training Battle Royale agents. MADDPG is used for training the agents.

## Setup Environment Dependencies

In [1]:
import sys
from gym_unity.envs import UnityEnv

print("Python version:")
print(sys.version)
print(sys.executable)

# check Python version
if (sys.version_info[0] < 3):
    raise Exception("ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3")

Python version:
3.7.6 (default, Jan  8 2020, 13:42:34) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
/Users/adhipradhana/anaconda3/envs/unity-battle-royale/bin/python


## Start Environment

In [2]:
# Environment name
# Remember to put battle royale environment configuration within the config folder
env_name = "environment/battle-royale-static"

env = UnityEnv(env_name, worker_id=3, use_visual=False, multiagent=True)

print(str(env))

INFO:mlagents_envs:Connected new brain:
PlayerBrain?team=0
INFO:gym_unity:2 agents within environment.


<UnityEnv instance>




## Examine Observation Space

In [3]:
# Examine observation space
observation = env.observation_space
print("Agent observation space type: {}".format(observation))

Agent observation space type: Box(42,)


## Examine Action Space

In [4]:
# Examine action space
action = env.action_space
print("Agent action space type: {}".format(action))

Agent action space type: Box(5,)


## Agents Training
This part shows agent training using MADDPG algoritm

### Setup Algorithm Dependencies

In [5]:
from datetime import datetime
import torch
import visdom
import numpy as np

from utils.MADDPG import MADDPG
from utils.RandomProcess import OUNoise

### Setup Algoritm Parameters

In [6]:
random_seed = 4966
n_states = env.observation_space.shape[0]
n_actions = env.action_space.shape[0]
n_agents = env.number_agents
n_episode = 10000
max_steps = 10000
buffer_capacity = 1000000
batch_size = 1000
episodes_before_train = 100
checkpoint_episode = 500

### Setup MADDPG

In [7]:
# setup seed
torch.manual_seed(random_seed)
np.random.seed(random_seed)

maddpg = MADDPG(n_agents, n_states, n_actions, batch_size, buffer_capacity, episodes_before_train)
noise = OUNoise(env.action_space)

FloatTensor = torch.cuda.FloatTensor if maddpg.use_cuda else torch.FloatTensor

vis = visdom.Visdom(port=8097)

INFO:visdom:Visdom successfully connected to server


### MADDPG Training

In [8]:
win = None
current_time = str(datetime.now())
reward_record = []

print("Exploration begins...")
for i_episode in range(n_episode):
    # reset environment
    obs = env.reset()
    obs = np.stack(obs)
    noise.reset()
    
    # convert observation to tensor
    if isinstance(obs, np.ndarray):
        obs = torch.from_numpy(obs).float()
    
    total_reward = 0.0
    rr = np.zeros((n_agents,))
    for i_step in range(max_steps):
        obs = obs.type(FloatTensor)
        actions = maddpg.select_action(obs).data.cpu()
        actions_list = [noise.get_action(action) for action in actions.tolist()]
        
        obs_, reward, done, _ = env.step(actions_list)

        reward = torch.FloatTensor(reward).type(FloatTensor)
        obs_ = np.stack(obs_)
        obs_ = torch.from_numpy(obs_).float()
        if i_step != max_steps - 1:
            next_obs = obs_
        else:
            next_obs = None

        total_reward += reward.sum()
        rr += reward.cpu().numpy()
        maddpg.memory.push(obs.data, actions, next_obs, reward)
        
        obs = next_obs

        c_loss, a_loss = maddpg.update_policy()
        
        # check if done
        if True in done:
            break

    maddpg.episode_done += 1
    print("Episode: {}, reward = {}".format(i_episode, total_reward))
    reward_record.append(total_reward)
    
    if maddpg.episode_done == maddpg.episodes_before_train:
        print("Training begins...")
        print("MADDPG on Battle Royale")
              
    if win is None:
        win = vis.line(X=np.arange(i_episode, i_episode+1),
                       Y=np.array([
                           np.append(total_reward, rr)]),
                       opts=dict(
                           ylabel="Reward",
                           xlabel="Episode",
                           title="MADDPG on Battle Royale | " + \
                               "Agent: {} | ".format(n_agents) + \
                               "Time: {}\n".format(current_time),
                           legend=["Total"] +
                           ["Agent-".format(i) for i in range(n_agents)]))
    else:
        vis.line(X=np.array(
            [np.array(i_episode).repeat(n_agents+1)]),
                 Y=np.array([np.append(total_reward,rr)]),
                 win=win,
                 update="append")
        
    # save model
    if (maddpg.episode_done % checkpoint_episode == 0):
        maddpg.save(current_time, maddpg.episode_done)

Exploration begins...
[array([-0.08441044, -0.20764791, -0.23965168, -0.29216411,  0.27108342]), array([ 0.40103958, -0.04510969, -0.7979477 , -0.20805811,  0.56222736])]
[array([ 0.41244246,  0.30590602, -0.667995  , -0.55316537, -0.08317485]), array([ 0.23903182,  0.48487373, -0.89018854, -0.46787762, -0.04969698])]
[array([-0.19765583,  0.12016494, -0.39692483, -0.13400011, -0.3605234 ]), array([-0.40468617,  0.24058521, -0.40869578,  0.11015745, -0.71845313])]
[array([-0.77302328,  0.32556389, -0.71018864,  0.18921766, -0.23763466]), array([-0.53240015,  0.05565484, -1.        ,  0.58792622, -0.35646091])]
[array([-0.33199257,  0.13848355, -0.88417492, -0.17022712, -0.60771479]), array([-0.65450354,  0.57577517, -0.6572314 ,  0.0220359 , -0.26139889])]
[array([-0.42866651, -0.11558449, -0.14841768,  0.0580049 , -0.31929027]), array([-0.43345861, -0.0351966 , -0.58170981,  0.71379316, -0.20146522])]
[array([-0.59921394, -0.23549055, -0.01859365,  0.31834103, -0.30525041]), array([ 0

[array([-0.13760301,  0.76099468, -0.5497842 , -0.21343789, -0.92473257]), array([-0.01007869,  0.8933761 , -0.51473364, -0.1982269 , -0.71643981])]
[array([-0.19881167,  0.88107425, -0.17068915, -0.13914051, -0.8519422 ]), array([-0.57792872,  1.        , -0.48627895, -0.08424448, -0.49944818])]
[array([-0.02752746,  0.4153739 ,  0.6074765 ,  0.04948721, -0.950416  ]), array([ 0.10192003,  0.52930006,  0.3254192 ,  0.38146983, -0.5721117 ])]
[array([-0.36272698,  0.59804188,  0.32239255,  0.36349408, -0.25340065]), array([-0.22800357,  0.73556022,  0.549617  ,  0.16277357,  0.01409177])]
[array([ 0.07451245,  0.60939094,  0.55019028,  0.25804223, -0.32948761]), array([ 0.3659759 ,  0.53147795,  0.59132376,  0.26040741, -0.31425869])]
[array([0.53928065, 0.48536914, 0.97992319, 0.1025431 , 0.07574121]), array([4.26560597e-01, 5.27171772e-01, 6.16903138e-01, 2.39089024e-04,
       1.80133831e-01])]
[array([ 0.27795713,  0.91792386,  1.        ,  0.02732968, -0.01712593]), array([ 0.0875

[array([-0.0721583 ,  0.45259639,  0.2323764 ,  0.48299705,  0.53499498]), array([ 0.18581025,  0.43786728, -0.43418753,  0.31715825,  0.36913162])]
[array([-0.22607969, -0.13441059, -0.26492325,  0.49603632,  0.27164999]), array([-0.32118442,  0.07346995,  0.12262088,  0.40333046,  0.55230626])]
[array([ 0.06042648, -0.25513552,  0.38097135,  0.40125308,  0.43911344]), array([ 0.1770906 , -0.35399095,  0.01020312,  0.5119757 ,  0.53472813])]
[array([-0.30728025, -0.33005434,  0.04765458,  0.28095811,  0.43513234]), array([-0.03479556, -0.18962138,  0.14668763,  0.5942374 ,  0.08482245])]
[array([-0.19909139, -0.28283437,  0.26847853,  0.24552636, -0.36330724]), array([-0.72297512,  0.14786409,  0.32112901,  0.51895949, -0.65188167])]
[array([-0.00712654, -0.17916584,  0.36898169,  0.1515915 , -0.14196384]), array([-0.27743011, -0.15898504,  0.4411967 ,  0.13060977,  0.27953715])]
[array([-0.17254724, -0.43741892,  0.57443987,  0.0232925 ,  0.56449189]), array([-0.10349716, -0.02838486

[array([-0.32900458,  0.13093999,  0.2884713 ,  0.83001412, -0.59197982]), array([-0.06015304, -0.31267924,  0.0618486 ,  0.79904782, -0.40269484])]
[array([-0.72287948, -0.30146078,  0.25463302, -0.21663489, -0.52358368]), array([-0.3765917 , -0.0051383 ,  0.07830029, -0.07478315,  0.31109271])]
[array([-0.10940429,  0.88614164,  0.39346844, -0.50368875, -0.16676142]), array([-0.29683451,  0.69228704, -0.12808117, -0.29739748,  0.29492935])]
[array([-0.54362094,  0.31121714,  0.51054643, -0.33952003,  0.48506108]), array([-0.73841651, -0.01633973,  0.30550986, -0.60857809,  0.89280945])]
[array([-1.        ,  0.16549709,  0.66546241, -0.69216901,  0.72363077]), array([-1.        ,  0.37948146,  0.56051387, -0.35218688,  0.8414182 ])]
[array([-1.        , -0.04739555,  0.51258362, -0.62380042,  0.48888296]), array([-0.86099692, -0.0133994 ,  0.64029436,  0.01958323,  0.65336639])]
[array([-0.98646268, -0.51443915,  0.67811183,  0.41292879,  0.73407838]), array([-0.61352075, -0.363541  

[array([-0.21507905,  0.43760582, -0.0915998 , -0.0298505 , -0.52465792]), array([ 0.12476272,  0.55282663,  0.34040926, -0.04421595, -0.45068074])]
[array([-0.25678533,  0.38553708,  0.31880818,  0.2995756 , -0.69214585]), array([-0.56005629,  0.00211326,  0.31181583,  0.22772675, -0.67832818])]
[array([-0.52490185, -0.06558036,  0.42597597,  0.02121843, -0.48234791]), array([-0.39001171, -0.4553681 ,  0.34690121, -0.25325233, -0.02803801])]
[array([-6.40580490e-01, -4.94167856e-01,  1.34499618e-01, -9.55218981e-01,
       -1.34353318e-04]), array([ 0.32170934, -0.31517598, -0.09054214, -0.55758024,  0.43377845])]
[array([ 0.55378506, -0.52622705,  0.71688419, -0.32817796,  0.33893405]), array([ 0.59604459, -0.35316785,  0.17334634, -0.08259276,  0.34823092])]
[array([ 0.82063252, -0.39850474,  0.94037115, -0.4367205 ,  0.37888656]), array([ 0.53240574, -0.30855834,  0.77646721, -0.51518975,  0.00659845])]
[array([-0.35297834, -0.57079998,  1.        , -0.29502969, -0.57998421]), arra

[array([ 0.76666392, -0.29469178,  0.15733377, -0.44537298,  0.18447355]), array([ 0.45402057, -0.52277504,  0.20033327, -0.57616262,  0.06072747])]
[array([ 0.43069493, -0.40481924,  0.52063715, -0.9271945 , -0.02551392]), array([ 0.22106508, -0.48986328,  0.15688595, -0.5163148 , -0.35012722])]
[array([ 0.09635173, -0.611046  ,  0.37024142, -1.        , -0.00785944]), array([ 0.06434177, -0.45522557,  0.49297077, -1.        ,  0.08967351])]
[array([-0.391342  , -0.79749617,  0.55828045, -1.        , -0.56482309]), array([ 0.58569569, -0.61293369, -0.13586717, -0.71378394, -0.92079318])]
[array([ 0.79050816, -0.35738967,  0.25917263, -0.67144037, -0.61246363]), array([ 0.73704095, -0.55128567,  0.14877581, -0.88645005, -0.49499729])]
[array([ 0.29942989, -0.11944123,  0.25429057, -0.8277467 , -0.69760371]), array([ 0.44349876,  0.33485443,  0.70883541, -0.52053974,  0.07783389])]
[array([ 0.73374047,  0.10449505,  0.5324595 , -0.95740977,  0.0657226 ]), array([ 0.25487694, -0.02478194

[array([ 0.41766731,  0.0992195 , -0.31610468,  0.39455831,  0.29883065]), array([ 0.30795595, -0.01578644, -0.32426741,  0.11349452,  0.52515689])]
[array([-0.20988377,  0.25092443,  0.18525087,  0.22614604,  0.5520224 ]), array([-0.58541968, -0.05819712,  0.26262313,  0.0573263 ,  0.60885581])]
[array([-0.85557112,  0.02627253, -0.08924273, -0.06122016,  0.68969253]), array([ 0.14537771,  0.48905087, -0.55353597,  0.10254811,  0.32671212])]
[array([ 0.23458577,  0.67542439, -0.25701405,  0.2704722 , -0.32080713]), array([-0.00426419,  0.9366237 , -0.7835168 ,  0.0426315 ,  0.09845046])]
[array([-0.01489402,  0.68323619, -0.56677122, -0.13681434,  0.17616003]), array([ 0.79466283,  0.92901598, -0.99315618, -0.11870722,  0.58576321])]
[array([ 0.18746653,  0.99149706, -0.44473877,  0.13590072,  0.40098389]), array([-0.06090056,  0.82997912, -0.40610793,  0.27378214,  0.53027868])]
[array([-0.27350025,  1.        ,  0.06844963, -0.29979191,  0.25134136]), array([-0.63124265,  1.        

[array([-0.18583429,  0.41114846,  0.07793742,  0.17075791,  0.36915971]), array([-0.56251996,  0.36680072, -0.13407889,  0.04414452,  0.6907732 ])]
[array([-0.14193635,  0.57140773,  0.71567913, -0.21543282,  0.64612301]), array([ 0.18805036,  0.99798472,  0.43948629, -0.32255751,  0.67483607])]
[array([-0.10674175,  0.40234317,  0.01529133, -0.36298126,  0.315587  ]), array([ 0.02012182,  0.74126451,  0.01576949, -0.89230475,  1.        ])]
[array([-0.19950765,  0.68663499,  0.05534011, -1.        ,  1.        ]), array([ 0.20776148,  0.82059229,  0.18731576, -0.52061228,  1.        ])]
[array([ 0.12951882,  0.63365596,  0.37512075, -0.45101251,  0.98613144]), array([ 0.05343437,  0.75092726, -0.01083805, -0.26608807,  1.        ])]
[array([-0.27163843,  0.42367221, -0.10798838, -0.98096502,  1.        ]), array([ 0.19001328,  0.67893458, -0.02074798, -0.94123884,  1.        ])]
[array([ 0.13895336, -0.15720081,  0.0301158 , -1.        ,  0.40459809]), array([ 0.08050817,  0.2000863 

[array([ 0.6576032 , -0.25714632,  1.        , -0.35284058, -0.5094448 ]), array([ 0.42837953,  0.15191267,  1.        , -0.34299525,  0.13684406])]
[array([ 0.55112299, -0.02654968,  1.        , -0.26560782,  0.10900052]), array([ 0.1419345 , -0.11796547,  0.44186561, -0.48624749, -0.21143512])]
[array([ 0.01311775,  0.08692962,  0.71039997, -0.2980828 , -0.30130873]), array([-0.16357284,  0.53557892,  0.59819201,  0.05388328, -0.23826076])]
[array([ 0.07946705,  0.49727862,  1.        ,  0.11177994, -0.82263273]), array([ 0.39926203, -0.36388045,  0.38997101,  0.51183879, -0.68101725])]
[array([ 0.24331864, -0.79712505,  0.18220153,  0.28644911, -0.43355056]), array([ 0.54686565, -0.77696128,  0.48633336,  0.050751  , -0.36146894])]
[array([ 0.1128125 ,  0.0043411 ,  0.54613301, -0.50990371,  0.27586603]), array([ 0.23204279, -0.17248066,  0.30112503, -0.63885042,  0.60114143])]
[array([-0.0676215 ,  0.15288119, -0.0164622 , -1.        ,  0.65857105]), array([ 0.07172035,  0.43749499

[array([ 9.16022496e-01,  4.09719258e-01,  1.85233696e-01, -9.98751107e-01,
       -8.45018445e-04]), array([ 1.        ,  0.64947385, -0.28943024, -0.50220581, -0.13070626])]
[array([ 1.        ,  0.11050027, -0.17392831, -0.6459376 ,  0.06090822]), array([ 0.83016395, -0.11969251, -0.65995624, -0.43002658,  0.18858118])]
[array([ 0.59610586, -0.04856132,  0.0506698 , -0.33435835, -0.20809615]), array([ 0.62911052, -0.06943925, -0.64450107,  0.32119592, -0.23171706])]
[array([ 0.12723758,  0.00606662, -0.29890943,  0.30157374, -0.44195428]), array([ 0.78955971,  0.16577847, -0.24377664,  0.51965228, -0.30158574])]
[array([ 0.63533919, -0.09315739, -0.44745932,  0.21578042, -0.06671809]), array([ 1.        ,  0.32504969, -0.06332016, -0.29299004, -0.33952545])]
[array([ 0.65840391,  0.38979011,  0.22584358, -0.22185057, -0.05035796]), array([ 0.48989432,  0.51119979, -0.34934424,  0.11257407, -0.04394521])]
[array([ 0.42185963,  0.73152863, -0.4556    ,  0.02935327,  0.35720226]), arra

[array([ 0.87143684, -0.71762507,  0.5719803 ,  0.54207982, -0.64751507]), array([ 0.98188175, -0.79348201,  0.18018155,  0.89849713, -0.08379855])]
[array([ 0.74431567, -1.        ,  0.20499765,  0.87371699, -0.20869828]), array([ 0.30569667, -0.93603026, -0.06096082,  0.82091658,  0.34087871])]
[array([-0.29114257, -1.        ,  0.401148  ,  1.        ,  0.23337709]), array([-0.12154542, -1.        , -0.14271722,  0.95663448,  0.128883  ])]
[array([ 0.1556538 , -0.99770594, -0.45420358,  0.73477529, -0.1479632 ]), array([-0.19347402, -0.26722794, -0.48726821,  0.48720345, -0.07847017])]
[array([-0.26313586,  0.26739928,  0.01704818,  0.52653569, -0.30914033]), array([-0.49722968,  0.34555296, -0.33273716,  0.65134495, -0.06529024])]
[array([-0.94089068,  0.15703633,  0.302653  ,  0.64188327, -0.00913893]), array([-0.39691801,  0.26886349,  0.22449978,  0.03190238, -0.18536434])]
[array([-1.        , -0.00259862,  0.80729686, -0.16012136,  0.16269211]), array([-0.67518658, -0.15077938

[array([ 0.34358921, -0.12258127,  0.31852195, -0.48290571, -0.32135404]), array([ 1.64641378e-01,  3.65812726e-04,  2.85146334e-01, -6.61944866e-01,
        2.00440550e-01])]
[array([ 0.54010001,  0.21748275,  0.79530624, -0.51588434, -0.00824366]), array([ 0.30913508,  0.05107481,  0.51533837, -0.12478314, -0.05343832])]
[array([-0.07395084, -0.18495227,  0.74528507, -0.43197607, -0.55916899]), array([-0.21049693, -0.41128671,  0.68384083,  0.0034727 , -0.58418586])]
[array([-0.12464882, -0.93892028,  0.94349582, -0.55324145, -0.54015386]), array([ 0.33472805, -0.84518285,  0.40445264, -0.41153362, -0.20090795])]
[array([-0.03446204, -0.76345822,  0.7251622 ,  0.05699409, -0.25773019]), array([-0.32139895, -0.75733348,  0.25056077, -0.05319696, -0.56466261])]
[array([-0.2245056 , -0.88848257,  0.20040107, -0.4003756 , -0.25567298]), array([ 0.5474759 , -0.52447681, -0.05608353, -0.82714776, -0.09527237])]
[array([ 0.38797482, -0.67226291, -0.14609038, -0.44153518,  0.27361121]), arra

[array([ 0.25487739,  0.35750369, -0.0613168 , -0.76723156,  0.22832381]), array([-0.05229002,  0.18229387, -0.95265672, -0.18186327,  0.45634594])]
[array([-0.35157906,  0.41529136, -0.73862504,  0.11657115, -0.16481503]), array([-0.65024194, -0.48082367, -1.        ,  0.4485768 ,  0.15628357])]
[array([-0.8398512 ,  0.04684032, -0.4186543 ,  0.15445285, -0.3842596 ]), array([-0.67877732,  0.17665695, -0.7247524 ,  0.3113205 , -0.61381273])]
[array([-0.80211777, -0.32915998, -0.68534119,  0.20972354, -0.3836061 ]), array([-0.51284367, -0.49899832, -1.        ,  0.84132368,  0.2035042 ])]
[array([-0.64214349, -0.63858831, -1.        ,  0.45809832,  0.17050967]), array([ 0.03239449, -0.62711051, -1.        , -0.01307755,  0.51803562])]
[array([ 0.24151681, -0.56663802, -0.35031684, -0.45015627,  0.31732422]), array([ 0.24197563, -0.78862127, -0.52490063,  0.00625318, -0.20662431])]
[array([ 0.64456433, -0.49093434, -0.17872837, -0.03695248,  0.19118029]), array([ 0.70080879, -0.22728172

[array([-0.03086798,  0.43553843,  0.07853925, -0.52354886,  0.00367431]), array([ 0.64007512,  0.0359234 ,  0.01354246,  0.09111711, -0.09024676])]
[array([ 0.51257947, -0.51761163,  0.83774172, -0.12265893, -0.92312998]), array([ 0.63610885,  0.06462618,  0.1465385 , -0.35129695, -0.67862976])]
[array([ 0.26222344, -0.21751392, -0.11819315, -0.03473032, -0.65661493]), array([ 0.29755596,  0.01782225, -0.58696408,  0.30960522, -0.03959227])]
[array([ 0.02220218,  0.2627417 , -0.58008536,  0.1955539 , -0.02006537]), array([-0.04807555,  0.04726703, -0.6343395 ,  0.99065022,  0.01338723])]
[array([-0.6083048 ,  0.20499311, -0.07575941,  0.77059023,  0.27054451]), array([-0.76442879, -0.37379106, -0.90119285,  1.        ,  0.40561934])]
[array([-0.40793301, -0.9215697 , -0.71977641,  0.57988648,  0.4554807 ]), array([-0.34837123, -1.        , -0.87274161,  0.61344626,  0.41354155])]
[array([-0.1705632 , -0.97015838, -0.07643207,  0.67084983, -0.13640832]), array([-0.24409826, -0.62131487

[array([ 1.        , -0.47144798,  0.087926  , -0.32569525,  1.        ]), array([ 0.94692983, -0.2472286 , -0.43676732, -0.29132961,  1.        ])]
[array([ 1.        , -0.04427961,  0.38560365, -0.27536306,  0.82675366]), array([ 0.44760062,  0.37426564,  0.04242896, -0.11820179,  0.82444163])]
[array([ 0.36609419,  0.64885686,  0.30566084, -0.36702544,  0.86162069]), array([ 0.06776919,  0.63210205, -0.24309142, -0.44149489,  1.        ])]
[array([ 0.16232351,  0.96688101,  0.12367871, -0.34079179,  1.        ]), array([-0.05289563,  0.87726987, -0.23189418, -0.3932518 ,  1.        ])]
[array([-0.23515747,  0.72170801,  0.1865014 , -0.61907729,  1.        ]), array([-0.12927395,  1.        , -0.63977377, -0.39272714,  0.78803297])]
[array([-0.27995182,  0.4981785 , -0.61491109, -0.45695078,  0.42310678]), array([-0.43692832,  1.        , -0.56544063, -0.88731665,  0.82125804])]
[array([-0.60899437,  0.86254894,  0.04262485, -0.60869283,  0.67355965]), array([-0.30370254,  0.43805614

[array([-0.43795582,  0.48953296,  1.        , -0.04591491,  0.0999012 ]), array([-0.49765488,  0.23667902,  0.51353305, -0.11091108, -0.1316475 ])]
[array([-0.54655797, -0.1075028 ,  0.5216292 , -0.3684401 , -0.20893509]), array([-0.13342982, -0.06590466,  0.36505482,  0.00203655,  0.05857861])]
[array([-0.84329319, -0.36356323,  0.48725362, -0.09810103, -0.2805658 ]), array([-1.        , -0.12870656, -0.18767696,  0.13180078,  0.22530937])]
[array([-1.        , -0.47224543,  0.0672484 , -0.61101079,  0.1862477 ]), array([-0.53270853, -0.86532188, -0.1138952 , -0.890885  ,  0.61573197])]
[array([-0.59311299, -1.        ,  0.51378185, -1.        ,  0.20575017]), array([-0.4456025 , -0.94568284,  0.37029057, -1.        ,  0.3661852 ])]
[array([-0.53622756, -0.53692279,  0.50620275, -0.83320082,  0.22626089]), array([-0.13852293,  0.0327355 ,  0.15782688, -0.15605939,  0.6394265 ])]
[array([-0.42789334, -0.15964877,  0.47473124, -0.32263422,  0.5592899 ]), array([-0.25213113, -0.39072403

[array([-0.51071374,  0.35241376, -0.37175272, -1.        ,  0.14930083]), array([-0.47497975,  0.1911754 ,  0.09079029, -0.98727642,  0.06146253])]
[array([-0.83718112,  0.14452384,  0.5801157 , -0.7280504 , -0.00380835]), array([-0.35856256,  0.26447027,  0.27819668, -0.48970218, -0.05080559])]
[array([-0.27687697,  0.21989914,  0.52751171, -0.50301113, -0.24879243]), array([-0.44277104,  0.33105748,  0.28587693, -0.27003395,  0.17491325])]
[array([ 0.07989897, -0.37604407,  0.64866964, -0.45950495,  0.12492454]), array([-0.12521579, -0.40420091,  0.21025683, -0.30201566,  0.67508173])]
[array([-0.51346604, -0.6540794 ,  0.36484161, -0.16335969,  0.69505688]), array([ 0.02738153, -0.08509216,  0.15616647, -0.50657648,  0.84394771])]
[array([-0.16133798, -0.52184286,  0.47315169, -0.54022426,  0.8886346 ]), array([-0.23220855, -0.35760186,  0.11398732, -0.66162944,  1.        ])]
[array([-0.88055609, -0.80581815,  0.75359309, -0.69818965,  1.        ]), array([ 0.01319862, -0.12330793

[array([-1.        ,  0.83594241, -0.23554949,  0.59932321, -0.55539142]), array([-0.81895074,  0.78457412, -0.2313138 ,  0.87263619, -0.53050789])]
[array([-0.52977857,  0.6713801 , -0.4390384 ,  0.78514125, -0.75197068]), array([-0.29150856,  0.67172477, -0.18398515,  0.91333273, -0.9628628 ])]
[array([-0.48928774,  0.67461467,  0.08910883,  0.98491673, -1.        ]), array([-0.47515946,  0.79390036,  0.20336334,  1.        , -1.        ])]
[array([-0.78232654,  0.71366572,  0.42857531,  1.        , -1.        ]), array([-1.        ,  0.41725192, -0.05725851,  1.        , -1.        ])]
[array([-1.        ,  0.37784198,  0.01070342,  1.        , -0.78793635]), array([-0.57142677,  0.26203855, -0.04765814,  1.        , -0.30665922])]
[array([-0.4896788 ,  0.10495265,  0.53320795,  0.81710535, -0.57891178]), array([ 0.15877696, -0.18206484,  0.57173803,  0.92780337, -0.55351322])]
[array([ 0.40140764, -0.03915225,  0.26677283,  0.69814869, -0.57984843]), array([ 0.39022158,  0.31331941

[array([ 0.4021279 , -0.1651952 ,  0.69856119,  0.44154812, -0.11830633]), array([ 0.16247741, -0.21519956,  0.59031199,  0.30299984,  0.76330419])]
[array([ 0.26234755, -0.62170289,  0.91715466, -0.05104884,  0.91101843]), array([ 0.28014498, -1.        ,  0.6260919 ,  0.36617277,  1.        ])]
[array([-0.35901936, -0.53249903,  0.90589974, -0.17135031,  0.88388078]), array([-0.38807331, -0.65139991,  0.46953532,  0.41737329,  0.74450212])]
[array([-0.46352269, -0.90905292,  0.47570567,  0.35540362,  0.67850361]), array([-0.42177105, -0.69150631, -0.05332814,  0.27424308,  0.04708804])]
[array([-0.46800427, -0.98108118,  0.36323164, -0.1962979 ,  0.03607261]), array([-0.04032842, -1.        , -0.13548933, -0.40383683, -0.7330468 ])]
[array([-0.61270386, -1.        ,  0.21345548,  0.2342898 , -0.19763774]), array([ 4.33853914e-04, -1.00000000e+00, -4.46593915e-01,  6.30515121e-01,
       -5.05204783e-01])]
[array([ 0.05601159, -1.        , -0.23945788,  0.12419732, -0.29217281]), arra

[array([-0.08845905,  0.13777609, -0.31407787, -1.        , -0.09664577]), array([ 0.17525089,  0.46376893, -0.42965837, -0.70559876, -0.28757389])]
[array([ 0.27776333,  0.4033802 ,  0.0994236 , -0.74576112, -0.24061455]), array([ 0.34103963,  0.4395268 , -0.06951204, -0.11462778,  0.02427025])]
[array([-0.2172094 ,  0.58669298,  0.53801667, -0.232729  ,  0.17183618]), array([ 0.32069664,  0.73397365,  0.37742229, -0.06528738,  0.1482498 ])]
[array([0.72835391, 0.62691456, 0.21308563, 0.10026434, 0.11112819]), array([ 0.77645921,  0.25601631,  0.24403816,  0.09353781, -0.19705488])]
[array([ 0.93301403,  0.25059139,  0.68812952, -0.21797968, -0.26947061]), array([ 0.89713446,  0.98544388,  0.30093044, -0.01435841, -0.35168268])]
[array([ 0.48925547,  0.87579607,  0.28753703,  0.24038821, -0.85185734]), array([ 0.36698697,  0.73636581, -0.29462618, -0.03645552, -1.        ])]
[array([ 0.23587317,  0.13033167, -0.06510701,  0.29434065, -1.        ]), array([ 0.06819806,  0.56902789, -0.

[array([-0.31391738,  0.39057703, -0.75934082, -0.99766648, -0.55713774]), array([ 0.08064922,  0.08980001, -0.50288291, -0.93345034, -0.68210026])]
[array([-0.44839022,  0.25130776, -0.17941612, -0.73558562, -0.16134674]), array([-0.54870388,  0.39226881, -0.70817781, -0.23455592, -0.23314796])]
[array([-0.1981974 , -0.21038362, -0.57866644, -0.18573216, -0.90475199]), array([-0.27281998, -0.06046941, -0.89712954, -0.19993566, -0.25002004])]
[array([ 0.06120713, -0.07967636, -0.51628388,  0.11675491, -1.        ]), array([ 0.03605336,  0.19263054, -0.38759577,  0.25079348, -1.        ])]
[array([ 0.37195616,  0.21817056, -0.66008648, -0.33877801, -0.80835717]), array([ 0.05624162, -0.03231493, -0.57127056, -0.66111262, -0.75637813])]
[array([ 0.10758229, -0.10470467, -0.06964153, -0.60990469, -1.        ]), array([-0.02157759, -0.56181721, -0.22456942, -0.0725142 , -0.7230291 ])]
[array([-0.14896303, -0.20244589,  0.27055789,  0.03007223, -0.26624643]), array([-0.1526512 , -0.19101943

[array([ 0.81966105, -0.55106264, -0.12133544,  0.06483237,  0.17530462]), array([ 1.        , -0.53291611, -0.26240043, -0.17860429,  0.80667948])]
[array([ 0.56650706, -0.14136889, -0.55474983,  0.20106477,  1.        ]), array([ 0.26492888, -0.23851624, -0.96983469,  0.42313848,  0.7773267 ])]
[array([ 0.49748526, -0.59635284, -1.        ,  0.54540773,  1.        ]), array([ 0.28240216, -0.42926691, -1.        ,  0.42143322,  0.988247  ])]
[array([ 0.02378958, -1.        , -0.8468436 ,  0.35054148,  0.79775726]), array([-0.05106212, -0.78268585, -1.        ,  0.16353831,  1.        ])]
[array([-0.43221753, -0.58621614, -1.        ,  0.08618015,  0.98250842]), array([-0.36473485,  0.05194866, -1.        , -0.46029051,  0.66891977])]
[array([-0.93894619,  0.35647017, -0.55443959, -0.65580203,  0.43275653]), array([-0.51377693,  0.67070789, -0.92289665, -0.22342527,  0.07943779])]
[array([-0.75999504,  0.61316393, -0.90864985, -0.53277892, -0.18212705]), array([-0.23203583,  0.64980658

[array([-0.09100648,  0.26876589,  0.93735374,  0.21206312,  0.66085981]), array([-0.26051262,  0.46481885,  0.39908056,  0.64954461,  0.30093158])]
[array([-0.58123557,  0.03145626,  1.        ,  0.94689768, -0.26373046]), array([-0.25382863,  0.19398899,  0.36680831,  1.        ,  0.23778683])]


KeyboardInterrupt: 

## Close Environment

In [14]:
env.close()

INFO:mlagents_envs:Environment shut down with return code 0.


# Testing Model

## Model Variables

In [10]:
random_seed = 4966
n_states = env.observation_space.shape[0]
n_actions = env.action_space.shape[0]
n_agents = env.number_agents
n_episode = 50
max_steps = 10000
buffer_capacity = 1000000
batch_size = 1000
episodes_before_train = 10

In [11]:
# setup seed
torch.manual_seed(random_seed)
np.random.seed(random_seed)

maddpg = MADDPG(n_agents, n_states, n_actions, batch_size, buffer_capacity, episodes_before_train)
noise = OUNoise(env.action_space)

FloatTensor = torch.cuda.FloatTensor if maddpg.use_cuda else torch.FloatTensor

## Loading Model

In [12]:
import os

path = os.path.join(os.getcwd(), 'checkpoint', 'Time_2020-03-24_14-08-03.639988_NAgent_2_Episode_180.pth')
maddpg.load(path, map_location='cpu')

## Run Model

In [13]:
print("Testing model...")
for i_episode in range(n_episode):
    # reset environment
    obs = env.reset()
    obs = np.stack(obs)
    noise.reset()
    
    # convert observation to tensor
    if isinstance(obs, np.ndarray):
        obs = torch.from_numpy(obs).float()
    
    total_reward = 0.0
    for i_step in range(max_steps):
        obs = obs.type(FloatTensor)
        actions = maddpg.select_action(obs).data.cpu()
        actions_list = [noise.get_action(action) for action in actions.tolist()]
        
        obs_, reward, done, _ = env.step(actions_list)
        
        reward = torch.FloatTensor(reward).type(FloatTensor)
        obs_ = np.stack(obs_)
        obs_ = torch.from_numpy(obs_).float()
        if i_step != max_steps - 1:
            next_obs = obs_
        else:
            next_obs = None

        total_reward += reward.sum()     
        obs = next_obs

        print(done)
        # check if done
        if True in done:
            break

    maddpg.episode_done += 1
    print("Episode: {}, reward = {}".format(i_episode, total_reward))

Testing model...
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, 

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
Episode: 0, reward = 1.114999532699585
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, False]
[False, Fa

KeyboardInterrupt: 