### CONCERNS
1. list of directions/positions are never emptied during an episode, which makes the simulation slower and slower after each timestep

### IDEAS
other possible features:
1. robot is fallen down or not
2. distance to border (& which border?)

time optimalization:
1. clear last item from history every 4 timesteps (we only use the current and previous state and the one before that)
2. interval of states to be interpreted: skip N frames before evaluation next state
3. Since the rewards are so sparse, maybe use Imitation learning instead of DQN --> we are "experts" since we know the tactic of the blue bot. we can use this to teach our bot how to defeat the other agent.


In [7]:
from VisualModule import AgentEnvironment
from DQN_Agent import NeurosmashAgent

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize

import gym
import math
import random
from collections import namedtuple
from itertools import count
from PIL import Image
import os

from stopwatch import Stopwatch


In [8]:
model_output_dir = "output/model_output/"

if not os.path.exists(model_output_dir):
    os.makedirs(model_output_dir)

show_images = False
skip_frames = 10
state_size =  11 # agent_pos, enemy_pos, vec_agent, vec_enemy, rel_pos_enemy, done
action_size = 3
episode_count = 1000
batch_size = 32
size       = 768         # Please check the Updates section above for more details
timescale  = 10           # Please check the Updates section above for more details

In [3]:
environment = AgentEnvironment(size=size, timescale=timescale)
agent = NeurosmashAgent(state_size = state_size, action_size = action_size) # action size: move in x or y direction, or do nothing

In [4]:
def direction(agent_path, enemy_path):
    A_X = (agent_path[-1] - np.array(agent_path[-2]))[0]
    A_Y = -(agent_path[-1] - np.array(agent_path[-2]))[1]
    E_X = (enemy_path[-1] - np.array(enemy_path[-2]))[0]
    E_Y = -(enemy_path[-1] - np.array(enemy_path[-2]))[1]
    return [A_X,A_Y],[E_X,E_Y]

def do_action(action, total_steps, eval_pic):
    stopwatch = Stopwatch() 
    stopwatch.start()
    info, reward, agent_coord, enemy_coord, following_state = environment.simpleCoord(action, 0, eval_pic)
    stopwatch.stop()
    #print(f"Total time for do action: {stopwatch.duration}")
    if len(environment.agent_path) < 2:
        distance = 500 # Initial distance, only for initialisation
        agent_direction = [1,0] # By definition of facing each other
        enemy_direction = [-1,0]
    else:
        distance = np.sqrt(np.square(np.array(list(np.array(agent_coord)- np.array(enemy_coord))).sum(axis=0)))
        # Extract all variables 
        agent_direction, enemy_direction  = direction(environment.agent_path, environment.enemy_path)

    rel_pos_enemy = np.array(enemy_coord) - np.array(agent_coord)
    return info, reward, np.array(agent_coord), np.array(enemy_coord), agent_direction, enemy_direction, distance, rel_pos_enemy, following_state


In [5]:
def init_environment(env, agent_here):
    info, reward, state = env.reset() 
    agent_trajectories = []
    enemy_trajectories = []
    
    for i in range(3):
        action = agent_here.act(3) # get next action
        #pre_state_img = np.flip(np.array(state).reshape(3,256,256).transpose(1,2,0),0)
        step_number_now = i+1
        info, reward, agent_pos, enemy_pos, agent_direction, enemy_direction, distance, relative_pos_enemy, next_state = do_action(action, step_number_now, True)  

        #post_state_img = np.flip(np.array(next_state).reshape(3,256,256).transpose(1,2,0),0)

        #agent_pos, enemy_pos = env_feat.coord(pre_state_img, post_state_img)
        agent_trajectories.append(list(agent_pos))
        enemy_trajectories.append(list(enemy_pos))
        
    return info, reward, next_state, agent_trajectories, enemy_trajectories, agent_direction, relative_pos_enemy, enemy_direction

In [6]:
for e in range(episode_count):
    status, reward, next_state, agent_trajectories, enemy_trajectories, agent_dir, relative_pos_enemy, enemy_dir = init_environment(environment, agent)
    done = False
    total_reward = 0
    total_timesteps = 4
    small_state = [agent_trajectories[-1][0], agent_trajectories[-1][1], enemy_trajectories[-1][0], enemy_trajectories[-1][1], [agent_dir[0]], [agent_dir[1]], [relative_pos_enemy[0]], [relative_pos_enemy[1]], [enemy_dir[0]], [enemy_dir[1]], done]#"agent direction", "relative position enemy", "enemy direction" ]
    distances = []
    evaluate_frame = False

    while done == False:
        if total_timesteps % skip_frames == 0:
            evaluate_frame = True
        else:
            evaluate_frame = False
            
        action = agent.act(small_state) #step(info, reward, state)
        #print(f"agent chooses action: {action}")
        stopwatch = Stopwatch() 
        stopwatch.start()
        status, reward, agent_pos, enemy_pos, agent_dir, enemy_dir, distance, enemy_pos_rel, next_state = do_action(action, total_timesteps, evaluate_frame)   
        stopwatch.stop()
        #print(f"Total time for one step: {stopwatch.duration}")
        
        total_reward += reward

        if status == 1:
            print(f"Game is finished, \n your final reward is: {total_reward}, duration was {total_timesteps} timesteps")
            done = True
        
        agent_trajectories.append(list(agent_pos))
        enemy_trajectories.append(list(enemy_pos))
        distances.append(distance)
        
        done_list = [done]
        next_small_state = [agent_trajectories[-1][0], agent_trajectories[-1][1], enemy_trajectories[1][0], enemy_trajectories[1][1], agent_dir[0], agent_dir[1], enemy_pos_rel[0], enemy_pos_rel[1], enemy_dir[0], enemy_dir[1], done]  
    
        next_small_state = np.reshape(next_small_state, [1, state_size]) # why?

        if evaluate_frame:
            agent.remember(small_state, action, reward, next_small_state, list(done_list))
        small_state = next_small_state
        total_timesteps += 1

    if len(agent.memory) > batch_size:
        agent.train(batch_size)
        print("train")

    if e % 50 == 0:
        agent.save(model_output_dir + "weights_"+ '{:04d}'.format(e) + ".hdf5")
            
    

will be analysed
will be analysed
total time for current frame (with analysis): 0.21896831099729752
will be analysed
total time for current frame (with analysis): 0.2214054789947113
total time for current frame (no analysis): 0.09866003599745454
total time for current frame (no analysis): 0.09600572800263762
total time for current frame (no analysis): 0.09971796599711524
total time for current frame (no analysis): 0.10010865299409488
total time for current frame (no analysis): 0.09402514500106918
total time for current frame (no analysis): 0.12869745599891758
will be analysed
total time for current frame (with analysis): 0.2295581769940327
total time for current frame (no analysis): 0.1243344350004918
total time for current frame (no analysis): 0.1339587760012364
total time for current frame (no analysis): 0.10698620300536277
total time for current frame (no analysis): 0.10002760399947874
total time for current frame (no analysis): 0.13953358899743762
total time for current frame (no a

total time for current frame (no analysis): 0.10047425000084331
total time for current frame (no analysis): 0.10301433299900964
total time for current frame (no analysis): 0.10262065399729181
total time for current frame (no analysis): 0.0980316290006158
will be analysed
total time for current frame (with analysis): 0.26443775900406763
total time for current frame (no analysis): 0.10964501000125892
total time for current frame (no analysis): 0.10054969100019662
total time for current frame (no analysis): 0.10138035599811701
total time for current frame (no analysis): 0.09942445300112013
total time for current frame (no analysis): 0.09973508000257425
total time for current frame (no analysis): 0.0991499199953978
total time for current frame (no analysis): 0.10024388100282522
total time for current frame (no analysis): 0.10676027499721386
total time for current frame (no analysis): 0.12438181699690176
will be analysed
total time for current frame (with analysis): 0.22495340699970257
tota

total time for current frame (no analysis): 0.10333220299798995
total time for current frame (no analysis): 0.10232011000334751
total time for current frame (no analysis): 0.10071898000023793
total time for current frame (no analysis): 0.11414698499720544
will be analysed
total time for current frame (with analysis): 0.21824865399685223
total time for current frame (no analysis): 0.10141232900059549
total time for current frame (no analysis): 0.10079381700052181
total time for current frame (no analysis): 0.09915070200077025
total time for current frame (no analysis): 0.1013077780007734
total time for current frame (no analysis): 0.10074279100081185
total time for current frame (no analysis): 0.09960242699889932
total time for current frame (no analysis): 0.10246916799951578
total time for current frame (no analysis): 0.11868200000026263
total time for current frame (no analysis): 0.11523645700071938
will be analysed
total time for current frame (with analysis): 0.23197719400195638
tot

total time for current frame (no analysis): 0.10141991400450934
total time for current frame (no analysis): 0.12126416600222001
total time for current frame (no analysis): 0.10948253399692476
total time for current frame (no analysis): 0.1034535139988293
total time for current frame (no analysis): 0.1032254780002404
total time for current frame (no analysis): 0.1221241090024705
total time for current frame (no analysis): 0.11628305000340333
total time for current frame (no analysis): 0.11384225399524439
will be analysed
total time for current frame (with analysis): 0.2181857950054109
total time for current frame (no analysis): 0.10054190899973037
total time for current frame (no analysis): 0.10121280999737792
total time for current frame (no analysis): 0.12032326299959095
total time for current frame (no analysis): 0.12196812800539192
total time for current frame (no analysis): 0.11080057900107931
total time for current frame (no analysis): 0.1276675580011215
total time for current fra

total time for current frame (no analysis): 0.11420730000099866
total time for current frame (no analysis): 0.11502390600071521
will be analysed
total time for current frame (with analysis): 0.2705024580063764
total time for current frame (no analysis): 0.12808364699594676
total time for current frame (no analysis): 0.13512091300071916
total time for current frame (no analysis): 0.12147607199585764
total time for current frame (no analysis): 0.1167083129985258
total time for current frame (no analysis): 0.1283631600017543
total time for current frame (no analysis): 0.11553586399531923
total time for current frame (no analysis): 0.10448537000047509
total time for current frame (no analysis): 0.1026345450009103
total time for current frame (no analysis): 0.10228194700175663
will be analysed
total time for current frame (with analysis): 0.27163869899959536
total time for current frame (no analysis): 0.12645066600089194
total time for current frame (no analysis): 0.10587411400047131
total 

total time for current frame (no analysis): 0.10507551200134913
total time for current frame (no analysis): 0.10327640299510676
total time for current frame (no analysis): 0.10237354099808726
total time for current frame (no analysis): 0.10228026700497139
total time for current frame (no analysis): 0.09827498999948148
total time for current frame (no analysis): 0.1058000860066386
will be analysed
total time for current frame (with analysis): 0.22891992000222672
total time for current frame (no analysis): 0.11811095200391719
total time for current frame (no analysis): 0.1190038690037909
total time for current frame (no analysis): 0.11747211100009736
total time for current frame (no analysis): 0.1126107729942305
total time for current frame (no analysis): 0.10108479700284079
total time for current frame (no analysis): 0.10231895800097845
total time for current frame (no analysis): 0.10215422199689783
total time for current frame (no analysis): 0.10009066500060726
total time for current f

will be analysed
total time for current frame (with analysis): 0.24620761999540264
total time for current frame (no analysis): 0.10718649699992966
total time for current frame (no analysis): 0.10400166500039632
total time for current frame (no analysis): 0.10668297800293658
total time for current frame (no analysis): 0.10597007800242864
total time for current frame (no analysis): 0.10341459800110897
total time for current frame (no analysis): 0.1039753360018949
total time for current frame (no analysis): 0.10319828300271183
total time for current frame (no analysis): 0.1187951659958344
total time for current frame (no analysis): 0.12078929600102128
will be analysed
total time for current frame (with analysis): 0.2144115929986583
total time for current frame (no analysis): 0.10348828599671833
total time for current frame (no analysis): 0.11681628500082297
total time for current frame (no analysis): 0.11902037300023949
total time for current frame (no analysis): 0.10564090900152223
total

total time for current frame (no analysis): 0.10186268500547158
total time for current frame (no analysis): 0.10077562199876411
total time for current frame (no analysis): 0.10089821399742505
total time for current frame (no analysis): 0.1018589349987451
will be analysed
total time for current frame (with analysis): 0.21348133900028188
total time for current frame (no analysis): 0.10315672999422532
total time for current frame (no analysis): 0.10103925099974731
total time for current frame (no analysis): 0.09947276399907423
total time for current frame (no analysis): 0.09960243300156435
total time for current frame (no analysis): 0.10259138800029177
total time for current frame (no analysis): 0.10082951100048376
total time for current frame (no analysis): 0.09964730399951804
total time for current frame (no analysis): 0.10151680200215196
Game is finished, 
 your final reward is: 0, duration was 308 timesteps
train
will be analysed
will be analysed
total time for current frame (with ana

total time for current frame (no analysis): 0.1081311719972291
total time for current frame (no analysis): 0.10203517900663428
total time for current frame (no analysis): 0.10105253400251968
total time for current frame (no analysis): 0.10043360700365156
total time for current frame (no analysis): 0.10010885399969993
total time for current frame (no analysis): 0.10009580599580659
total time for current frame (no analysis): 0.09995806700317189
total time for current frame (no analysis): 0.10205150000547292
will be analysed
total time for current frame (with analysis): 0.21737053000106243
total time for current frame (no analysis): 0.10177529499924276
total time for current frame (no analysis): 0.10174987399659585
total time for current frame (no analysis): 0.10091879100218648
total time for current frame (no analysis): 0.10293247299705399
total time for current frame (no analysis): 0.09986406099778833
total time for current frame (no analysis): 0.09897259699937422
total time for current

total time for current frame (no analysis): 0.1010147990018595
total time for current frame (no analysis): 0.10226236900052754
will be analysed
total time for current frame (with analysis): 0.21596412399958353
total time for current frame (no analysis): 0.1048062839981867
total time for current frame (no analysis): 0.10195682400080841
total time for current frame (no analysis): 0.10724047099938616
total time for current frame (no analysis): 0.10602076400391525
total time for current frame (no analysis): 0.10318985400226666
total time for current frame (no analysis): 0.10544186199695105
total time for current frame (no analysis): 0.11995955999736907
total time for current frame (no analysis): 0.10714226200070698
total time for current frame (no analysis): 0.10887595399981365
will be analysed
total time for current frame (with analysis): 0.2161220170019078
total time for current frame (no analysis): 0.10343903700413648
total time for current frame (no analysis): 0.10057277500163764
total

total time for current frame (no analysis): 0.10098280400416115
total time for current frame (no analysis): 0.09904054700018605
total time for current frame (no analysis): 0.09961804500198923
total time for current frame (no analysis): 0.1003045750039746
total time for current frame (no analysis): 0.10197950099973241
total time for current frame (no analysis): 0.1000439190029283
total time for current frame (no analysis): 0.10169409099762561
total time for current frame (no analysis): 0.10250034100317862
will be analysed
total time for current frame (with analysis): 0.22899190799944336
total time for current frame (no analysis): 0.10159464799653506
total time for current frame (no analysis): 0.10244895000505494
total time for current frame (no analysis): 0.1023173779976787
total time for current frame (no analysis): 0.10573618700436782
total time for current frame (no analysis): 0.10328045899950666
total time for current frame (no analysis): 0.10733443700155476
total time for current f

total time for current frame (no analysis): 0.10458202600420918
total time for current frame (no analysis): 0.09864707200176781
will be analysed
total time for current frame (with analysis): 0.20981742900039535
total time for current frame (no analysis): 0.10021561000030488
total time for current frame (no analysis): 0.10462507200281834
total time for current frame (no analysis): 0.10520513700612355
total time for current frame (no analysis): 0.10559166399616515
total time for current frame (no analysis): 0.10323955999774626
total time for current frame (no analysis): 0.10484884600009536
total time for current frame (no analysis): 0.10417744200094603
total time for current frame (no analysis): 0.10336355300387368
total time for current frame (no analysis): 0.12152510600571986
will be analysed
total time for current frame (with analysis): 0.22081704499578336
total time for current frame (no analysis): 0.10224084500077879
total time for current frame (no analysis): 0.11216584400244756
to

will be analysed
total time for current frame (with analysis): 0.2226214109978173
total time for current frame (no analysis): 0.10761797499435488
total time for current frame (no analysis): 0.10127641200233484
total time for current frame (no analysis): 0.10217056900000898
total time for current frame (no analysis): 0.10187615700124297
total time for current frame (no analysis): 0.10010513100132812
total time for current frame (no analysis): 0.10153087800426874
total time for current frame (no analysis): 0.10329877200274495
total time for current frame (no analysis): 0.10063989499758463
total time for current frame (no analysis): 0.10001507299602963
will be analysed
total time for current frame (with analysis): 0.21398396699805744
total time for current frame (no analysis): 0.1032980179952574
total time for current frame (no analysis): 0.11013465999712935
total time for current frame (no analysis): 0.11221679500158643
total time for current frame (no analysis): 0.1053783690003911
total

AttributeError: 'int' object has no attribute 'ndim'