### CONCERNS
1. list of directions/positions are never emptied during an episode, which makes the simulation slower and slower after each timestep

### IDEAS
other possible features:
1. robot is fallen down or not
2. distance to border (& which border?)

time optimalization:
1. clear last item from history every 4 timesteps (we only use the current and previous state and the one before that)
2. interval of states to be interpreted: skip N frames before evaluation next state
3. Since the rewards are so sparse, maybe use Imitation learning instead of DQN --> we are "experts" since we know the tactic of the blue bot. we can use this to teach our bot how to defeat the other agent.
4. rewrite the random action function 

In [1]:
from VisualModule import AgentEnvironment
from DQN_Agent import NeurosmashAgent

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize

import gym
import math
import random
from collections import namedtuple
from itertools import count
from PIL import Image
import os

from stopwatch import Stopwatch


Using TensorFlow backend.


In [3]:
model_output_dir = "output/model_output/"

if not os.path.exists(model_output_dir):
    os.makedirs(model_output_dir)

max_distance = 600
show_images = False
skip_frames = 15
state_size =  11 # agent_pos, enemy_pos, vec_agent, vec_enemy, rel_pos_enemy, done
action_size = 3
episode_count = 1000
batch_size = 32
size       = 768         # Please check the Updates section above for more details
timescale  = 10           # Please check the Updates section above for more details

In [4]:
agent = NeurosmashAgent(state_size = state_size, action_size = action_size) # action size: move in x or y direction, or do nothing

In [5]:
def compute_reward(standard_reward, distance):
    distance_reward = (max_distance-distance)/max_distance
    total_reward = (distance_reward + standard_reward)/ 2
    return total_reward

In [6]:
def direction(agent_path, enemy_path):
    A_X = (agent_path[-1] - np.array(agent_path[-2]))[0]
    A_Y = -(agent_path[-1] - np.array(agent_path[-2]))[1]
    E_X = (enemy_path[-1] - np.array(enemy_path[-2]))[0]
    E_Y = -(enemy_path[-1] - np.array(enemy_path[-2]))[1]
    return [A_X,A_Y],[E_X,E_Y]

def do_action(action, total_steps, eval_pic, environment):
    stopwatch = Stopwatch() 
    stopwatch.start()
    info, reward, agent_coord, enemy_coord, following_state = environment.actionLoop(action, 0, eval_pic)
    stopwatch.stop()
    #print(f"Total time for do action: {stopwatch.duration}")
    if len(environment.agent_path) < 2:
        distance = 500 # Initial distance, only for initialisation
        agent_direction = [1,0] # By definition of facing each other
        enemy_direction = [-1,0]
    else:
        distance = np.sqrt(np.square(np.array(list(np.array(agent_coord)- np.array(enemy_coord))).sum(axis=0)))
        # Extract all variables 
        agent_direction, enemy_direction  = direction(environment.agent_path, environment.enemy_path)
    
    complete_reward = compute_reward(reward, distance)

    rel_pos_enemy = np.array(enemy_coord) - np.array(agent_coord)
    return info, complete_reward, np.array(agent_coord), np.array(enemy_coord), agent_direction, enemy_direction, distance, rel_pos_enemy, following_state


In [7]:
env = AgentEnvironment(size=size, timescale=timescale)
def init_environment(agent_here):
    #env = AgentEnvironment(size=size, timescale=timescale)
    info, reward, state = env.reset() 
  
    agent_trajectories = []
    enemy_trajectories = []
    
    small_init_state = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
    small_init_state = np.reshape(small_init_state, [1, state_size])
 
    
    #for i in range(3):
    print(f"act called from init: {small_init_state}")

    action = agent_here.act(small_init_state) # get next action
    # action = 3 (if above does not work)
    
    #pre_state_img = np.flip(np.array(state).reshape(3,256,256).transpose(1,2,0),0)
    #step_number_now = i+1
    info, complete_reward, agent_pos, enemy_pos, agent_direction, enemy_direction, distance, relative_pos_enemy, next_state = do_action(action, 1, True, env)  
    
    #post_state_img = np.flip(np.array(next_state).reshape(3,256,256).transpose(1,2,0),0)

    #agent_pos, enemy_pos = env_feat.coord(pre_state_img, post_state_img)
    agent_trajectories.append(list(agent_pos))
    enemy_trajectories.append(list(enemy_pos))
        
    return info, complete_reward, next_state, agent_trajectories, enemy_trajectories, agent_direction, relative_pos_enemy, enemy_direction, env

In [8]:
complete_rewards = []
for e in range(episode_count):
    status, complete_reward, next_state, agent_trajectories, enemy_trajectories, agent_dir, relative_pos_enemy, enemy_dir, environment = init_environment(agent)
    small_state = [agent_trajectories[-1][0], agent_trajectories[-1][1], enemy_trajectories[-1][0], enemy_trajectories[-1][1], agent_dir[0], agent_dir[1], relative_pos_enemy[0], relative_pos_enemy[1], enemy_dir[0], enemy_dir[1], 0]#"agent direction", "relative position enemy", "enemy direction" ]
    small_state = np.reshape(small_state, [1, state_size])

    done = 0
    total_reward = 0
    total_timesteps = 1
    distances = []
    evaluate_frame = False

    while done == False:
        if (total_timesteps % skip_frames == 0) or (total_timesteps % skip_frames == skip_frames-1):
            evaluate_frame = True
        else:
            evaluate_frame = False
        
        print(f"act called from main loop: {small_state}")
        action = agent.act(small_state) #step(info, reward, state)
        #print(f"agent chooses action: {action}")
        stopwatch = Stopwatch() 
        stopwatch.start()
        status, complete_reward, agent_pos, enemy_pos, agent_dir, enemy_dir, distance, enemy_pos_rel, next_state = do_action(action, total_timesteps, evaluate_frame, environment)   
        stopwatch.stop()
        #print(f"Total time for one step: {stopwatch.duration}")
        
        total_reward += complete_reward

        if status == 1:
            print(f"Game is finished, \n your final reward is: {total_reward}, duration was {total_timesteps} timesteps")
            done = 1
        
        agent_trajectories.append(list(agent_pos))
        enemy_trajectories.append(list(enemy_pos))
        distances.append(distance)
        
        done_list = [done]
        next_small_state = [agent_trajectories[-1][0], agent_trajectories[-1][1], enemy_trajectories[1][0], enemy_trajectories[1][1], agent_dir[0], agent_dir[1], enemy_pos_rel[0], enemy_pos_rel[1], enemy_dir[0], enemy_dir[1], done]  
    
        next_small_state = np.reshape(next_small_state, [1, state_size]) # why?
        small_state = np.reshape(small_state, [1, state_size])
        
        if (total_timesteps % skip_frames == 0):
            print(f"agent will remember: {small_state}")
            agent.remember(small_state, action, complete_reward, next_small_state, list(done_list))
        
        small_state = next_small_state # new small state
        total_timesteps += 1
        
    complete_rewards.append(total_reward)

    if len(agent.memory) > batch_size:
        agent.train(batch_size)
        print("train")

    if e % 50 == 0:
        agent.save(model_output_dir + "weights_"+ '{:04d}'.format(e) + ".hdf5")
            
    
    

act called from init: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
act called from main loop: [137, 187, 653, 196, 1, 0, 516, 9, -1, 0, 0]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[165 180  

act called from main loop: [[  0   0   0   0   1   0   0   0   4 -49   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   4 -49   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   4 -49   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   4 -49   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   4 -49   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   4 -49   0]]
act called from main loop: [[490 197   0   0  43 -16 -41  16 -34  14   0]]
agent will remember: [[490 197   0   0  43 -16 -41  16 -34  14   0]]
act called from main loop: [[495 198   0   0   5  -1 -15   6  31   9   0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 31  9  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 31  9  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 31  9  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 31  9  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 31  9  0]]
act called 

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[166 177   0   0  29  10 436  14 -64   5   0]]
agent will remember: [[166 177   0   0  29  10 436  14 -64   5   0]]
act called from main loop: [[170 177   0   0   4   0 429  14  -3   0   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  0  0]]
act called fr

agent will remember: [[ 451  100    0    0   37   10 -110   43    0   18    0]]
act called from main loop: [[ 454   99    0    0    3    1 -111   43    2    1    0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 2 1 0]]
act called from main loop: [[482  78   0   0  28  21 -89  27  50  37   0]]
agent will remember: [[482  78   0   0  28  21 -89  27  50  37   0]]
a

act called from main loop: [[ 0  0  0  0  6 -1  0  0 10  4  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0 10  4  0]]
act called from main loop: [[225 179   0   0  55   3 330  12 -54  -4   0]]
agent will remember: [[225 179   0   0  55   3 330  12 -54  -4   0]]
act called from main loop: [[226 178   0   0   1   1 322  11  -7   2   0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -7  2  0]]
act called fr

act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -11   4   0]]
act called from main loop: [[343 155   0   0   0  -8  50  14 -43   0   0]]
agent will remember: [[34

act called from main loop: [[ 504  126    0    0   45    1 -182  -82  -54   -3    0]]
agent will remember: [[ 504  126    0    0   45    1 -182  -82  -54   -3    0]]
act called from main loop: [[ 511  127    0    0    7   -1 -191  -86   -2    3    0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  7 

act called from main loop: [[611 140   0   0  37 -14 -14 -16  59 -14   0]]
agent will remember: [[611 140   0   0  37 -14 -14 -16  59 -14   0]]
act called from main loop: [[612 141   0   0   1  -1 -41 -15 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -26  -2   0]]
act called from main loop: [[  

act called from main loop: [[364 156   0   0  55   6  71  62 -22 -13   0]]
agent will remember: [[364 156   0   0  55   6  71  62 -22 -13   0]]
act called from main loop: [[366 155   0   0   2   1  71  82   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   2 -19   0]]
act called from main loop: [[  

act called from main loop: [[  0   0   0   0   1   1   0   0  41 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  41 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  41 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  41 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  41 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  41 -12   0]]
act called from main loop: [[685  74   0   0  39   6 -99 103  13  12   0]]
agent will remember: [[685  74   0   0  39   6 -99 103  13  12   0]]
act called from main loop: [[610  74   0   0 -75   0 -24  97   0   6   0]]
act called from main loop: [[  0   0   0   0 -75   0   0   0   0   6   0]]
Game is finished, 
 your final reward is: 89.87666666666668, duration was 197 timesteps
not done yet, target : 245.05728963216146
not done yet, target : 195.58302647908528
not done yet, target : 389.5796876017252
not done yet, target : 356.7518259684245
no

act called from main loop: [[ 0  0  0  0  2  1  0  0 -4 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -4 -3  0]]
act called from main loop: [[359 162   0   0  -5  -7  47  29 -10   7   0]]
agent will remember: [[359 162   0   0  -5  -7  47  29 -10   7   0]]
act called from main loop: [[357 160   0   0  -2   2  52  45   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2   0   0   3 -14   0]]
act called from main loop: [[  0   0   0   0  -2   2 

act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
state that the model will use to predict action: [[  0   0   0   0   2   7   0   0  -2 -11   0]]
act called from main loop: [[375 136   0   0  44  25 -55 -16  -8   8   0]]
agent will remember: [[375 136   0   0  44  25 -55 -16  -8   8   0]]
act calle

act called from main loop: [[  0   0   0   0   2   0   0   0   4 -36   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   4 -36   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   4 -36   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   4 -36   0]]
act called from main loop: [[ 419  159    0    0 -203  -17  -21   -4   45   43    0]]
agent will remember: [[ 419  159    0    0 -203  -17  -21   -4   45   43    0]]
act called from main loop: [[420 158   0   0   1   1 -18   1   4  -4   0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  4 -4  0]]
act called 

act called from main loop: [[ 0  0  0  0  6  0  0  0 -3 -3  0]]
act called from main loop: [[ 0  0  0  0  6  0  0  0 -3 -3  0]]
act called from main loop: [[ 0  0  0  0  6  0  0  0 -3 -3  0]]
act called from main loop: [[ 0  0  0  0  6  0  0  0 -3 -3  0]]
act called from main loop: [[ 0  0  0  0  6  0  0  0 -3 -3  0]]
act called from main loop: [[ 0  0  0  0  6  0  0  0 -3 -3  0]]
act called from main loop: [[220 167   0   0  46  11 343  28 -36  -1   0]]
agent will remember: [[220 167   0   0  46  11 343  28 -36  -1   0]]
act called from main loop: [[221 166   0   0   1   1 337  31  -5  -2   0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5 -2  0]]
act called fr

act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 10 -1  0]]
act called from main loop: [[423  30   0   0  17   8 -33  54 -20  18   0]]
agent will remember: [[423  30   0   0  17   8 -33  54 -20  18   0]]
act called from main loop: [[425  30   0   0   2   0 -36  49  -1   5   0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  5  0]]
act called fr

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


act called from init: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
act called from main loop: [137, 187, 651, 195, 1, 0, 514, 8, -1, 0, 0]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[166 182  

act called from main loop: [[  0   0   0   0  81   0   0   0 -10  20   0]]
state that the model will use to predict action: [[  0   0   0   0  81   0   0   0 -10  20   0]]
act called from main loop: [[  0   0   0   0  81   0   0   0 -10  20   0]]
act called from main loop: [[  0   0   0   0  81   0   0   0 -10  20   0]]
act called from main loop: [[  0   0   0   0  81   0   0   0 -10  20   0]]
act called from main loop: [[  0   0   0   0  81   0   0   0 -10  20   0]]
act called from main loop: [[  0   0   0   0  81   0   0   0 -10  20   0]]
act called from main loop: [[ 466  107    0    0   54    3 -182   63  -39   13    0]]
agent will remember: [[ 466  107    0    0   54    3 -182   63  -39   13    0]]
act called from main loop: [[ 469  107    0    0    3    0 -191   69   -6   -6    0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -6  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -6  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -6  0]]
act calle

Game is finished, 
 your final reward is: 103.73583333333333, duration was 225 timesteps
agent will remember: [[571 138   0   0  39 -22 -23  -7  49  -6   0]]
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not d

act called from main loop: [[ 0  0  0  0  5 -2  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0  0 -3  0]]
act called from main loop: [[452 187   0   0  43  17 -17  48 -23 -13   0]]
agent will remember: [[452 187   0   0  43  17 -17  48 -23 -13   0]]
act called from main loop: [[453 186   0   0   1   1  49  88  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  67 -39   0]]
act called from main loop: [[  0   0   0   0   1   1 

act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called from main loop: [[603 146   0   0  48   7 -25  46  52  -3   0]]
agent will remember: [[603 146   0   0  48   7 -25  46  52  -3   0]]
act called from main loop: [[606 147   0   0   3  -1 -29  46  -1  -1   0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -1 -1  0]]
act called fr

act called from main loop: [[355 218   0   0  59 -31  76 -38 -32  -8   0]]
agent will remember: [[355 218   0   0  59 -31  76 -38 -32  -8   0]]
act called from main loop: [[357 219   0   0   2  -1  74 -36   0  -3   0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -3  0]]
act called fr

act called from main loop: [[  0   0   0   0   1   0   0   0 -14  -5   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0 -14  -5   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0 -14  -5   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0 -14  -5   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0 -14  -5   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0 -14  -5   0]]
act called from main loop: [[532 234   0   0  30 -18  58 -16  44  -7   0]]
agent will remember: [[532 234   0   0  30 -18  58 -16  44  -7   0]]
act called from main loop: [[534 235   0   0   2  -1  59 -15   3  -2   0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  3 -2  0]]
act called 

act called from main loop: [[ 0  0  0  0  4  0  0  0 -4 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -4 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4 -3  0]]
act called from main loop: [[366 202   0   0  61  -4  89   1   1  -8   0]]
agent will remember: [[366 202   0   0  61  -4  89   1   1  -8   0]]
act called from main loop: [[367 201   0   0   1   1  78   2 -10   0   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -10   0   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -10   0   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -10   0   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -10   0   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -10   0   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -10

act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 4 7 0]]
act called from main loop: [[ 721  168    0    0   46    3 -116   51   61    0    0]]
agent will remember: [[ 721  168    0    0   46    3 -116   51   61    0    0]]
act called from main loop: [[ 725  168    0    0    4    0 -119   47    1    4    0]]
act called from main loop: [[0 0 0 0 4 0 0 0 1 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 1 4 0]]
act ca

act called from main loop: [[266 161   0   0  44   5 250  23 -32  -8   0]]
agent will remember: [[266 161   0   0  44   5 250  23 -32  -8   0]]
act called from main loop: [[268 160   0   0   2   1 246  18  -2   6   0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  6  0]]
act called fr

act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  -9 -13   0]]
act called from main loop: [[356  82   0   0   7   0   0   9   8  31   0]]
agent will remember: [[35

act called from main loop: [[ 0  0  0  0 -1  0  0  0 -7  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -7  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -7  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -7  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -7  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -7  1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -7  1  0]]
act called from main loop: [[349  30   0   0   0  32  56  31  -9  -7   0]]
agent will remember: [[349  30   0   0   0  32  56  31  -9  -7   0]]
act called from main loop: [[352  30   0   0   3   0  57  10   4  21   0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  4 21  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  4 21  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  4 21  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  4 21  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  4

act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -2  2  0]]
act called from main loop: [[ 363   30    0    0  -29    2 -333    0 -281    0    0]]
agent will remember: [[ 363   30    0    0  -29    2 -333    0 -281    0    0]]
act called from main loop: [[362  30   0   0  -1   0 -77  25 255 -25   0]]
act called from main loop: [[  0   0   0   0  -1   0   0   0 255 -25   0]]
Game is finished, 
 your final reward is: 183.0416666666667, duration was 377 timesteps
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not

act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  2  0]]
act called from main loop: [[396 216   0   0  30 -13 -18 -43 -37   1   0]]
agent will remember: [[396 216   0   0  30 -13 -18 -43 -37   1   0]]
act called from main loop: [[399 218   0   0   3  -2 -18 -43   3  -2   0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0  3 -2  0]]
act called fr

act called from main loop: [[ 0  0  0  0  1 -3  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -2 -1  0]]
Game is finished, 
 your final reward is: 91.7025, duration was 201 timesteps
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, target : nan
not done yet, targ

ValueError: Error when checking input: expected dense_1_input to have shape (11,) but got array with shape (1,)