### CONCERNS
1. list of directions/positions are never emptied during an episode, which makes the simulation slower and slower after each timestep

### IDEAS
other possible features:
1. robot is fallen down or not
2. distance to border (& which border?)

time optimalization:
1. clear last item from history every 4 timesteps (we only use the current and previous state and the one before that)
2. interval of states to be interpreted: skip N frames before evaluation next state
3. Since the rewards are so sparse, maybe use Imitation learning instead of DQN --> we are "experts" since we know the tactic of the blue bot. we can use this to teach our bot how to defeat the other agent.
4. rewrite the random action function 

In [4]:
from VisualModule import AgentEnvironment
from DQN_Agent import NeurosmashAgent

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize

import gym
import math
import random
from collections import namedtuple
from itertools import count
from PIL import Image
import os

from stopwatch import Stopwatch


Using TensorFlow backend.


In [5]:
model_output_dir = "output/model_output/"

if not os.path.exists(model_output_dir):
    os.makedirs(model_output_dir)

max_distance = 600
show_images = False
skip_frames = 15
state_size =  11 # agent_pos, enemy_pos, vec_agent, vec_enemy, rel_pos_enemy, done
action_size = 3
episode_count = 1000
batch_size = 32
size       = 768         # Please check the Updates section above for more details
timescale  = 10           # Please check the Updates section above for more details

In [6]:
agent = NeurosmashAgent(state_size = state_size, action_size = action_size) # action size: move in x or y direction, or do nothing

In [7]:
def compute_reward(standard_reward, distance):
    distance_reward = (max_distance-distance)/max_distance
    total_reward = (distance_reward + standard_reward)/ 2
    return total_reward

In [8]:
def direction(agent_path, enemy_path):
    A_X = (agent_path[-1] - np.array(agent_path[-2]))[0]
    A_Y = -(agent_path[-1] - np.array(agent_path[-2]))[1]
    E_X = (enemy_path[-1] - np.array(enemy_path[-2]))[0]
    E_Y = -(enemy_path[-1] - np.array(enemy_path[-2]))[1]
    return [A_X,A_Y],[E_X,E_Y]

def do_action(action, total_steps, eval_pic, environment):
    stopwatch = Stopwatch() 
    stopwatch.start()
    info, reward, agent_coord, enemy_coord, following_state = environment.actionLoop(action, 0, eval_pic)
    stopwatch.stop()
    #print(f"Total time for do action: {stopwatch.duration}")
    if len(environment.agent_path) < 2:
        distance = 500 # Initial distance, only for initialisation
        agent_direction = [1,0] # By definition of facing each other
        enemy_direction = [-1,0]
    else:
        distance = np.sqrt(np.square(np.array(list(np.array(agent_coord)- np.array(enemy_coord))).sum(axis=0)))
        # Extract all variables 
        agent_direction, enemy_direction  = direction(environment.agent_path, environment.enemy_path)
    
    complete_reward = compute_reward(reward, distance)

    rel_pos_enemy = np.array(enemy_coord) - np.array(agent_coord)
    return info, complete_reward, np.array(agent_coord), np.array(enemy_coord), agent_direction, enemy_direction, distance, rel_pos_enemy, following_state


In [9]:
env = AgentEnvironment(size=size, timescale=timescale)
def init_environment(agent_here):
    #env = AgentEnvironment(size=size, timescale=timescale)
    info, reward, state = env.reset() 
  
    agent_trajectories = []
    enemy_trajectories = []
    
    small_init_state = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
    small_init_state = np.reshape(small_init_state, [1, state_size])
 
    
    #for i in range(3):
    print(f"act called from init: {small_init_state}")

    action = agent_here.act(small_init_state) # get next action
    # action = 3 (if above does not work)
    
    #pre_state_img = np.flip(np.array(state).reshape(3,256,256).transpose(1,2,0),0)
    #step_number_now = i+1
    info, complete_reward, agent_pos, enemy_pos, agent_direction, enemy_direction, distance, relative_pos_enemy, next_state = do_action(action, 1, True, env)  
    
    #post_state_img = np.flip(np.array(next_state).reshape(3,256,256).transpose(1,2,0),0)

    #agent_pos, enemy_pos = env_feat.coord(pre_state_img, post_state_img)
    agent_trajectories.append(list(agent_pos))
    enemy_trajectories.append(list(enemy_pos))
        
    return info, complete_reward, next_state, agent_trajectories, enemy_trajectories, agent_direction, relative_pos_enemy, enemy_direction, env

In [None]:
complete_rewards = []
for e in range(episode_count):
    status, complete_reward, next_state, agent_trajectories, enemy_trajectories, agent_dir, relative_pos_enemy, enemy_dir, environment = init_environment(agent)
    small_state = [agent_trajectories[-1][0], agent_trajectories[-1][1], enemy_trajectories[-1][0], enemy_trajectories[-1][1], agent_dir[0], agent_dir[1], relative_pos_enemy[0], relative_pos_enemy[1], enemy_dir[0], enemy_dir[1], 0]#"agent direction", "relative position enemy", "enemy direction" ]
    small_state = np.reshape(small_state, [1, state_size])

    done = 0
    total_reward = 0
    total_timesteps = 1
    distances = []
    evaluate_frame = False

    while done == False:
        if (total_timesteps % skip_frames == 0) or (total_timesteps % skip_frames == skip_frames-1):
            evaluate_frame = True
        else:
            evaluate_frame = False
        
        print(f"act called from main loop: {small_state}")
        action = agent.act(small_state) #step(info, reward, state)
        #print(f"agent chooses action: {action}")
        stopwatch = Stopwatch() 
        stopwatch.start()
        status, complete_reward, agent_pos, enemy_pos, agent_dir, enemy_dir, distance, enemy_pos_rel, next_state = do_action(action, total_timesteps, evaluate_frame, environment)   
        stopwatch.stop()
        #print(f"Total time for one step: {stopwatch.duration}")
        
        total_reward += complete_reward

        if status == 1:
            print(f"Game is finished, \n your final reward is: {total_reward}, duration was {total_timesteps} timesteps")
            done = 1
        
        agent_trajectories.append(list(agent_pos))
        enemy_trajectories.append(list(enemy_pos))
        distances.append(distance)
        
        done_list = [done]
        next_small_state = [agent_trajectories[-1][0], agent_trajectories[-1][1], enemy_trajectories[1][0], enemy_trajectories[1][1], agent_dir[0], agent_dir[1], enemy_pos_rel[0], enemy_pos_rel[1], enemy_dir[0], enemy_dir[1], done]  
    
        next_small_state = np.reshape(next_small_state, [1, state_size]) # why?
        small_state = np.reshape(small_state, [1, state_size])
        
        if (total_timesteps % skip_frames == 0):
            print(f"agent will remember: {small_state}")
            agent.remember(small_state, action, complete_reward, next_small_state, list(done_list))
        
        small_state = next_small_state # new small state
        total_timesteps += 1
        
    complete_rewards.append(total_reward)

    if len(agent.memory) > batch_size:
        agent.train(batch_size)
        print("train")

    if e % 50 == 0:
        agent.save(model_output_dir + "weights_"+ '{:04d}'.format(e) + ".hdf5")
            
    
    

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 652 196   1   0 515   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[167 182   0   0 

agent will remember: [[306 207   0   0  -5 -60  26 -79 -39  22   0]]
act called from main loop: [[306 207   0   0   0   0  30 -74   4  -5   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  4 -5  0]]
act called from main loo

act called from main loop: [[513 193   0   0  44  -5 -26   3  48   7   0]]
agent will remember: [[513 193   0   0  44  -5 -26   3  48   7   0]]
act called from main loop: [[514 195   0   0   1  -2 -22   3   5  -2   0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0  5 -2  0]]
act called fr

act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -4  0]]
act called from main loop: [[303 163   0   0  27  15 171  16 -42  12   0]]
agent will remember: [[303 163   0   0  27  15 171  16 -42  12   0]]
act called from main loop: [[304 161   0   0   1   2 166  19  -4  -1   0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0 -4 -1  0]]
act called fr

act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -1 -8  0]]
act called from main loop: [[ 446   82    0    0   16   19 -138   58   57   16    0]]
agent will remember: [[ 446   82    0    0   16   19 -138   58   57   16    0]]
act called from main loop: [[ 448   79    0    0    2    3 -139   

train
act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 668 196   1   0 531   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[160 188   

act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -1  0]]
act called from main loop: [[ 474  266    0    0   46  -12 -175  -22  -16  -31    0]]
agent will remember: [[ 474  266    0    0   46  -12 -175  -22  -16  -31    0]]
act called from main loop: [[ 478  268    0    0    4   -2 -179  -18    0   -6    0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0  4 

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 197   1   0 514  10  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[169 176   0   0 

act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[  0   0   0   0  -2   0   0   0 -11   3   0]]
act called from main loop: [[284 191   0   0  -4  -1  32 -18 -31  10   0]]
agent will remember: [[284 191   0   0  -4  -1  32 -18 -31  10   0]]
act called from main loop: [[284 242   0   0   0 -51  32 -64   0  -5   0]]
act called from main loop: [[  0   0   0   0   0 -51   0   0   0  -5   0]]
act called from main loop: [[  0   0   0   0   0 -51   0   0   0  -5   0]]
act called from main loop: [[  

act called from main loop: [[ 0  0  0  0  3  0  0  0 10  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 10  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 10  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 10  1  0]]
act called from main loop: [[400 178   0   0  33 -16 -26 -15  22 -17   0]]
agent will remember: [[400 178   0   0  33 -16 -26 -15  22 -17   0]]
act called from main loop: [[402 178   0   0   2   0 -29 -16  -1   1   0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  1  0]]
act called fr

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[160 187   0   0  23   0 460   5 -46   4   0]]
agent will remember: [[160 187   0   0  23   0 460   5 -46   4   0]]
act called from main loop: [[164 188   0   0   4  -1 441   4 -15   0   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -15

act called from main loop: [[369 276   0   0   0 -19  59 -19  29 -12   0]]
agent will remember: [[369 276   0   0   0 -19  59 -19  29 -12   0]]
act called from main loop: [[368 278   0   0  -1  -2  57 -16  -3  -5   0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0 -1 -2  0  0 -3 -5  0]]
act called fr

act called from main loop: [[ 0  0  0  0  1  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -2  0  0]]
act called from main loop: [[ 600  135    0    0   26   17 -227  155    6   18    0]]
agent will remember: [[ 600  135    0    0   26   17 -227  155    6   18    0]]
act called from main loop: [[ 603  133    0    0    3    2 -236  152   -6    5    0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -6  5  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -6  5  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -6  5  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -6  5  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -6  5  0]]
act called from main loop: [[ 0  0  0  0  3 

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 204   1   0 514  17  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main lo

act called from main loop: [[0 0 0 0 2 2 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 2 2 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 2 2 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 2 2 0 0 3 1 0]]
act called from main loop: [[455 157   0   0  19   5  71  33  37  16   0]]
agent will remember: [[455 157   0   0  19   5  71  33  37  16   0]]
act called from main loop: [[454 158   0   0  -1  -1  69  24  -3   8   0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0 -1 -1  0  0 -3  

act called from main loop: [[ 0  0  0  0  0 -1  0  0  1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  1  0  0]]
act called from main loop: [[388 182   0   0  14  -3  96  48  -2 -22   0]]
agent will remember: [[388 182   0   0  14  -3  96  48  -2 -22   0]]
act called from main loop: [[390 176   0   0   2   6  89  62  -5  -8   0]]
act called from main loop: [[ 0  0  0  0  2  6  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  6  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  6  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  6  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  6  0  0 -5

Game is finished, 
 your final reward is: 164.42583333333332, duration was 349 timesteps
 0.1325 - 0.95 - [[172 176   0   0   3   0 432   9 -10  -3   0]]
not done yet, target : nan
 0.4666666666666667 - 0.95 - [[ 347  120    0    0    3    1 -115   75    0   -7    0]]
not done yet, target : nan
 0.37666666666666665 - 0.95 - [[292 218   0   0   2   0 180 -32 -19  32   0]]
not done yet, target : nan
 0.3858333333333333 - 0.95 - [[388 195   0   0   1   1 152 -15  -3  -2   0]]
not done yet, target : nan
 0.47333333333333333 - 0.95 - [[284 242   0   0   0 -51  32 -64   0  -5   0]]
not done yet, target : nan
 0.3358333333333333 - 0.95 - [[ 478  268    0    0    4   -2 -179  -18    0   -6    0]]
not done yet, target : nan
 0.39166666666666666 - 0.95 - [[312 182   0   0   4   1 141 -11 -19   0   0]]
not done yet, target : nan
 0.4475 - 0.95 - [[435 178   0   0  -1   0  79 -16  10  -2   0]]
not done yet, target : nan
 0.37166666666666665 - 0.95 - [[ 428  254    0    0   -1    0 -113  -41   -4  

act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[350 185   0   0  42  -5  92  -1 -28  -6   0]]
agent will remember: [[350 185   0   0  42  -5  92  -1 -28  -6   0]]
act called from main loop: [[348 186   0   0  -2  -1 103  -2   9   0   0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  9  0  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  9  0  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  9  0  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  9  0  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  9  0  0]]
act called fr

act called from main loop: [[  0   0   0   0   2   1   0   0 -19  -1   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0 -19  -1   0]]
act called from main loop: [[310 245   0   0  -8 -27 162 -42  47 -17   0]]
agent will remember: [[310 245   0   0  -8 -27 162 -42  47 -17   0]]
act called from main loop: [[310 245   0   0   0   0 161 -41  -1  -1   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1 -1  0]]
act called from main loop: [[ 0  

act called from main loop: [[ 433  220    0    0   42    9 -149   44   21  -20    0]]
agent will remember: [[ 433  220    0    0   42    9 -149   44   21  -20    0]]
act called from main loop: [[ 314  220    0    0 -119    0  -26   46    4   -2    0]]
act called from main loop: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
act called from main loop: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
act called from main loop: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
act called from main loop: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
state that the model will use to predict action: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
act called from main loop: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
act called from main loop: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
act called from main loop: [[   0    0    0    0 -119    0    0    0    4   -2    0]]
act called from main loop: [[   0    0

act called from main loop: [[594 239   0   0  32 -17 -21   1  37 -30   0]]
agent will remember: [[594 239   0   0  32 -17 -21   1  37 -30   0]]
act called from main loop: [[596 239   0   0   2   0 -20 -17   3  18   0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 18  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 18  0]]
Game is finished, 
 your final reward is: 185.87833333333333, duration was 393 timesteps
 0.225 - 0.95 - [[227 191   0   0   1   0 327   3  -2   2   0]]
not done yet, target : nan
 0.43916666666666665 - 0.95 - [[342 224   0   0  -3  -4  54  19   6 -37   0]]
not done yet, target : nan
 0.4741666666666667 - 0.95 - [[367 162   0   0   3   0 -15 -16  10   1   0]]
not done yet, target : nan
 0.4575 - 0.95 - [[ 574  152    0    0    1    1 -207  156   -2    0    0]]
not done yet, target : nan
 0.47333333333333333 - 0.95 - [[284 242   0   0   0 -51  32 -64   0  -5   0]]
not done yet, target : nan
 0.3825 - 0.95 - [[374 179   0   0   0  -1 112  29   1  

act called from main loop: [[304 164   0   0  28   9 151  23 -67  -2   0]]
agent will remember: [[304 164   0   0  28   9 151  23 -67  -2   0]]
act called from main loop: [[309 162   0   0   5   2 141  27  -5  -2   0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -5 -2  0]]
act called fr

act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -5  0]]
act called from main loop: [[ 617   57    0    0   36   13 -162   81   53    5    0]]
agent will remember: [[ 617   57    0    0   36   13 -162   81   53    5    0]]
act called from main loop: [[ 619   56    0    0    2    1 -160   77    4    5    0]]
act called from main loop: [[0 0 0 0 2 1 0 0 4 5 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 4 5 0]]
ac

act called from main loop: [[ 0  0  0  0  3 -2  0  0 -4  0  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0 -4  0  0]]
act called from main loop: [[237 213   0   0  26  -9 287 -15 -40  -8   0]]
agent will remember: [[237 213   0   0  26  -9 287 -15 -40  -8   0]]
act called from main loop: [[240 228   0   0   3 -15 272 -29 -12  -1   0]]
act called from main loop: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
state that the model will use to predict action: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
state that the model will use to predict action: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   3 -15   0   0 -12  -1   0]]
act calle

act called from main loop: [[  0   0   0   0   1  -2   0   0   6 -18   0]]
act called from main loop: [[  0   0   0   0   1  -2   0   0   6 -18   0]]
act called from main loop: [[ 377  399    0    0   27  -30 -110  -97  -36  -16    0]]
agent will remember: [[ 377  399    0    0   27  -30 -110  -97  -36  -16    0]]
act called from main loop: [[ 380  400    0    0    3   -1 -115  -97   -2   -1    0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -2 -1  0]]
act called from main l

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[160 185   0   0  23   2 457   8 -46   2   0]]
agent will remember: [[160 185   0   0  23   2 457   8 -46   2   0]]
act called from main loop: [[165 186   0   0   5  -1 446  17  -6 -10   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -6 -10   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -6 -10   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -6 -10   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -6 -10   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -6 -10   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -6 -10   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -6 -10   0]]


act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
state that the model will use to predict action: [[  0   0   0   0  -1   4   0   0 -27  11   0]]
act called from main loop: [[328 215   0   0  35  49 105 -17 -16  42   0]]
agent will remember: [[328 215   0   0  35  49 105 -17 -16  42   0]]
act calle

act called from main loop: [[0 0 0 0 4 0 0 0 4 0 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 4 0 0]]
act called from main loop: [[371 177   0   0  37  -9 -23 -13  40  -9   0]]
agent will remember: [[371 177   0   0  37  -9 -23 -13  40  -9   0]]
act called from main loop: [[373 179   0   0   2  -2 -23 -10   2  -5   0]]
state that the model will use to predict action: [[373 179   0   0   2  -2 -23 -10   2  -5   0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  2 

act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[  0   0   0   0   2  -1   0   0  -9 -16   0]]
act called from main loop: [[274 195   0   0  44 -11 251   7 -36   4   0]]
agent will remember: [[27

act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  1 -5  0]]
act called from main loop: [[ 344  248    0    0 -185   -2  -31  -15   43  -14    0]]
agent will remember: [[ 344  248    0    0 -185   -2  -31  -15   43  -14    0]]
act called from main loop: [[348 248   0   0   4   0 -34 -15   1   0   0]]
act called from main loop: [[0 0 0 0 4 0 0 0 1 0 0]]
ac

act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act cal

agent will remember: [[416 112   0   0  32  14 -85  85 -42   9   0]]
act called from main loop: [[420 112   0   0   4   0 -80  83   9   2   0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
state that the model will use to predict action: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
state that the model will use to predict action: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 9 2 0]]
act called from m

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 665 213   1   0 528  26  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[162 182   0   0 

act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -3  6  0]]
act called from main loop: [[ 493  227    0    0   49   -8 -

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main lo

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  2  0]]
act called from main loop: [[ 494  180    0    0   50   -5 -

act called from main loop: [[ 0  0  0  0  4  0  0  0  1 11  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  1 11  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 11  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 11  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 11  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 11  0]]
act called from main loop: [[  30   30    0    0 -562  187  584  169   40    3    0]]
agent will remember: [[  30   30    0    0 -562  187  584  169   40    3    0]]
act called from main loop: [[ 30  30   0   0   0   0 587 177   3  -8   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  3 -8  0]]
Game is finished, 
 your final re

act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3  1  0]]
act called from main loop: [[305 169   0   0  26   3 156   5 -58   3   0]]
agent will remember: [[305 169   0   0  26   3 156   5 -58   3   0]]
act called from main loop: [[308 168   0   0   3   1 168   6  15   0   0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 15  0  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 15  0  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 15  0  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 15  0  0]]
act called fr

act called from main loop: [[  0   0   0   0   4   1   0   0   4 -21   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0   4 -21   0]]
state that the model will use to predict action: [[  0   0   0   0   4   1   0   0   4 -21   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0   4 -21   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0   4 -21   0]]
act called from main loop: [[564 113   0   0  47  14 -17 124  67  12   0]]
agent will remember: [[564 113   0   0  47  14 -17 124  67  12   0]]
act called from main loop: [[567 112   0   0   3   1 -17 143   3 -18   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0   3 -18   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0   3 -18   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0   3 -18   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0   3 -18   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0   3 -18   0]]
act calle

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[164 179   0   0  27   8 449   5 -38  12   0]]
agent will remember: [[164 179   0   0  27   8 449   5 -38  12   0]]
act called from main loop: [[169 180   0   0   5  -1 437   5  -7  -1   0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -7

act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  1  0]]
act called from main loop: [[ 506   94    0    0   28   17 -182   23   -6   28    0]]
agent will remember: [[ 506   94    0    0   28   17 -182   23   -6   28    0]]
act called from main loop: [[ 510   94    0    0    4    0 -185   

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 667 196   1   0 530   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[167 180   0   0 

act called from main loop: [[450 186   0   0  42  18   1  35 -24 -41   0]]
agent will remember: [[450 186   0   0  42  18   1  35 -24 -41   0]]
act called from main loop: [[452 186   0   0   2   0  21  34  22   1   0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 22  1  0]]
act called fr

act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7  6  0]]
act called from main loop: [[552 170   0   0 -92 126 -39 -14  22 -25   0]]
agent will remember: [[552 170   0   0 -92 126 -39 -14  22 -25   0]]
act called from main loop: [[555 172   0   0   3  -2 -37  -5   5 -11   0]]
act called from main loop: [[  0   0   0   0   3  -2   

act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  5  0]]
act called from main loop: [[270 202   0   0  41 -14 255  -6 -41 -10   0]]
agent will remember: [[270 202   0   0  41 -14 255  -6 -41 -10   0]]
act called from main loop: [[272 203   0   0   2  -1 249  -8  -4   1   0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -4  1  0]]
act called fr

act called from main loop: [[396 217   0   0  54   5  64  30 -16  11   0]]
agent will remember: [[396 217   0   0  54   5  64  30 -16  11   0]]
act called from main loop: [[399 216   0   0   3   1  68  25   7   6   0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 7 6 0]]
act called from m

act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0  3  6  0  0 -1 -1  0]]
act called from main loop: [[670 180   0   0  47  12 -75 -40  47  23   0]]
agent will remember: [[670 180   0   0  47  12 -75 -40  47  23   0]]
act called from main loop: [[671 179   0   0   1   1 -64 -36  12  -3   0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 12 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 12 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 12 -3  0]]
act called fr

act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[30 30  0  0  0  0  0  0  0  0  0]]
Game is finished, 
 your final reward is: 176.3025, duration was 375 timesteps
agent will remember: [[30 30  0  0  0  0  0  0  0  0  0]]
 0.39416666666666667 - 0.95 - [[ 401  164    0    0    3    0 -137   10    7    8    0]]
not done yet, target : nan
 0.225 - 0.95 - [[226 184   0   0  -1   1 328   2   1   1   0]]
not done yet, 

act called from main loop: [[  0   0   0   0   4   1   0   0 -11  -2   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0 -11  -2   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0 -11  -2   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0 -11  -2   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0 -11  -2   0]]
act called from main loop: [[  0   0   0   0   4   1   0   0 -11  -2   0]]
act called from main loop: [[303 172   0   0  24   3 152  37 -61   7   0]]
agent will remember: [[303 172   0   0  24   3 152  37 -61   7   0]]
act called from main loop: [[307 170   0   0   4   2 146  42  -2  -3   0]]
act called from main loop: [[ 0  0  0  0  4  2  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4  2  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4  2  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4  2  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4  2  0  0 -2 -3  0]]
act called 

act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
state that the model will use to predict action: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   0    0    0    0    2    0    0    0 -367  122    0]]
act called from main loop: [[   

act called from main loop: [[ 0  0  0  0 -2 -8  0  0  2 -2  0]]
act called from main loop: [[ 0  0  0  0 -2 -8  0  0  2 -2  0]]
act called from main loop: [[455 173   0   0  -2  15  65   5   3 -29   0]]
agent will remember: [[455 173   0   0  -2  15  65   5   3 -29   0]]
act called from main loop: [[455 192   0   0   0 -19  67 -24   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19   0   0   2  10   0]]
act called from main loop: [[  0   0   0   0   0 -19 

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 204   1   0 514  17  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
s

act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  7  0]]
act called from main loop: [[ 432  125    0    0   34    9 -104   46  -41   -1    0]]
agent will remember: [[ 432  125    0    0   34    9 -104   46  -41   -1    0]]
act called from main loop: [[ 434  125    0    0    2    0 -108   40   -2    6    0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2 

act called from main loop: [[ 717  154    0    0  152  -30 -143  -48   34    5    0]]
agent will remember: [[ 717  154    0    0  152  -30 -143  -48   34    5    0]]
act called from main loop: [[618 118   0   0 -99  36 -27  -6  17  -6   0]]
act called from main loop: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
act called from main loop: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
act called from main loop: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
state that the model will use to predict action: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
act called from main loop: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
state that the model will use to predict action: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
act called from main loop: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
act called from main loop: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
state that the model will use to predict action: [[  0   0   0   0 -99  36   0   0  17  -6   0]]
act called from ma

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 195   1   0 514   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
a

act called from main loop: [[  0   0   0   0   4  -1   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -12  -1   0]]
state that the model will use to predict action: [[  0   0   0   0   4  -1   0   0 -12  -1   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -12  -1   0]]
act called from main loop: [[450 184   0   0  39  19  44  42  28 -38   0]]
state that the model will use to predict action: [[450 184   0   0  39  19  44  42  28 -38   0]]
agent will remember: [[450 184   0   0  39  19  44  42  28 -38   0]]
act called from main loop: [[450 184   0   0   0   0  76  32  32  10   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 32 10  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 32 10  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 32 10  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 32 10  0]]
act calle

act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -1  0  0 18  0  0]]
act called from main loop: [[ 562  236    0    0 -154    2  -27  -13   37    7    0]]
state that the model will use to predict action: [[ 562  236    0    0 -154    2  -27  -13   37    7    0]]
agent will remember: [[ 562  236    0    0 -154    2  -27  -13   37    7    0]]


act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
state that the model will use to predict action: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -11   0   0]]
act

agent will remember: [[346 178   0   0  42  49  85  23 -53  27   0]]
act called from main loop: [[350 179   0   0   4  -1  90  32   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
state that the model will use to predict action: [[  0   0   0   0   4  -1   0   0   9 -10   0]]
act calle

act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0 -3 -1  0]]
act called from main loop: [[330 164   0   0  53  -2 -31 115  43  -9   0]]
state that the model will use to predict action: [[330 164   0   0  53  -2 -31 115  43  -9   0]]
agent will remember: [[330 164   0   0  53  -2 -31 115  43  -9   0]]
act called from main loop: [[ 479  165    0    0  149   -1 -176  120    4   -6    0]]
act called from main loop: [[  0   0   0   0 149  -1   0   0   4  -6   0]]


act called from main loop: [[ 768  205    0    0  231  -11 -231   -1   25   12    0]]
agent will remember: [[ 768  205    0    0  231  -11 -231   -1   25   12    0]]
act called from main loop: [[ 770  209    0    0    2   -4 -229    4    4   -9    0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main loop: [[ 0  0  0  0  2 -4  0  0  4 -9  0]]
act called from main l

act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[226 186   0   0  60   0 338  12 -37  -3   0]]
agent will remember: [[226 186   0   0  60   0 338  12 -37  -3   0]]
act called from main loop: [[228 186   0   0   2   0 332  11  -4   1   0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -4  1  0]]
act called fr

act called from main loop: [[ 0  0  0  0  6 -1  0  0 -7 44  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0 -7 44  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0 -7 44  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0 -7 44  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0 -7 44  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0 -7 44  0]]
act called from main loop: [[ 543  204    0    0   39    6 -100  -34  -33   46    0]]
agent will remember: [[ 543  204    0    0   39    6 -100  -34  -33   46    0]]
act called from main loop: [[ 547  204    0    0    4    0 -108  -39   -4    5    0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -4  5  0]]
state that the model will use to predict act

act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0 

act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  0  0]]
act called from main loop: [[275 178   0   0  49  -1 245  28 -47 -11   0]]
agent will remember: [[275 178   0   0  49  -1 245  28 -47 -11   0]]
act called from main loop: [[278 177   0   0   3   1 244  30   2  -1   0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  2 -1  0]]
act called fr

act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 40  0]]
act called from main loop: [[492 115   0   0 -36   6 -21  14  47   1   0]]
state that the model will use to predict action: [[492 115   0   0 -36   6 -21  14  47   1   0]]
agent will remember: [[492 115   0   0 -36   6 -21  14  47   1   0]]
act called from main loop: [[493 115   0   0   1   0 -24  13  -2   1   0]]
state that the model will use to predict action: [[493 115   0   0   1   0 -24  13  -2

act called from main loop: [[137 187 667 195   1   0 530   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act cal

act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0 -3  8  0]]
act called from main loop: [[405 268   0   0  -9  -2 -21  -9  35 -27   0]]
agent will remember: [[405 268   0   0  -9  -2 -21  -9  35 -27   0]]
ac

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 668 195   1   0 531   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict acti

act called from main loop: [[444 189   0   0  32  19  74  41  49 -38   0]]
agent will remember: [[444 189   0   0  32  19  74  41  49 -38   0]]
act called from main loop: [[444 190   0   0   0  -1  69  41  -5  -1   0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -5 -1  0]]
state that th

act called from main loop: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  4 -1  0]]
act called from main loop: [[609 176   0   0  46  14 -30  79  40  -7   0]]
agent will remember: [[609 176   0   0  46  14 -30  79  40  -7   0]]
act called from main loop: [[612 175   0   0   3   1 -28  79   5   1   0]]
act called from main loop: [[0 0 0 0 3 1 0 0 5 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 5 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 5 1 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 5 1 0]]
act called from main loop: [[0 0 0 

act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 12 -6  0]]
act called from main loop: [[224 180   0   0  57   2 339  21 -51  -4   0]]
agent will remember: [[224 180   0   0  57   2 339  21 -51  -4   0]]
act called from main loop: [[226 179   0   0   2   1 333  17  -4   5   0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -4  5  0]]
act called fr

act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  0 10  0]]
act called from main loop: [[499 262   0   0  19 -18 -68 -53

act called from main loop: [[137 187 651 195   1   0 514   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act cal

act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0  4 -3  0]]
act called from main loop: [[301 178   0   0  -6 -19  15  27 -25  -2   0]]
agent will remember: [[301 178   0   0  -6 -19  15  27 -25  -2   0]]
act called from main loop: [[301 179   0   0   0  -1   5  32 -10  -6   0]]
act called from main loop: [[  0   0   0   0   0  -1   

act called from main loop: [[ 0  0  0  0  3  1  0  0  5 -2  0]]
act called from main loop: [[ 540   72    0    0  152    7 -150   40   26   13    0]]
agent will remember: [[ 540   72    0    0  152    7 -150   40   26   13    0]]
act called from main loop: [[ 544   72    0    0    4    0 -152   36    2    4    0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
state that the model will use to predict action: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
state that the model will use to predict action: [[0 0 0 0 4 0 0 0 2 4 0]]
act called from main loop: [[0 0 0 0 4 0 0 0 2 4 0]]
act 

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[161 184   0   0  24   3 443   7 -50   5   0]]
state that the model will use to predict action: [[161 184   0   0  24   3 443   7 -50   5   0]]
agent will remember: [[161 184   0   0  24   3 443   7 -50   5   0]]
act called from main loop: [[166 184   0   0   5   0 445   7   7   0   0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 7 0 0]]
act called from main loop:

act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
state that the model will use to predict action: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[  0   0   0   0   1  -1   0   0 -37  10   0]]
act called from main loop: [[ 502  207    0    0   50   -3 -143  -87    2   21    0]]
agent will remember: [[ 502  207    0    0   50   -3 -143  -87    2   21    0]]
act called from main loop: [[ 507  207    0    0    5    0 -147  -85    1   -2    0]]
act called from main loop: [[ 0  0  0  0  5  0  0  

act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -6 -1  0]]
act called from main loop: [[675 206   0   0  34 -16 -26 -15

agent will remember: [[172 176   0   0  35  11 430  15 -61   5   0]]
act called from main loop: [[174 175   0   0   2   1 433  32   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   5 -16   0]]
state that the model will use to predict action: [[  0   0   0   0   2   1   0   0   5 -16   0]]
act calle

act called from main loop: [[ 0  0  0  0  1  2  0  0  2 18  0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0  2 18  0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0  2 18  0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0  2 18  0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0  2 18  0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0  2 18  0]]
act called from main loop: [[497 187   0   0  50  -2 -18  53  32 -65   0]]
agent will remember: [[497 187   0   0  50  -2 -18  53  32 -65   0]]
act called from main loop: [[503 188   0   0   6  -1 -22  45   2   7   0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  2  7  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  2  7  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  2  7  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  2  7  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  2  7  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  2  7  0]]
state that th

act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 16  0]]
act called from main loop: [[ 0  0  0 

act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -6  0  0]]
act called from main loop: [[276 193   0   0  48  -6 248   5 -39 -10   0]]
agent will remember: [[276 

act called from main loop: [[ 0  0  0  0  3  0  0  0  8 12  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  8 12  0]]
act called from main loop: [[520 130   0   0  26  21 -75  43 -25  51   0]]
agent will remember: [[520 130   0   0  26  21 -75  43 -25  51   0]]
act called from main loop: [[523 130   0   0   3   0 -80  56  -2 -13   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
state that the model will use to predict action: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0  -2 -13   0]]
act called from main loop: [[  

act called from main loop: [[137 187 667 196   1   0 530   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0

act called from main loop: [[ 0  0  0  0 -2  1  0  0 25 -6  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0 25 -6  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0 25 -6  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0 25 -6  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0 25 -6  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0 25 -6  0]]
act called from main loop: [[292 238   0   0 -24 -24 173  -8  -1  -4   0]]
agent will remember: [[292 238   0   0 -24 -24 173  -8  -1  -4   0]]
act called from main loop: [[291 244   0   0  -1  -6 166  -6  -8  -8   0]]
act called from main loop: [[ 0  0  0  0 -1 -6  0  0 -8 -8  0]]
act called from main loop: [[ 0  0  0  0 -1 -6  0  0 -8 -8  0]]
act called from main loop: [[ 0  0  0  0 -1 -6  0  0 -8 -8  0]]
act called from main loop: [[ 0  0  0  0 -1 -6  0  0 -8 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1 -6  0  0 -8 -8  0]]
act called from main loop: [[ 0  0  0  0 -1 -6  0  0 -8

act called from main loop: [[  0   0   0   0   3   0   0   0 -24 -23   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0 -24 -23   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0 -24 -23   0]]
act called from main loop: [[487 236   0   0  35   8 -41  -6  -6  50   0]]
agent will remember: [[487 236   0   0  35   8 -41  -6  -6  50   0]]
act called from main loop: [[485 240   0   0  -2  -4 -38  -9   1  -1   0]]
act called from main loop: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -4  0  0  1 -1  0]]


act called from main loop: [[137 187 651 204   1   0 514  17  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act cal

agent will remember: [[441 198   0   0  34   5  43  12   8 -10   0]]
act called from main loop: [[441 199   0   0   0  -1  44  57   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  0   0   0   0   0  -1   0   0   1 -46   0]]
act called from main loop: [[  

act called from main loop: [[0 0 0 0 3 0 0 0 2 2 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 2 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 2 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 2 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 2 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 2 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 2 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 2 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 2 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 2 0]]
Game is finished, 
 your final reward is: 93.59083333333332, duration was 206 timesteps
 0.4608333333333333 - 0.95 - [[406 192   0   0   2  -3  62 -15   4  -2   0]]
not done yet, target : nan
 0.4766666666666667 - 0.95 - [[372 200   0   0  -1  -1  35  -7  -9  -2   0]]
not done yet, target : nan
 0.4633333333333333 - 0.95 - [[487 256   0   0   3  -2 -26 -18   5   2   0]]
not done yet, target : nan
 0.4

act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  1  0]]
act called from main loop: [[302 186   0   0  21  -3 172 -13

act called from main loop: [[ 0  0  0  0  3  0  0  0  2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  2 -2  0]]
act called from main loop: [[482 231   0   0  44  -7 -31 -13  40  -5   0]]
agent will remember: [[482 231   0   0  44  -7 -31 -13  40  -5   0]]
act called from main loop: [[485 232   0   0   3  -1 -25 -14   9   0   0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  9  0  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  9  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  9  0  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  9  0  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  9  0  0]]
state that the model will use to predict action: [[ 0  

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 667 238   1   0 530  51  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1

act called from main loop: [[  0   0   0   0   4  -1   0   0 -21   1   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -21   1   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -21   1   0]]
act called from main loop: [[  0   0   0   0   4  -1   0   0 -21   1   0]]
act called from main loop: [[440 192   0   0  32  18  83  49  63 -38   0]]
state that the model will use to predict action: [[440 192   0   0  32  18  83  49  63 -38   0]]
agent will remember: [[440 192   0   0  32  18  83  49  63 -38   0]]
act called from main loop: [[439 192   0   0  -1   0  70  45 -14   4   0]]
act called from main loop: [[  0   0   0   0  -1   0   0   0 -14   4   0]]
act called from main loop: [[  0   0   0   0  -1   0   0   0 -14   4   0]]
act called from main loop: [[  0   0   0   0  -1   0   0   0 -14   4   0]]
act called from main loop: [[  0   0   0   0  -1   0   0   0 -14   4   0]]
act called from main loop: [[  0   0   0   0  -1   0   0   0 -14   4   0]]
act calle

act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 10 -2  0]]
act called from main loop: [[432 212   0   0 -27   6 128   8  23  -9   0]]
agent will remember: [[432 212   0   0 -27   6 128   8  23  -9   0]]
act called from main loop: [[428 211   0   0  -4   1 129   8  -3   1   0]]
act called 

act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[495  92   0   0  -9  21 -86 156 -51  15   0]]
agent will remember: [[495  92   0   0  -9  21 -86 156 -51  15   0]]
act called from main loop: [[494  90   0   0  -1   2 -89 174  -4 -16   0]]
act called fr

act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
state that the model will use to predict action: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
state that the model will use to predict action: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
act called from main loop: [[ 0  0  0  0 -4  0  0  0  4  2  0]]
state that the model will use to predict action: [[ 0  0  0 

act called from main loop: [[266 166   0   0  44   6 253  26 -36   2   0]]
agent will remember: [[266 166   0   0  44   6 253  26 -36   2   0]]
act called from main loop: [[266 165   0   0   0   1 252  26  -1   1   0]]
state that the model will use to predict action: [[266 165   0   0   0   1 252  26  -1   1   0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -1  1  0]]
act called from main l

act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
state that the model will use to predict action: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0 -13  -1   0]]
sta

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[166 180   0   0  29   7 436  12 -61   3   0]]
agent will remember: [[166 180   0   0  29   7 436  12 -61   3   0]]
act called from main loop: [[170 180   0   0   4   0 429  13  -3  -1   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -3 -1  0]]
act called 

act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  0 13  0]]
act called from main loop: [[324 181   0   0 -18 -19  14  31 -32 -12   0]]
agent will remember: [[324 181   0   0 -18 -19  14  31 -32 -12   0]]
act called from main loop: [[320 173   0   0  -4   8  -3  40 -21  -1   0]]
act called from main loop: [[  0   0   0   0  -4   8   0   0 -21  -1   0]]
act called from main loop: [[  0   0   0   0  -4   8   0   0 -21  -1   0]]
act called from main loop: [[  0 

act called from main loop: [[459 103   0   0  41  11 -24  35  45 -14   0]]
agent will remember: [[459 103   0   0  41  11 -24  35  45 -14   0]]
act called from main loop: [[462 101   0   0   3   2 -22  58   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
state that the model will use to predict action: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act called from main loop: [[  0   0   0   0   3   2   0   0   5 -21   0]]
act calle

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from 

act called from main loop: [[  0   0   0   0   1  -5   0   0 -19   0   0]]
state that the model will use to predict action: [[  0   0   0   0   1  -5   0   0 -19   0   0]]
act called from main loop: [[  0   0   0   0   1  -5   0   0 -19   0   0]]
act called from main loop: [[446 197   0   0  38   3  -3  63  -3 -40   0]]
agent will remember: [[446 197   0   0  38   3  -3  63  -3 -40   0]]
act called from main loop: [[446 196   0   0   0   1  12  67  15  -3   0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 15 -3  0]]
act called from main l

act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -9  0]]
act called from main loop: [[545 191   0   0  43   4 -34 -14  37  12   0]]
agent will remember: [[545 191   0   0  43   4 -34 -14  37  12   0]]
act called from main loop: [[549 193   0   0   4  -2 -33 -17   5   1   0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0  5

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[160 187   0   0  23   0 442   8 -65   0   0]]
state that the model will use to predict action: [[160 187   0   0  23   0 442   8 -65   0   0]]
agent will remem

act called from main loop: [[  0   0   0   0 -18   8   0   0  -2  -7   0]]
act called from main loop: [[  0   0   0   0 -18   8   0   0  -2  -7   0]]
act called from main loop: [[  0   0   0   0 -18   8   0   0  -2  -7   0]]
act called from main loop: [[  0   0   0   0 -18   8   0   0  -2  -7   0]]
act called from main loop: [[ 434  237    0    0   63  -12 -105  -28  -49    2    0]]
agent will remember: [[ 434  237    0    0   63  -12 -105  -28  -49    2    0]]
act called from main loop: [[435 237   0   0   1   0 -99 -32   7   4   0]]
act called from main loop: [[0 0 0 0 1 0 0 0 7 4 0]]
act called from main loop: [[0 0 0 0 1 0 0 0 7 4 0]]
act called from main loop: [[0 0 0 0 1 0 0 0 7 4 0]]
state that the model will use to predict action: [[0 0 0 0 1 0 0 0 7 4 0]]
act called from main loop: [[0 0 0 0 1 0 0 0 7 4 0]]
act called from main loop: [[0 0 0 0 1 0 0 0 7 4 0]]
state that the model will use to predict action: [[0 0 0 0 1 0 0 0 7 4 0]]
act called from main loop: [[0 0 0 0 1 0 0 0

act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
state that the model will use to predict action: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[   0    0    0    0  162 -138    0    0    2    8    0]]
act called from main loop: [[ 588  375    0    0    6  -34 -173 -161   30  -26    0]]
agent will remember: [[ 588  375    0    0    6  -34 -173 -161   30  -26    0]]
act called from main loop: [[ 589  378

act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2 -5  0]]
act called from main loop: [[270 170   0   0  46   2 252  17 -37  -3   0]]
agent will remember: [[270 170   0   0  46   2 252  17 -37  -3   0]]
act called from main loop: [[275 168   0   0   5   2 235  19 -12   0   0]]
act called from main loop: [[  0   0   0   0   5   2   0   0 -12   0   0]]
act called from main loop: [[  0   0   0   0   5   2   0   0 -12   0   0]]
state that the model will use to 

act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4 -4  0]]
act called from main loop: [[ 478   75    0    0   30   20 -191   66  -20   24    0]]
state that the model will use to predict action: [[ 478   75    0    0   30   20 -191   66  -20   24  

act called from main loop: [[137 187 651 195   1   0 514   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act cal

act called from main loop: [[ 0  0  0  0  4 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0 -2  0  0]]
act called from main loop: [[440 188   0   0  28  17  97  58  60 -48   0]]
state that the model will use to predict action: [[440 188   0   0  28  17  97  58  60 -48   0]]
agent will remember: [[440 188   0   0  28  17  97  58  60 -48   0]]
act called from main loop: [[440 188   0   0   0   0  84  51 -13   7   0]]
act called from main loop: [[  0   0   0   0   0   0   0   0 -13   7   0]]
act called from main loop: [[  0   0   0   0   0   0   0   0 -13   7   0]]
act called from main loop: [[  0   0   0   0   0   0   0   0 -13   7   0]]
act called from main loop: [[  0   0   0   0   0   0   0   0 -13   7   0]]


act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
state that the model will use to predict action: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
state that the model will use to predict action: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[  0   0   0   0  -5   3   0   0 -21   4   0]]
act called from main loop: [[506 163   0   0  62   3  -8

act called from main loop: [[ 0  0  0  0  2  1  0  0  0 -4  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  0 -4  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 -4  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 -4  0]]
act called from main loop: [[ 774  165    0    0  140    1 -138    0   30   -6    0]]
agent will remember: [[ 774  165    0    0  140    1 -138    0   30   -6    0]]
act called from main loop: [[ 776  167    0    0    2   -2 -138  -15    2   13    0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 13  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -2  0  0  2 13  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 13  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 13  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  2 13  0]]
state that the model will use to predict action: [[ 0  0  0  0  

act called from main loop: [[162 184   0   0  25   3 440  11 -60   1   0]]
state that the model will use to predict action: [[162 184   0   0  25   3 440  11 -60   1   0]]
agent will remember: [[162 184   0   0  25   3 440  11 -60   1   0]]
act called from main loop: [[166 185   0   0   4  -1 430  11  -6  -1   0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 

act called from main loop: [[441 206   0   0  32  12  56  40  22 -35   0]]
agent will remember: [[441 206   0   0  32  12  56  40  22 -35   0]]
act called from main loop: [[441 206   0   0   0   0  89  41  33  -1   0]]
state that the model will use to predict action: [[441 206   0   0   0   0  89  41  33  -1   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 33 -1  0]]
act called from main loop: [[ 0  0  0  0  

agent will remember: [[ 733  220    0    0  254  -12 -231   54   45   -2    0]]
act called from main loop: [[ 737  220    0    0    4    0 -248   49  -13    5    0]]
state that the model will use to predict action: [[ 737  220    0    0    4    0 -248   49  -13    5    0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
state that the model will use to predict action: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -13   5   0]]
act called from main loop: [[

act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3  0  0]]
state that the model will use to predi

act called from main loop: [[  0   0   0   0  -1   3   0   0 -17  -5   0]]
act called from main loop: [[  0   0   0   0  -1   3   0   0 -17  -5   0]]
act called from main loop: [[  0   0   0   0  -1   3   0   0 -17  -5   0]]
act called from main loop: [[  0   0   0   0  -1   3   0   0 -17  -5   0]]
state that the model will use to predict action: [[  0   0   0   0  -1   3   0   0 -17  -5   0]]
act called from main loop: [[458 137   0   0  30  17  23 119   6   0   0]]
agent will remember: [[458 137   0   0  30  17  23 119   6   0   0]]
act called from main loop: [[461 136   0   0   3   1  15 104  -5  16   0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -5 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -5 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -5 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -5 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -5 16  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -5 16  0]]


act called from main loop: [[ 0  0  0  0  0  0  0  0 20  0  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 20  0  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 20  0  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 20  0  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 20  0  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 20  0  0]]
act called from main loop: [[526  30   0   0  15   0  36  49  23  25   0]]
agent will remember: [[526  30   0   0  15   0  36  49  23  25   0]]
act called from main loop: [[526  30   0   0   0   0  40  47   4   2   0]]
act called from main loop: [[0 0 0 0 0 0 0 0 4 2 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 4 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 4 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 4 2 0]]
Game is finished, 
 your final reward is: 98.14166666666667, duration was 214 timesteps
 0.4841666666666667 - 0.95 - [[451 198   0   0   3   1 -19   0   0   1   0]

act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[306 177   0   0  25   2 169   8 -41   8   0]]
state

act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 11 -2  0]]
act called from main loop: [[393 163   0   0  47  -1  27  40 -15 -11   0]]
agent will remember: [[393 163   0   0  47  -1  27  40 -15 -11   0]]
act called from main loop: [[393 164   0   0   0  -1  25  36  -2   3   0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -2  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0 -2  3  0]]
act called from main loop: [[ 0  

act called from main loop: [[ 408   90    0    0 -168   17  -41   76   10   24    0]]
agent will remember: [[ 408   90    0    0 -168   17  -41   76   10   24    0]]
act called from main loop: [[ 590   90    0    0  182    0 -221   79    2   -3    0]]
act called from main loop: [[  0   0   0   0 182   0   0   0   2  -3   0]]
state that the model will use to predict action: [[  0   0   0   0 182   0   0   0   2  -3   0]]
act called from main loop: [[  0   0   0   0 182   0   0   0   2  -3   0]]
state that the model will use to predict action: [[  0   0   0   0 182   0   0   0   2  -3   0]]
act called from main loop: [[  0   0   0   0 182   0   0   0   2  -3   0]]
act called from main loop: [[  0   0   0   0 182   0   0   0   2  -3   0]]
act called from main loop: [[  0   0   0   0 182   0   0   0   2  -3   0]]
act called from main loop: [[  0   0   0   0 182   0   0   0   2  -3   0]]
act called from main loop: [[  0   0   0   0 182   0   0   0   2  -3   0]]
act called from main loop: [[

act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -13  -6   0]]
act called from main loop: [[  0   0   0   0   5   0   0

act called from main loop: [[ 446   87    0    0   42    7 -159   96  -47   13    0]]
agent will remember: [[ 446   87    0    0   42    7 -159   96  -47   13    0]]
act called from main loop: [[ 449   87    0    0    3    0 -161   90    1    6    0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 1 6 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 1 6 0]]
act called from main loo

act called from main loop: [[137 187 663 196   1   0 526   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: 

act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
state that the model will use to predict action: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
state that the model will use to predict action: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0   0 -18   5   0]]
act called from main loop: [[  0   0   0   0  -4   0   0

act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -1  0  0  0 -1  0]]
act called from main loop: [[505 168   0   0  47 -12 -79  17

act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  7  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0 -1  7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -1  7  0]]
act called from main loop: [[661 133   0   0  31  -7 -34 -14  25  -7   0]]
agent will remember: [[661 133   0   0  31  -7 -34 -14  25  -7   0]]
act called from main loop: [[662 136   0   0   1  -3 -38 -10  -3  -7   0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -3 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -3  0  0 -3 -7  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -3 -7  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -3 -7  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -3 -7  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0 -3 -7  0]]
act called from main loop: [[ 0  

agent will remember: [[163 179   0   0  26   8 437  17 -68   4   0]]
act called from main loop: [[168 180   0   0   5  -1 427  17  -5  -1   0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  5 

act called from main loop: [[ 0  0  0  0  0  1  0  0 -9  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -9  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -9  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -9  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -9  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -9  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0 -9  0  0]]
act called from main loop: [[ 388   90    0    0    0   19 -102   79  -48   15    0]]
agent will remember: [[ 388   90    0    0    0   19 -102   79  -48   15    0]]
act called from main loop: [[ 388   88    0    0    0    2 -106   98   -4  -17    0]]
state that the model will use to predict action: [[ 388   88    0    0    0    2 -106   98   -4  -17    0]]
act called from main loop: [[  0   0   0   0   0   2   0   0  -4 -17   0]]
act called from main loop: [[  0   0   0   0   0   2   0   0  -4 -17   0]]
act called from main

act called from main loop: [[  0   0   0   0   1   0   0   0   1 -18   0]]
state that the model will use to predict action: [[  0   0   0   0   1   0   0   0   1 -18   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   1 -18   0]]
act called from main loop: [[438  30   0   0  16   2 -30  19  10  49   0]]
agent will remember: [[438  30   0   0  16   2 -30  19  10  49   0]]
act called from main loop: [[434  36   0   0  -4  -6 -25  16   1  -3   0]]
act called from main loop: [[ 0  0  0  0 -4 -6  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0 -4 -6  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0 -4 -6  0  0  1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0 -4 -6  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0 -4 -6  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0 -4 -6  0  0  1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0 -4 -6  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0 -

act called from main loop: [[276 190   0   0  48  -7 237   3 -40  -5   0]]
agent will remember: [[276 190   0   0  48  -7 237   3 -40  -5   0]]
act called from main loop: [[280 189   0   0   4   1 236   2   3   2   0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 2

act called from main loop: [[0 0 0 0 4 1 0 0 4 4 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 4 4 0]]
act called from main loop: [[525 140   0   0  29  17 -84  60 -20  34   0]]
agent will remember: [[525 140   0   0  29  17 -84  60 -20  34   0]]
act called from main loop: [[528 140   0   0   3   0 -84  56   3   4   0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 3 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 3 4 0]]
act called from main loop:

act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
state that the model will use to predict action: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predi

act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
state that the model will use to predict action: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
state that the model will use to predict action: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
state that the model will use to predict action: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0   0   0   0  -3   1   0   0   9 -18   0]]
act called from main loop: [[  0  

act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -4 -4  0]]
act called from main loop: [[304 209   0   0 -11 -37  22 -44 -32  -3   0]]
state that the model will use to predict action: [[304 209   0   0 -1

act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0  0  7  7  0  0  3 -3  0]]
act called from main loop: [[ 0  0  0 

act called from main loop: [[ 0  0  0  0  6  1  0  0 -3 -7  0]]
act called from main loop: [[ 0  0  0  0  6  1  0  0 -3 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  6  1  0  0 -3 -7  0]]
act called from main loop: [[ 0  0  0  0  6  1  0  0 -3 -7  0]]
act called from main loop: [[ 0  0  0  0  6  1  0  0 -3 -7  0]]
act called from main loop: [[ 668  138    0    0  134    7 -116   94   60    5    0]]
agent will remember: [[ 668  138    0    0  134    7 -116   94   60    5    0]]
act called from main loop: [[ 673  136    0    0    5    2 -125   91   -4    5    0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  2  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4  5  0]]
act called from main loop: [[ 0  0  0  0  5  2  0  0 -4  5  0]]


act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1

act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  4  5  0]]
act called from main loop: [[444 188   0   0  33  15  79  48  47 -47   0]]
agent will remember: [[444 188   0   0  33  15  79  48  47 -47   0]]
act called from main loop: [[443 189   0   0  -1  -1  82  39   2   8   0]]
state that th

act called from main loop: [[ 437  220    0    0 -183   -7  -21   27   37   -6    0]]
agent will remember: [[ 437  220    0    0 -183   -7  -21   27   37   -6    0]]
act called from main loop: [[441 220   0   0   4   0 -16  35   9  -8   0]]
state that the model will use to predict action: [[441 220   0   0   4   0 -16  35   9  -8   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  9 -8  0]]


act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[164 181   0   0  27   6 438  10 -61   4   0]]
agent will remember: [[164 181   0   0  27   6 438  10 -61   4   0]]
act called from main loop: [[169 181   0   0   5   0 430  15  -3  -5   0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3 -5  0]]
act called 

act called from main loop: [[446 157   0   0  36  29  22  70   1 -25   0]]
agent will remember: [[446 157   0   0  36  29  22  70   1 -25   0]]
act called from main loop: [[448 154   0   0   2   3  38  92  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
act called from main loop: [[  0   0   0   0   2   3   0   0  18 -19   0]]
state that the model will use t

act called from main loop: [[ 703   95    0    0   45    3 -142  140   55   16    0]]
agent will remember: [[ 703   95    0    0   45    3 -142  140   55   16    0]]
act called from main loop: [[ 703   94    0    0    0    1 -136  139    6    2    0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
state that the model will use to predict action: [[0 0 0 0 0 1 0 0 6 2 0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
act called from main loop: [[0 0 0 0 0 1 0 0 6 2 0]]
state that the model will use to predict action: [[0 0 0 0 0 1 0 0 6 2 0]]
Game is finished, 
 your final reward is: 93.68083333333333, duration was 204 timesteps
 0.4608333333333333 - 0.95 - [[ 608   57    0    0    3    2 -147  100   -1    3    0]

act called from main loop: [[  0   0   0   0   2   0   0   0 -10   1   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -10   1   0]]
state that the model will use to predict action: [[  0   0   0   0   2   0   0   0 -10   1   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -10   1   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -10   1   0]]
act called from main loop: [[273 174   0   0  49   1 244  20 -36   5   0]]
agent will remember: [[273 174   0   0  49   1 244  20 -36   5   0]]
act called from main loop: [[278 173   0   0   5   1 236  21  -3   0   0]]
act called from main loop: [[ 0  0  0  0  5  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  1  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  5  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  

act called from main loop: [[  0   0   0   0   3   0   0   0 -38   1   0]]
state that the model will use to predict action: [[  0   0   0   0   3   0   0   0 -38   1   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0 -38   1   0]]
state that the model will use to predict action: [[  0   0   0   0   3   0   0   0 -38   1   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0 -38   1   0]]
act called from main loop: [[  0   0   0   0   3   0   0   0 -38   1   0]]
act called from main loop: [[512 108   0   0  31  22 -57  34  -9  77   0]]
agent will remember: [[512 108   0   0  31  22 -57  34  -9  77   0]]
act called from main loop: [[514 108   0   0   2   0 -59  44   0 -10   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   0 -10   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   0 -10   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   0 -10   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0  

act called from main loop: [[137 187 651 195   1   0 514   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0

act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -25  -2   0]]
act called from main loop: [[  0   0   0   0   5   0   0

agent will remember: [[514 140   0   0  52   6 -24  66  63  -2   0]]
act called from main loop: [[521 140   0   0   7   0 -44  75 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act called from main loop: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
state that the model will use to predict action: [[  0   0   0   0   7   0   0   0 -13  -9   0]]
act calle

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 195   1   0 514   8  -1   0   0]]
state that the model will use to predict action: [[137 187 651 195   1   0 514   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called

agent will remember: [[338 184   0   0 -15 -19 103   7  20 -21   0]]
act called from main loop: [[336 183   0   0  -2   1 106   6   1   2   0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
act called from main loop: [[ 0  0  0  0 -2  1  0  0  1  2  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 

agent will remember: [[398  99   0   0   3  29  70 119  -2 -47   0]]
act called from main loop: [[398  96   0   0   0   3  71 122   1   0   0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 0 3 0 0 1 0 0]]
act called from main loop: [[405  78   0   0   7  18  49 113 -15  27   0]]
state that the model will use to predict action: [[405  78   0   0   7  18  49 113 -15  27  

act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act cal

act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -7  6  0]]
act called from 

act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[  0   0   0   0   8  -2   0   0 -18   1   0]]
act called from main loop: [[370 257   0   0 -70  -6 -33  56   2  24   0]]
agent will remember: [[370 257   0   0 -70  -6 -33  56   2  24   0]]
act called from main loop: [[363 257   0   0  -7   0 -36  57 -10  -1   0]]
act called from main loop: [[  0   0   0   0  -7   0   0   0 -10  -1   0]]
state that the model will use to predict action: [[  0   0   0   0  -7   0   0   0 -10  -1   0]]
act calle

act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 20 -1  0]]
act called from 

act called from main loop: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0 -5  2  0]]
act called from main loop: [[276 176   0   0  52  -4 246   6 -29  -5   0]]
agent will remember: [[276 176   0   0  52  -4 246   6 -29  -5   0]]
act called from main loop: [[279 176   0   0   3   0 234   5  -9   1   0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -9  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -9  1  0]]
act called 

act called from main loop: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 -6  7  0]]
act called from main loop: [[445 136   0   0 -61  18  -9  69 -50  17   0]]
agent will remember: [[445 136   0   0 -61  18  -9  69 -50  17   0]]
act called from main loop: [[ 539  135    0    0   94    1 -108   72   -5   -2    0]]
act called from main loop: [[ 0  0  0  0 94  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0 94  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0 94  1  0  0 -5 -2  0]]
state that the model w

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 662 193   1   0 525   6  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1

act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -6 -1  0]]
act called from 

agent will remember: [[417 242   0   0  46  -2 -29  -4  45   0   0]]
act called from main loop: [[419 242   0   0   2   0 -26  -6   5   2   0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 5 2 0]]
state that the mo

agent will remember: [[677 248   0   0  29 -20 -23 -14  27 -22   0]]
act called from main loop: [[679 250   0   0   2  -2 -24 -16   1   0   0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  1  0  0]]
Game is finished, 
 your final reward is: 130.24749999999997, duration was 278 timesteps
 0.4625 - 0.95 - [[ 530  272    0    0 -136    2  -39   -6   -4  -10    0]]
not done yet, target : nan
 0.4475 - 0.95 - [[435 178   0   0  -1   0  

agent will remember: [[265 157   0   0  47   2 259  36 -42  -9   0]]
act called from main loop: [[268 156   0   0   3   1 254  38  -2  -1   0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  3 

act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  7 -3  0]]
act called from main loop: [[556  83   0   0  50   5 -97  35  10   2   0]]
agent will remember: [[556  83   0   0  50   5 -97  35  10   2   0]]
act called from main loop: [[560  82   0   0   4   1 -98  39   3  -3   0]]
state that th

act called from main loop: [[137 187 665 196   1   0 528   9  -1   0   0]]
state that the model will use to predict action: [[137 187 665 196   1   0 528   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0

act called from main loop: [[331 209   0   0 -22  -7  61  -5 -45   7   0]]
agent will remember: [[331 209   0   0 -22  -7  61  -5 -45   7   0]]
act called from main loop: [[328 210   0   0  -3  -1  63  -7  -1   1   0]]
state that the model will use to predict action: [[328 210   0   0  -3  -1  63  -7  -1   1   0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -1  1  0]]


agent will remember: [[475 196   0   0  29 -60 -42 -57   4 -11   0]]
act called from main loop: [[472 156   0   0  -3  40 -33 -16   6  -1   0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 40  0  0  6 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 

act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0 

act called from main loop: [[276 188   0   0  50  -7 246   1 -37  -7   0]]
state that the model will use to predict action: [[276 188   0   0  50  -7 246   1 -37  -7   0]]
agent will remember: [[276 188   0   0  50  -7 246   1 -37  -7   0]]
act called from main loop: [[280 188   0   0   4   0 240   1  -2   0   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  0  0]]


act called from main loop: [[  0   0   0   0   6  -1   0   0 -55 -14   0]]
act called from main loop: [[  0   0   0   0   6  -1   0   0 -55 -14   0]]
act called from main loop: [[  0   0   0   0   6  -1   0   0 -55 -14   0]]
act called from main loop: [[  0   0   0   0   6  -1   0   0 -55 -14   0]]
act called from main loop: [[  0   0   0   0   6  -1   0   0 -55 -14   0]]
act called from main loop: [[  0   0   0   0   6  -1   0   0 -55 -14   0]]
act called from main loop: [[481 237   0   0  -3 -11  -2 -19  -4  47   0]]
agent will remember: [[481 237   0   0  -3 -11  -2 -19  -4  47   0]]
act called from main loop: [[478 239   0   0  -3  -2   0 -20  -1  -1   0]]
act called from main loop: [[ 0  0  0  0 -3 -2  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 -2  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 -2  0  0 -1 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 -2  0  0 -1 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -2  0  0 

act called from main loop: [[  0   0   0   0   1  -2   0   0  13 -10   0]]
act called from main loop: [[  0   0   0   0   1  -2   0   0  13 -10   0]]
act called from main loop: [[  0   0   0   0   1  -2   0   0  13 -10   0]]
state that the model will use to predict action: [[  0   0   0   0   1  -2   0   0  13 -10   0]]
act called from main loop: [[  0   0   0   0   1  -2   0   0  13 -10   0]]
act called from main loop: [[  0   0   0   0   1  -2   0   0  13 -10   0]]
act called from main loop: [[  0   0   0   0   1  -2   0   0  13 -10   0]]
act called from main loop: [[ 663  280    0    0  127   -4 -123   -9   31   18    0]]
agent will remember: [[ 663  280    0    0  127   -4 -123   -9   31   18    0]]
act called from main loop: [[ 664  281    0    0    1   -1 -119  -10    5    0    0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  5  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  5  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  5  0  0]]
act calle

act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -9  0]]
state that the model will use to predict action: [[ 0  0  

act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
state that the model will use to predict action: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
state that the model will use to predict action: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called from main loop: [[   0    0    0    0 -110    1    0    0    6    0    0]]
act called

act called from main loop: [[ 0  0  0  0  3 -3  0  0 11 -6  0]]
act called from main loop: [[ 0  0  0  0  3 -3  0  0 11 -6  0]]
act called from main loop: [[ 690  285    0    0   26  -28 -237  -27   36    6    0]]
state that the model will use to predict action: [[ 690  285    0    0   26  -28 -237  -27   36    6    0]]
agent will remember: [[ 690  285    0    0   26  -28 -237  -27   36    6    0]]
act called from main loop: [[ 474  262    0    0 -216   23  -38    3  -17   -7    0]]
act called from main loop: [[   0    0    0    0 -216   23    0    0  -17   -7    0]]
act called from main loop: [[   0    0    0    0 -216   23    0    0  -17   -7    0]]
act called from main loop: [[   0    0    0    0 -216   23    0    0  -17   -7    0]]
act called from main loop: [[   0    0    0    0 -216   23    0    0  -17   -7    0]]
act called from main loop: [[   0    0    0    0 -216   23    0    0  -17   -7    0]]
act called from main loop: [[   0    0    0    0 -216   23    0    0  -17   -7    

act called from main loop: [[227 177   0   0  57   2 328   5 -40   6   0]]
state that the model will use to predict action: [[227 177   0   0  57   2 328   5 -40   6   0]]
agent will remember: [[227 177   0   0  57   2 328   5 -40   6   0]]
act called from main loop: [[227 177   0   0   0   0 329   3   1   2   0]]
state that the model will use to predict action: [[227 177   0   0   0   0 329   3   1   2   0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 2 0]]
act 

act called from main loop: [[304 248   0   0   2 -61 -25 -65 -34   8   0]]
agent will remember: [[304 248   0   0   2 -61 -25 -65 -34   8   0]]
act called from main loop: [[307 244   0   0   3   4 -31 -55  -3  -6   0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  4  0  0 -3 -6  0]]
act called 

act called from main loop: [[344 171   0   0  36 -10 -12 -15  55 -10   0]]
state that the model will use to predict action: [[344 171   0   0  36 -10 -12 -15  55 -10   0]]
agent will remember: [[344 171   0   0  36 -10 -12 -15  55 -10   0]]
act called from main loop: [[349 170   0   0   5   1 -31 -14 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -14   0   0]]
state that the model will use to predict action: [[  0   0   0   0   5   1   0   0 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -14   0   0]]
act called from main loop: [[  0   0   0   0   5   1   0   0 -

agent will remember: [[610 213   0   0  43  -8 -37  -7  33 -13   0]]
act called from main loop: [[614 212   0   0   4   1 -23 -14  18   8   0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 18  8  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 18  8  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 18  8  0]]
Game is finished, 
 your final reward is: 142.43583333333333, duration was 304 timesteps
 0.43416666666666665 - 0.95 - [[446 196   0   0   0   1  12  67  15  -3   0]]
not done yet, target : nan
 0.3225 - 0.95 - [[ 620  213    0    0    4    0 -241   28    1   -8    0]]
not done yet, target : nan
 0.415 - 0.95 - [[352 145   0   0   2   2  63  39  -3  13   0]]
not done yet, target : nan
 0.42083333333333334 - 0.95 - [[324 236   0   0  -3   0 112 -17  -2   1   0]]
not done yet, target : nan
 0.39916666666666667 - 0.95 - [[443 189   0   0  -1  -1  82  39   2   8   0]]
not done yet, target : nan
 0.4533333333333333 - 0.95 - [[388 130   0   0   2   0 -17  73  -4

agent will remember: [[273 172   0   0  49   0 242  14 -35   4   0]]
act called from main loop: [[277 171   0   0   4   1 233  18  -5  -3   0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -5 -3  0]]
act called from main l

act called from main loop: [[512  99   0   0  26  18  -7 151  20   3   0]]
agent will remember: [[512  99   0   0  26  18  -7 151  20   3   0]]
act called from main loop: [[514  98   0   0   2   1  -9 124   0  28   0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 28  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  0 28  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 28  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 28  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 28  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  0 28  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 28  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 28  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  0 28  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  0 28  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 195   1   0 514   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
s

act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
state that the model will use to predict action: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
state that the model will use to predict action: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
state that the model will use to predict action: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
act called from main loop: [[  0   0   0   0   3 -59   0   0  -5  -7   0]]
state that the model will use to p

act called from main loop: [[ 0  0  0  0  2 -1  0  0 20 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 20 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 20 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 20 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 20 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 20 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -1  0  0 20 -2  0]]
act called from main loop: [[467 264   0   0  40 -10 -34 -15  17 -11   0]]
agent will remember: [[467 264   0   0  40 -10 -34 -15  17 -11   0]]
act called from main loop: [[471 264   0   0   4   0 -33 -14   5  -1   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  5 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  5 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  5 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  5 -1  0]]
act called from main loop: [[ 0  

act called from main loop: [[227 181   0   0  59   1 330  -4 -51   1   0]]
agent will remember: [[227 181   0   0  59   1 330  -4 -51   1   0]]
act called from main loop: [[226 181   0   0  -1   0 328  -5  -3   1   0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  0  0  0 -3  1  0]]
act called 

act called from main loop: [[ 0  0  0  0  0  3  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0 -4 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  3  0  0 -4 -1  0]]
act called from main loop: [[ 0  0  0  0  0  3  0  0 -4 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  3  0  0 -4 -1  0]]
act called from main loop: [[ 443  284    0    0   32  -22 -142  -62  -22  -12    0]]
state that the model will use to predict action: [[ 443  284    0    0   32  -22 -142  -62  -22  -12    0]]
agent will remember: [[ 443  284    0    0   32  -22 -142  -62  -22  -12    0]]
act called from main loop: [[ 448  285    0    0    5   -1 -144  -48    3  -15    0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0   3 -15   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0   3 -15   0]]
state that the model will use to predict action: [[  0   0   0

agent will remember: [[498 329   0   0  29 -23 -36 -16  32 -21   0]]
act called from main loop: [[500 330   0   0   2  -1 -31 -16   7  -1   0]]
state that the model will use to predict action: [[500 330   0   0   2  -1 -31 -16   7  -1   0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  7 -1  0]]
Game is finished, 
 your final reward is: 90.08249999999998, duration was 197 timesteps
 0.4625 - 0.95 - [[502  85   0   0   3   1 -17  62   3  -1   0]]
not done yet, target : nan
 0.12833333333333333 - 0.95 - [[166 184   0   0   4  -1 445   1  -2  -3   0]]
not done yet, target : nan
 0.47583333333333333 - 0.95 - [[517 150   0   0   2   0 -16 -13   2  -2   0]]
not done yet, target : nan
 0.2525 - 0.95 - [[ 574  276    0    0    1   -2 -195 -102   -4   -6    0]]
not done yet, target : nan
 0.4525 - 0.95 - [[408 185   0   0   4  -3  53   4 -10  -1   0]]
not done yet, target : nan
 0.4683333333333333 - 0.95 - [[500 130   0   0  -2   1 -24 -14   7   0   0]]
not done yet, target : nan
 0.445

act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2  3  0]]
act called from 

act called from main loop: [[ 0  0  0  0  4  4  0  0 37  8  0]]
act called from main loop: [[ 0  0  0  0  4  4  0  0 37  8  0]]
act called from main loop: [[336 168   0   0  37  46  97  29 -54   7   0]]
state that the model will use to predict action: [[336 168   0   0  37  46  97  29 -54   7   0]]
agent will remember: [[336 168   0   0  37  46  97  29 -54   7   0]]
act called from main loop: [[340 169   0   0   4  -1  91  31  -2  -3   0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -2 -3  0]]
act called from main l

act called from main loop: [[ 0  0  0  0 -2 -3  0  0  7  1  0]]
act called from main loop: [[ 0  0  0  0 -2 -3  0  0  7  1  0]]
act called from main loop: [[ 512  274    0    0   83  -47 -118  -42  -34  -23    0]]
agent will remember: [[ 512  274    0    0   83  -47 -118  -42  -34  -23    0]]
act called from main loop: [[ 512  274    0    0    0    0 -120  -44   -2    2    0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 -2  2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  2  0]]


act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 652 196   1   0 515   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
s

act called from main loop: [[328 172   0   0 -28   0  65  13 -26   7   0]]
state that the model will use to predict action: [[328 172   0   0 -28   0  65  13 -26   7   0]]
agent will remember: [[328 172   0   0 -28   0  65  13 -26   7   0]]
act called from main loop: [[325 172   0   0  -3   0  80  32  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
state that the model will use to predict action: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  12 -19   0]]
act called from main loop: [[  0   0   0   0  -3   0   0   0  

act called from main loop: [[ 0  0  0  0  4  0  0  0 -2 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -2 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -2 -1  0]]
act called from main loop: [[ 471  174    0    0   25    0 -275  -44  -33  -29    0]]
agent will remember: [[ 471  174    0    0   25    0 -275  -44  -33  -29    0]]
act called from main loop: [[ 474  173    0    0    3    1 -289  -55  -11   12    0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -11  12   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -11  12   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0 -11  12   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -11  12   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -11  12   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -11  12   0]]
state that the model will use to predict action: [[  0   0   0

act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
state that the model will use to predict action: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
state that the model will use to predict action: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -23   0]]
state that the model will use to predict action: [[  0   0   0   0   4   0   0   0   3 -23   0]]
act called from main loop: [[  0  

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[163 182   0   0  26   5 442   5 -60   9   0]]
state that the model will use to predict action: [[163 182   0   0  26   5 442   5 -60   9   0]]
agent will remember: [[163 182   0   0  26   5 442   5 -60   9   0]]
act called from main loop: [[169 182   0   0   6   0 442   5   6   0   0]]
act called from main loop: [[0 0 0 0 6 0 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 6 0 0 0 6 0 0]]
act called from main l

act called from main loop: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4 17  0]]
act called from main loop: [[387 272   0   0  26 -22 -49 -91 -30  11   0]]
agent will remember: [[387 272   0   0  26 -22 -49 -91 -30  11   0]]
act called from main loop: [[387 274   0   0   0  -2 -51 -93  -2   0   0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  0 -2  0  0 -2  0  0]]
act called from main loop: [[ 0  

agent will remember: [[ 458  404    0    0   86 -134  -99 -129   33  -22    0]]
act called from main loop: [[400 293   0   0 -58 111 -39  -9   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
state that the model will use to predict action: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
state that the model will use to predict action: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
act called from main loop: [[  0   0   0   0 -58 111   0   0   2  -9   0]]
state that the model will use to predict action: [[

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[162 184   0   0  25   3 449  -5 -40  17   0]]
state that the model will use to predict action: [[162 184   0   0  25   3 449  -5 -40  17   0]]
agent will remember: [[162 184   0   0  25   3 449  -5 -40

act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 11 -1  0]]
act called from main loop: [[350 247   0   0 -22 -26  -9 -27

act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  6 -3  0]]
act called from main loop: [[ 0  0  0 

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 195   1   0 514   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main 

act called from main loop: [[  0   0   0   0   2   2   0   0  -5 -14   0]]
act called from main loop: [[  0   0   0   0   2   2   0   0  -5 -14   0]]
state that the model will use to predict action: [[  0   0   0   0   2   2   0   0  -5 -14   0]]
act called from main loop: [[  0   0   0   0   2   2   0   0  -5 -14   0]]
act called from main loop: [[  0   0   0   0   2   2   0   0  -5 -14   0]]
act called from main loop: [[  0   0   0   0   2   2   0   0  -5 -14   0]]
act called from main loop: [[  0   0   0   0   2   2   0   0  -5 -14   0]]
act called from main loop: [[366 128   0   0  16  16   5  84 -54   0   0]]
agent will remember: [[366 128   0   0  16  16   5  84 -54   0   0]]
act called from main loop: [[367 128   0   0   1   0   1  80  -3   4   0]]
state that the model will use to predict action: [[367 128   0   0   1   0   1  80  -3   4   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -3  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -3  4  0]]
act calle

act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 1 1 0]]
act called from main loop: [[ 479   30    0    0   13    0 -153   58   43   22    0]]
agent will remember: [[ 479   30    0    0   13    0 

act called from main loop: [[  0   0   0   0   5   0   0   0  -9 -24   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0  -9 -24   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0  -9 -24   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0  -9 -24   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0  -9 -24   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0  -9 -24   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0  -9 -24   0]]
act called from main loop: [[224 172   0   0  54   7 336  15 -48  21   0]]
agent will remember: [[224 172   0   0  54   7 336  15 -48  21   0]]
act called from main loop: [[224 172   0   0   0   0 334  15  -2   0   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  

act called from main loop: [[ 0  0  0  0 -1  1  0  0 42 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  1  0  0 42 -3  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0 42 -3  0]]
act called from main loop: [[502 184   0   0  50  -4 -23  84   5  -3   0]]
agent will remember: [[502 184   0   0  50  -4 -23  84   5  -3   0]]
act called from main loop: [[508 184   0   0   6   0 -72  81 -43   3   0]]
state that the model will use to predict action: [[508 184   0   0   6   0 -72  81 -43   3   0]]
act called from main loop: [[  0   0   0   0   6   0   0   0 -43   3   0]]
act called from main loop: [[  0   0   0   0   6   0   0   0 -43   3   0]]
state that the model will use to predict action: [[  0   0   0   0   6   0   0   0 -43   3   0]]
act called from main loop: [[  0   0   0   0   6   0   0   0 -43   3   0]]
state that the model will use to predict action: [[  0   0   0   0   6   0   0   0 -43   3   0]]
act called from main loop: [[  0   0   0   0   6   

act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 16  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 16

act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -3 -1  0]]
act called from main loop: [[226 189   0   0  61  -3 331  -3 -55   2   0]]
agent will remember: [[226 

act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  0  0]]
act called from main loop: [[302 258   0   0   1 -19 153 -38 -18  -8   0]]
agent will remember: [[302 258   0   0   1 -19 153 -38 -18  -8   0]]
act called from main loop: [[304 253   0   0   2   5 147 -38  -4   5   0]]
act called from main loop: [[ 0  0  0  0  2  5  0  0 -4  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  5  0  0 -4  5  0]]
act called 

act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  8 -6  0]]
act called from main loop: [[ 0  0  0 

act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0  3 -1  0]]
act called from main loop: [[654 195   0   0  40  -5 -14 -14  51  -2   0]]
agent will remember: [[654 195   0   0  40  -5 -14 -14  51  -2   0]]
act called from main loop: [[655 197   0   0   1  -2 -16 -14  -1  -2   0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0 -1 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0 -1 -2  0]]
act called from main loop: [[ 0  0  0  0  1 -2  0  0 -1 -2  0]]
state that the model will use to 

act called from main loop: [[ 0  0  0  0  4 -2  0  0 -3  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -2  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0 -3  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -2  0  0 -3  2  0]]
act called from main loop: [[225 186   0   0  60   0 343  14 -31  -7   0]]
agent will remember: [[225 186   0   0  60   0 343  14 -31  -7   0]]
act called from main loop: [[226 186   0   0   1   0 334  10  -8   4   0]]
state that the model will use to predict action: [[226 186   0   0   1   0 334  10  -8   4   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -8  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -8  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -8  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -8  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -8  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -8  4  0]]


act called from main loop: [[ 0  0  0  0  0 -2  0  0  1  0  0]]
act called from main loop: [[ 296  266    0    0    2   -4   34 -110  -45   13    0]]
state that the model will use to predict action: [[ 296  266    0    0    2   -4   34 -110  -45   13    0]]
agent will remember: [[ 296  266    0    0    2   -4   34 -110  -45   13    0]]
act called from main loop: [[301 168   0   0   5  98  36 -13   7   1   0]]
act called from main loop: [[ 0  0  0  0  5 98  0  0  7  1  0]]
act called from main loop: [[ 0  0  0  0  5 98  0  0  7  1  0]]
act called from main loop: [[ 0  0  0  0  5 98  0  0  7  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 98  0  0  7  1  0]]
act called from main loop: [[ 0  0  0  0  5 98  0  0  7  1  0]]
act called from main loop: [[ 0  0  0  0  5 98  0  0  7  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 98  0  0  7  1  0]]
act called from main loop: [[ 0  0  0  0  5 98  0  0  7  1  0]]
act called from main loop: [[ 0

act called from main loop: [[ 0  0  0  0  0  0  0  0 10 -4  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 10 -4  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 10 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 10 -4  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 10 -4  0]]
act called from main loop: [[462 227   0   0 -56  62 -32 -14  27   7   0]]
agent will remember: [[462 227   0   0 -56  62 -32 -14  27   7   0]]
act called from main loop: [[468 228   0   0   6  -1 -33 -14   5  -1   0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  5 -1  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  5 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  6 -1  0  0  5 -1  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  5 -1  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  5 -1  0]]
act called from main loop: [[ 0  0  0  0  6 -1  0  0  5 -1  0]]
state that the model will use to 

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[165 178   0   0  28   9 444   8 -42   9   0]]
agent will remember: [[165 178   0   0  28   9 444   8 -42   9   0]]
act called from main loop: [[171 178   0   0   6   0 439   8   1   0   0]]
act called from main loop: [[0 0 0 0 6 0 0 0 1 0 0]]
act called from main loop: [[0 0 0 0 6 0 0 0 1 0 0]]
act called from main loop: [[0 0 

act called from main loop: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  3 -2  0  0 -1  0  0]]
act called from main loop: [[439 153   0   0  23  -5 -90  22 -32  10   0]]
state that the model will use to predict action: [[439 153   0   0  23  -5 -90  22 -32  10   0]]
agent will remember: [[439 153   0  

act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0 0 2 0 0]]
state that the model will use to predict action: [[0 0 0 0 3 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 3 0 0

act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0

act called from main loop: [[406 193   0   0  34 -19  54  17  14 -11   0]]
state that the model will use to predict action: [[406 193   0   0  34 -19  54  17  14 -11   0]]
agent will remember: [[406 193   0   0  34 -19  54  17  14 -11   0]]
act called from main loop: [[410 194   0   0   4  -1  54  12   4   4   0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  4  4  0]]


act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  9 -1  0]]
act called from main loop: [[ 694  168    0    0  201   -1 -195   88   31   10    0]]
state that the model will use to predict action: [[ 694  168    0    0  201   -1 -195   88   31   10  

act called from main loop: [[137 187 652 196   1   0 515   9  -1   0   0]]
state that the model will use to predict action: [[137 187 652 196   1   0 515   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0

act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 0 0 0 0 2 0 0]]
act called from main loop: [[325 216   0   0 -25 -10  51   3 -46 -16   0]]
state that the model will use to predict action: [[325 216   0   0 -25 -10  51   3 -46 -16   0]]
agent will remember: [[325 216   0   0 -25 -10  51   3 -46 -16   0]]
act called from main loop: [[322 218   0   0  -3  -2  58   3   4  -2   0]]
stat

act called from main loop: [[ 451  213    0    0   53   -4 -176   -4   28   21    0]]
agent will remember: [[ 451  213    0    0   53   -4 -176   -4   28   21    0]]
act called from main loop: [[ 456  212    0    0    5    1 -176   -5    5    2    0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
state that the model will use to predict action: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
state that the model will use to predict action: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from main loop: [[0 0 0 0 5 1 0 0 5 2 0]]
act called from

act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 25 -4  0]]
act called from main loop: [[611 184   0   0  50   2 -32  60  42   2   0]]
agent will remember: [[611 184   0   0  50   2 -32  60  42   2   0]]
act called from main loop: [[615 184   0   0   4   0 -39  63  -3  -3   0]]
state that the model will use to predict action: [[615 184   0   0   4   0 -39  63  -3  -3   0]]
act called from main l

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from 

act called from main loop: [[  0   0   0   0   0   1   0   0 -10   4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -10   4   0]]
act called from main loop: [[343 219   0   0 -11  -6  49  -8 -30 -14   0]]
state that the model will use to predict action: [[343 219   0   0 -11  -6  49  -8 -30 -14   0]]
agent will remember: [[343 219   0   0 -11  -6  49  -8 -30 -14   0]]
act called from main loop: [[341 222   0   0  -2  -3  49  -3  -2  -8   0]]
act called from main loop: [[ 0  0  0  0 -2 -3  0  0 -2 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -3  0  0 -2 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -3  0  0 -2 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -3  0  0 -2 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -3  0  0 -2 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -3  0  0 -2 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -3  0  0 -2 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0 -

act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
state that the model will use to predict action: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
state that the model will use to predict action: [[  0   0   0   0   3  -1   0   0 -18   4   0]]
act called from main loop: [[ 213  133    0    0 -225  144    0  -14  -37   -7    0]]
agent will remember: [[ 213  133    0    0 -225  144    0  -14  -37   -7    0]]
act called from main loop: [[ 469  285  

act called from main loop: [[137 187 651 204   1   0 514  17  -1   0   0]]
state that the model will use to predict action: [[137 187 651 204   1   0 514  17  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0 

act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2  1  0]]
act called from main loop: [[387 128   0   0  33  10 -19  64 -59   5   0]]
state that the model will use to predict action: [[387 128   0   0  33  10 -19  64 -59   5   0]]
agent will remember: [[387 128   0   0  33  10 -19  64 -59   5   0]]
act called from main loop: [[390 128   0   0   3   0 -22  62   0   2   0]]
state that the model will use to predict action: [[390 128   0   0   3   0 -22  62   0   2   0]]
act called from main loop: [[0 0 0 0 3 0 0 0 0 2 0]]
act called from main

act called from main loop: [[   0    0    0    0 -113   17    0    0    2    3    0]]
act called from main loop: [[   0    0    0    0 -113   17    0    0    2    3    0]]
act called from main loop: [[498 127   0   0  40  -3 -25 -13  55   0   0]]
state that the model will use to predict action: [[498 127   0   0  40  -3 -25 -13  55   0   0]]
agent will remember: [[498 127   0   0  40  -3 -25 -13  55   0   0]]
act called from main loop: [[500 129   0   0   2  -2 -27 -14   0  -1   0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  0 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -2  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  0 -1  0]]
act called from main loop: [[ 0  0  0  0  

act called from main loop: [[ 0  0  0  0  2 -1  0  0  4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  4 -4  0]]
Game is finished, 
 your final reward is: 116.29916666666668, duration was 253 timesteps
 0.30083333333333334 - 0.95 - [[280 188   0   0   4   0 244  -5  -2   1   0]]
not done yet, target : nan
 0.36833333333333335 - 0.95 - [[452 204   0   0   1  -1 -95 -63 -37  10   0]]
not done yet, target : nan
 0.4841666666666667 - 0.95 - [[409 160   0   0   4   0 -29  10   3 -23   0]]
not done yet, target : nan
 0.42 - 0.95 - [[450 200   0   0  -2   1  74  22   9  -3   0]]
not done yet, target : nan
 0.4241666666666667 - 0.95 - [[372 187   0   0   2   1  83   8 -13  44   0]]
not done yet, target : nan
 0.4725 - 0.95 - [[592 217   0   0   4   0 -18 -15   1  11   0]]
not done yet, target : nan
 0.49833333333333335 - 0.95 - [[4

act called from main loop: [[ 0  0  0  0  3  0  0  0 -4  6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -4  6  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -4  6  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -4  6  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -4  6  0]]
act called from main loop: [[302 189   0   0  23  -1 165   9 -52   0   0]]
state that the model will use to predict action: [[302 189   0   0  23  -1 165   9 -52   0   0]]
agent will remember: [[302 189   0   0  23  -1 165   9 -52   0   0]]
act called from main loop: [[305 188   0   0   3   1 167   6   5   4   0]]
state that the model will use to predict action: [[305 188   0   0   3   1 167   6   5   4   0]]
act called from main loop: [[0 0 0 0 3 1 0 0 5 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 5 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 5 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0

act called from main loop: [[ 0  0  0  0 -3  0  0  0 10  5  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3  0  0  0 10  5  0]]
act called from main loop: [[ 0  0  0  0 -3  0  0  0 10  5  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3  0  0  0 10  5  0]]
act called from main loop: [[ 0  0  0  0 -3  0  0  0 10  5  0]]
act called from main loop: [[ 0  0  0  0 -3  0  0  0 10  5  0]]
act called from main loop: [[416 228   0   0 -25  -4 140 -21  25  -2   0]]
state that the model will use to predict action: [[416 228   0   0 -25  -4 140 -21  25  -2   0]]
agent will remember: [[416 228   0   0 -25  -4 140 -21  25  -2   0]]
act called from main loop: [[410 227   0   0  -6   1 136 -19 -10  -1   0]]
act called from main loop: [[  0   0   0   0  -6   1   0   0 -10  -1   0]]
act called from main loop: [[  0   0   0   0  -6   1   0   0 -10  -1   0]]
act called from main loop: [[  0   0   0   0  -6   1   0   0 -10  -1   0]]
act called from main loop: [[  

act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 -1 -9  0]]
act called from main loop: [[615 162   0   0  43  10 -40 107   5  -1   0]]
agent will remember: [[615 162   0   0  43  10 -40 107   5  -1   0]]
act called from main loop: [[619 160   0   0   4   2 -42 109   2   0   0]]
act called from main loop: [[0 0 0 0 4 2 0 0 2 0 0]]
act called from main loop: [[0 0 0 0 4 2 0 0

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main 

act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from main loop: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -3  0  0 -6 -1  0]]
act called from 

act called from main loop: [[  0   0   0   0   0  -2   0   0  71 -18   0]]
act called from main loop: [[  0   0   0   0   0  -2   0   0  71 -18   0]]
act called from main loop: [[  0   0   0   0   0  -2   0   0  71 -18   0]]
state that the model will use to predict action: [[  0   0   0   0   0  -2   0   0  71 -18   0]]
act called from main loop: [[  0   0   0   0   0  -2   0   0  71 -18   0]]
act called from main loop: [[  0   0   0   0   0  -2   0   0  71 -18   0]]
act called from main loop: [[  0   0   0   0   0  -2   0   0  71 -18   0]]
act called from main loop: [[435 164   0   0  24  25  73   8 -50  -1   0]]
agent will remember: [[435 164   0   0  24  25  73   8 -50  -1   0]]
act called from main loop: [[436 158   0   0   1   6  72  14   0   0   0]]
act called from main loop: [[0 0 0 0 1 6 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 1 6 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 1 6 0 0 0 0 0]]
act called from main loop: [[0 0 0 0 1 6 0 0 0 0 0]]
state that the model w

act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 21 -1  0]]
act called from 

act called from main loop: [[222 173   0   0  53   7 341  10 -32  -3   0]]
agent will remember: [[222 173   0   0  53   7 341  10 -32  -3   0]]
act called from main loop: [[224 172   0   0   2   1 342   0   3  11   0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  3 11  0]]
act called 

act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
state that the model will use to predict action: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
state that the model will use to predict action: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
state that the model will use to predict action: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
state that the model will use to predict action: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called from main loop: [[  0   0   0   0  -1   2   0   0 -14 -11   0]]
act called f

act called from main loop: [[   0    0    0    0 -136    1    0    0   -5    6    0]]
act called from main loop: [[   0    0    0    0 -136    1    0    0   -5    6    0]]
act called from main loop: [[   0    0    0    0 -136    1    0    0   -5    6    0]]
act called from main loop: [[   0    0    0    0 -136    1    0    0   -5    6    0]]
state that the model will use to predict action: [[   0    0    0    0 -136    1    0    0   -5    6    0]]
act called from main loop: [[ 480   76    0    0  180   11 -230   70  -17  -28    0]]
state that the model will use to predict action: [[ 480   76    0    0  180   11 -230   70  -17  -28    0]]
agent will remember: [[ 480   76    0    0  180   11 -230   70  -17  -28    0]]
act called from main loop: [[ 484   74    0    0    4    2 -229   71    5    1    0]]
act called from main loop: [[0 0 0 0 4 2 0 0 5 1 0]]
state that the model will use to predict action: [[0 0 0 0 4 2 0 0 5 1 0]]
act called from main loop: [[0 0 0 0 4 2 0 0 5 1 0]]
act cal

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 196   1   0 514   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1

act called from main loop: [[  0   0   0   0   2   1   0   0   0 -10   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   0 -10   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   0 -10   0]]
state that the model will use to predict action: [[  0   0   0   0   2   1   0   0   0 -10   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   0 -10   0]]
state that the model will use to predict action: [[  0   0   0   0   2   1   0   0   0 -10   0]]
act called from main loop: [[404 190   0   0  35 -13  68   3  25   1   0]]
agent will remember: [[404 190   0   0  35 -13  68   3  25   1   0]]
act called from main loop: [[410 189   0   0   6   1  70   5   8  -1   0]]
act called from main loop: [[ 0  0  0  0  6  1  0  0  8 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  6  1  0  0  8 -1  0]]
act called from main loop: [[ 0  0  0  0  6  1  0  0  8 -1  0]]
act called from main loop: [[ 0  0  0  0  6  1  0  0  8 -1  0]]
act calle

act called from main loop: [[ 0  0  0  0  4  1  0  0  5 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  5 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0  5 -2  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  5 -2  0]]
act called from main loop: [[593 102   0   0  69  19 -46 147  58 -15   0]]
agent will remember: [[593 102   0   0  69  19 -46 147  58 -15   0]]
act called from main loop: [[595 103   0   0   2  -1 -26 148  22  -2   0]]
state that the model will use to predict action: [[595 103   0   0   2  -1 -26 148  22  -2   0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 22 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -1  0  0 22 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 22 -2  0]]
state that the model will use to predict a

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  

act called from main loop: [[ 0  0  0  0  3  0  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  1 -5  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  1 -5  0]]
act called from main loop: [[402 214   0   0  38  -2 -30 -16 -46  -7   0]]
agent will remember: [[402 214   0   0  38  -2 -30 -16 -46  -7   0]]
act called from main loop: [[406 215   0   0   4  -1 -37 -15  -3  -2   0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -3 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -2  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 -3 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 -3 -2  0]]
act called from main loop: [[ 0  

act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  0  0]]
act called from main loop: [[535 234   0   0  47   0 -31  17  46   0   0]]
state that the model will use to predict action: [[535 234   0   0  47   0 -31  17  46   0   0]]
agent will remember: [[535 234   0   0  47   0 -31  17  46   0   0]]
act called from main loop: [[540 234   0   0   5   0 -24  14  12   3   0]]


act called from main loop: [[137 187 668 196   1   0 531   9  -1   0   0]]
state that the model will use to predict action: [[137 187 668 196   1   0 531   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the m

act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0  1 -3  0]]
state that the m

act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0  3 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0  3 -5

act called from main loop: [[137 187 668 196   1   0 531   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: 

act called from main loop: [[ 0  0  0  0  3  0  0  0 -6  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -6  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6  1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -6  1  0]]
act called from main loop: [[398 224   0   0  36  -6 -15 -34 -32  -2   0]]
state that the model will use to predict action: [[398 224   0   0  36  -6 -15 -34 -32  -2   0]]
agent will remember: [[398 224   0   0  36  -6 -15 -34 -32  -2   0]]
act called from main loop: [[372 205   0   0 -26  19   8 -14  -3  -1   0]]
state that the model will use to predict action: [[372 205   0   0 -26  19   8 -14  -3  -1   0]]
act called from main loop: [[  0   0   0   0 -26  19   0   0  -3  -1   0]]
act called from main loop: [[  0   0   0   0 -26  19   0   0  -3  -1   0]]
act called from main loop: [[  

agent will remember: [[ 624  211    0    0  228    2 -229   17   23   17    0]]
act called from main loop: [[ 628  212    0    0    4   -1 -213   12   20    4    0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 20  4  0]]


act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 652 196   1   0 515   9  -1   0   0]]
state that the model will use to predict action: [[137 187 652 196   1   0 515   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called

act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
state that the model will use to predict action: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
state that the model will use to predict action: [[  0   0   0   0  -2  -1   0   0 -14  -2   0]]
act called from main loop: [[315 211   0   0 -29 -15 154

act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from main loop: [[ 0  0  0  0  1 -3  0  0  4 -4  0]]
act called from 

act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
state that the model will use to predict action: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[  0   0   0   0  95 -22   0   0   5  -1   0]]
act called from main loop: [[ 547  262    0    0   21   26 -174   -2  -63   -8    0]]
state that the model will use to predict action: [[ 547  262    0    0   21   26 -174   -2  -63   -8    0]]
agent will remember: [[ 547  262    0    0   21   26 -174   -2  -63   -8    0]]
act called from main loop: [[

act called from main loop: [[137 187 668 195   1   0 531   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0

act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 6 0 0]]
act called from main loop: [[409 206   0   0  37 -22  55  -8   4  -3   0]]
state that the model will use to predict action: [[409 206   0   0  37 -22  55  -8   4  -3   0]]
agent will remember: [[409 206   0   0  37 -22  55  -8   4  -3   0]]
act called from main loop: [[411 208   0   0   2  -2  53  -8   0  -2   0]]
act called from main loop: [[ 0  0  0  0  2 -2  0  0  0 -2  0]]
act called from main loop: [[ 0  0  0  0  2 -2

act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 0 0]]
act called from main loop: [[556 165   0   0  33 -11 -19 -18  38  -6   0]]
agent will remember:

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 668 195   1   0 531   8  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0 

act called from main loop: [[ 0  0  0  0  1 -1  0  0  4  1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  4  1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  4  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0  4  1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  4  1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  4  1  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  4  1  0]]
act called from main loop: [[323 194   0   0 -29  -6  46   8 -53  -2   0]]
state that the model will use to predict action: [[323 194   0   0 -29  -6  46   8 -53  -2   0]]
agent will remember: [[323 194   0   0 -29  -6  46   8 -53  -2   0]]
act called from main loop: [[320 194   0   0  -3   0  55  17   6  -9   0]]
act called from main loop: [[ 0  0  0  0 -3  0  0  0  6 -9  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3  0  0  0  6 -9  0]]
act called from main loop: [[ 0  0  0  0 -3  0  0  0  6 -9  0]]


act called from main loop: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -1 15  0]]
act called from main loop: [[ 0  0  

act called from main loop: [[ 0  0  0  0  8  0  0  0  8 -2  0]]
act called from main loop: [[ 0  0  0  0  8  0  0  0  8 -2  0]]
act called from main loop: [[ 0  0  0  0  8  0  0  0  8 -2  0]]
act called from main loop: [[ 0  0  0  0  8  0  0  0  8 -2  0]]
act called from main loop: [[ 704  139    0    0  175   13 -175   85   34   12    0]]
agent will remember: [[ 704  139    0    0  175   13 -175   85   34   12    0]]
act called from main loop: [[ 708  139    0    0    4    0 -176  100    3  -15    0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -15   0]]
state that the model will use to predict action: [[  0   0   0   0   4   0   0   0   3 -15   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -15   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -15   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -15   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0   3 -15   0]]
state that the model

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[161 184   0   0  24   3 458  -5 -32  18   0]]
agent will remember: [[161 184   0   0  24   3 458  -5 -32  18   0]]
act called from main loop: [[166 184   0   0   5   0 443   0 -10  -5   0]]
state that the model will use to predict action: [[166 184   0   0   5   0 443   0 -10  -5   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -10  -5   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -10  -5   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -10  -5   0]]
act called from main loop: [[  

act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0 -5 14  0]]
act called from main loop: [[298 198   0   0 -28  10  21 -10 -63   6   0]]
agent will remember: [[298 198   0   0 -28  10  21 -10 -63   6   0]]
act called from main loop: [[298 198   0   0   0   0  17  -6  -4  -4   0]]
state that the model will use to predict action: [[298 198   0   0   0   0  17  -6  -4  -4   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -4 -4  0]]
act called from main l

act called from main loop: [[  0   0   0   0   2   1   0   0   8 -37   0]]
act called from main loop: [[  0   0   0   0   2   1   0   0   8 -37   0]]
act called from main loop: [[ 488  223    0    0  161    6 -137   43   49   37    0]]
agent will remember: [[ 488  223    0    0  161    6 -137   43   49   37    0]]
act called from main loop: [[ 491  221    0    0    3    2 -134   44    6    1    0]]
act called from main loop: [[0 0 0 0 3 2 0 0 6 1 0]]
act called from main loop: [[0 0 0 0 3 2 0 0 6 1 0]]
state that the model will use to predict action: [[0 0 0 0 3 2 0 0 6 1 0]]
act called from main loop: [[0 0 0 0 3 2 0 0 6 1 0]]
state that the model will use to predict action: [[0 0 0 0 3 2 0 0 6 1 0]]
act called from main loop: [[0 0 0 0 3 2 0 0 6 1 0]]
state that the model will use to predict action: [[0 0 0 0 3 2 0 0 6 1 0]]
act called from main loop: [[0 0 0 0 3 2 0 0 6 1 0]]
state that the model will use to predict action: [[0 0 0 0 3 2 0 0 6 1 0]]
act called from main loop: [[0 0 

act called from main loop: [[ 0  0  0  0  4  0  0  0 15 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 15 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 15 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15 -1  0]]
act called from main loop: [[ 771  182    0    0  203    4 -198   35   15   -7    0]]
agent will remember: [[ 771  182    0    0  203    4 -198   35   15   -7    0]]
act called from main loop: [[ 774  182    0    0    3    0 -183   24   18   11    0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 18 11  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 18 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 18 11  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 18 11  0]]
state that the model will use to predict a

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[161 185   0   0  24   2 443   6

act called from main loop: [[315 205   0   0 -31 -12 147   3  14 -14   0]]
agent will remember: [[315 205   0   0 -31 -12 147   3  14 -14   0]]
act called from main loop: [[312 204   0   0  -3   1 163   4  13   0   0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
act called from main loop: [[ 0  0  0  0 -3  1  0  0 13  0  0]]
state that 

act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  2  0  0  3 -6  0]]
act called from main loop: [[ 0  0  0 

act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 5 0]]
act called from main loop: [[ 672   96    0    0   23   17 -183   34   35   21    0]]
agent will remember: [[ 672   96    0    0   23   17 -183   34   35   21    0]]
act called from main loop: [[ 675   95    0    0    3    1 -168   33   18    2    0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 18  2  0]]
act called from main loop: [[ 0  0 

act called from main loop: [[0 0 0 0 5 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 5 0 0 0 3 3 0]]
act called from main loop: [[222 171   0   0  51   8 346  21 -36  -4   0]]
agent will remember: [[222 171   0   0  51   8 346  21 -36  -4   0]]
act called from main loop: [[224 171   0   0   2   0 336  24  -8  -3   0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -8 -3  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -8 -3  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -8 -3  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -8 -3  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -8 -3  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -8 -3  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -8 -3  0]]
act called from

act called from main loop: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
act called from main loop: [[ 0  0  0  0 -1  4  0  0  0 -6  0]]
act called from main loop: [[318 201   0   0  -1 -20  66  52  26 -25   0]]
state that the model will use to predict action: [[318 201   0   0  -1 -20  66  52  26 -25   0]]
agent will remember: [[318 201   0   0  -1 -20  66  52  26 -25   0]]
act called from main loop: [[317 201   0   0  -1   0  69  48   2   4   0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0  2  4  0]]
act called from main loop: [[ 0  0  0  0 -1  0  0  0  2  4  0]]


act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  3 -8  0]]
act called from main loop: [[517 143   0   0 -42   0 -31   6  54  22   0]]
agent will remember: [[517 

act called from main loop: [[ 0  0  0  0  1  0  0  0  0 27  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0  0 27  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0  0 27  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0  0 27  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0  0 27  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0  0 27  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0  0 27  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0  0 27  0]]
act called from main loop: [[  30   30    0    0 -758  154  665  144   42   14    0]]
state that the model will use to predict action: [[  30   30    0    0 -758  154  665  144   42   14    0]]
agent will remember: [[  30   30    0    0 -758  154  665  144   42   14    0]]
act called from main loop: [[ 30  30   0   0   0   0 670 145   5  -1   0]]
state that the model will use to predict action: [[ 30  30   0   0   0   0 670 145   5  -1   0]

act called from main loop: [[222 169   0   0  50   8 340  32 -35  -4   0]]
agent will remember: [[222 169   0   0  50   8 340  32 -35  -4   0]]
act called from main loop: [[224 168   0   0   2   1 332  33  -6   0   0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6

act called from main loop: [[ 0  0  0  0  1 -1  0  0 -3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 -3 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 -3 -6  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0 -3 -6  0]]
act called from main loop: [[486 190   0   0  43 -13 -12  32 -19   8   0]]
state that the model will use to predict action: [[486 190   0   0  43 -13 -12  32 -19   8   0]]
agent will remember: [[486 190   0   0  43 -13 -12  32 -19   8   0]]
act called from main loop: [[494 192   0   0   8  -2 -21  30  -1   0   0]]
state that the model will use to predict action: [[494 192   0   0   8  -2 -21  30  -1   0   0]]
act called from main loop: [[ 0  0  0  0  8 -2  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  8 -2  0  0 -1  0  0]]
act calle

act called from main loop: [[ 0  0  0  0  5  1  0  0  6 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  1  0  0  6 -4  0]]
act called from main loop: [[ 0  0  0  0  5  1  0  0  6 -4  0]]
act called from main loop: [[ 0  0  0  0  5  1  0  0  6 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  5  1  0  0  6 -4  0]]
act called from main loop: [[ 0  0  0  0  5  1  0  0  6 -4  0]]
act called from main loop: [[528 219   0   0  39  -6 -18   5  50  -6   0]]
agent will remember: [[528 219   0   0  39  -6 -18   5  50  -6   0]]
act called from main loop: [[532 218   0   0   4   1 -24   3  -2   3   0]]
state that the model will use to predict action: [[532 218   0   0   4   1 -24   3  -2   3   0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0 -2  3  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0 -2  3  0]]
state that the model will use to predict a

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 667 200   1   0 530  13  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1

act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 2 7 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 2 7 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0

act called from main loop: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
state that the model will use to predict action: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
state that the model will use to predict action: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
act called from main loop: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
state that the model will use to predict action: [[  0   0   0   0   1   1   0   0  -4 -12   0]]
act called from main loop: [[ 624  158    0    0   27   16 -254   45  -19   26    0]]
state that the model will use to predict action: [[ 624  158    0    0   27   16 -254   45  -19   26    0]]
agent will remember: [[ 624  158    0    0   27   16 -254   45  -

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 667 196   1   0 530   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called

act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
state that the model will use to predict action: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
state that the model will use to predict action: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
state that the model will use to predict action: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
state that the model will use to predict action: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[0 0 0 0 2 1 0 0 0 1 0]]
act called from main loop: [[406 190   0   0  34 -17  78  -1  42

act called from main loop: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
state that the model will use to predict action: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
act called from main loop: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
state that the model will use to predict action: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
act called from main loop: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
state that the model will use to predict action: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
act called from main loop: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
state that the model will use to predict action: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
act called from main loop: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
act called from main loop: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
state that the model will use to predict action: [[  0   0   0   0   4 -48   0   0   7  -3   0]]
act called from main loop: [[446 243   0   0  -6 -26  71 -52  16 

act called from main loop: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
act called from main loop: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
state that the model will use to predict action: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
act called from main loop: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
act called from main loop: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
state that the model will use to predict action: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
act called from main loop: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
state that the model will use to predict action: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
act called from main loop: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
act called from main loop: [[   0    0    0    0 -150    0    0    0    4   -1    0]]
act called from main loop: [[   0    0    0    0 -150    0    0    0    4 

act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0 -1 -3  0]]
Game is finished, 
 your final reward is: 146.07750000000004, duration was 311 timesteps
 0.2916666666666667 - 0.95 - [[280 176   0   0   4   0 232  18 -10   1   0]]
not done yet, target : nan
 0.49666666666666665 - 0.95 - [[492 276   0   0   4   0 -39  35 -10   1   0]]
not done yet, target : 

act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -9 -5  0]]
act called from main loop: [[275 185   0   0  48  -9 244  11 -32   4   0]]
agent will remember: [[275 185   0   0  48  -9 244  11 -32   4   0]]
act called from main loop: [[278 185   0   0   3   0 234  10  -7   1   0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -7  1  0]]
act called from main loop: [[ 0  

agent will remember: [[ 474  249    0    0   40  -15 -163  -30  -22  -24    0]]
act called from main loop: [[ 479  250    0    0    5   -1 -170   -8   -2  -23    0]]
state that the model will use to predict action: [[ 479  250    0    0    5   -1 -170   -8   -2  -23    0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
state that the model will use to predict action: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[  0   0   0   0   5  -1   0   0  -2 -23   0]]
act called from main loop: [[

act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
state that the model will use to predict action: [[  0   0   0   0 221   0   0   0   4  -1   0]]
act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
act called from main loop: [[  0   0   0   0 221   0   0   0   4  -1   0]]
state that the model will use to predict action: [[  0   0   0   0 221   0   0   0   4  -1   0]]
Game is finished, 
 your final reward is: 93.59416666666665, duration was 205 timesteps
 0.4825 - 0.95 - [[532 218   0   0   4   1 -24   3  -2   3   0]]
not done yet, target : nan
 0.3925 - 0.95 - [[ 664  2

act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -5  5  0]]
act called from main loop: [[278 178   0   0  53  -4 243  -5 -33   1   0]]
agent will remember: [[278 178   0   0  53  -4 243  -5 -33   1   0]]
act called from main loop: [[281 178   0   0   3   0 237  -1  -3  -4   0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3 -4  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3 -4  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3

act called from main loop: [[ 0  0  0  0  0  1  0  0 45 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0 45 -4  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 45 -4  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 45 -4  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0 45 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0 45 -4  0]]
act called from main loop: [[505 187   0   0  53   0 -43  28 -52  23   0]]
state that the model will use to predict action: [[505 187   0   0  53   0 -43  28 -52  23   0]]
agent will remember: [[505 187   0   0  53   0 -43  28 -52  23   0]]
act called from main loop: [[510 187   0   0   5   0 -18  24  30   4   0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 30  4  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 30  4  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 30  4  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 30  4  0]]


act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
state that the model will use to predict action: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
state that the model will use to predict action: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0   0  15   0   0]]
act called from main loop: [[  0   0   0   0 -72   1   0

act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 17 -1  0]]
act called from 

act called from main loop: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
act called from main loop: [[ 0  0  0  0  5 -1  0  0 -1 -3  0]]
act called from main loop: [[444 220   0   0  31  -8 -83   5 -11 -24   0]]
agent will remember: [[444 220   0   0  31  -8 -83   5 -11 -24   0]]
act called from main loop: [[446 221   0   0   2  -1 -83   5   2  -1   0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  2 -1  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0  2 -1  0]]
state that 

act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  0  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0  6  0  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  0  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  0  0]]
act called from main loop: [[ 682  251    0    0   59    0 -102  -53   52   15    0]]
agent will remember: [[ 682  251    0    0   59    0 -102  -53   52   15    0]]
act called from main loop: [[684 251   0   0   2   0 -98 -53   6   0   0]]
state that the model will use to predict action: [[684 251   0   0   2   0 -98 -53   6   0   0]]
act called from main loop: [[0 0 0 0 2 0 0 0 6 0 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 6 0 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 6 0 0]]
act called from main loop: [[0 0 

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[159 188   0   0  22  -1 445   3 -58   5   0]]
agent will remember: [[159 188   0   0  22  -1 445   3 -58   5   0]]
act called from main loop: [[163 190   0   0   4  -2 453   2  12  -1   0]]
state that the model will use to predict action: [[163 190   0   0   4  -2 453   2  12  -1   0]]
act called from main loop: [[ 0  0  0  0  4 -2  0  0 12 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  

act called from main loop: [[347 242   0   0  -4  -7  64  -9 -12 -16   0]]
state that the model will use to predict action: [[347 242   0   0  -4  -7  64  -9 -12 -16   0]]
agent will remember: [[347 242   0   0  -4  -7  64  -9 -12 -16   0]]
act called from main loop: [[345 243   0   0  -2  -1  53 -10 -13   0   0]]
state that the model will use to predict action: [[345 243   0   0  -2  -1  53 -10 -13   0   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -13   0   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -13   0   0]]
state that the model will use to predict action: [[  0   0   0   0  -2  -1   0   0 -13   0   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -13   0   0]]
state that the model will use to predict action: [[  0   0   0   0  -2  -1   0   0 -13   0   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -13   0   0]]
act called from main loop: [[  0   0   0   0  -2  -1   0   0 -13   0   0]]
act called from ma

agent will remember: [[392 274   0   0  46  19 -30  83 -16 -46   0]]
act called from main loop: [[397 276   0   0   5  -2 -17  78  18   3   0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main loop: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  5 -2  0  0 18  3  0]]
act called from main l

act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0 23 -7  0]]
act called from main loop: [[ 0  0  0 

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 651 204   1   0 514  17  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -

act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -4  2  0]]
act called from main loop: [[334 163   0   0 -18   3  44  38 -43 -10   0]]
agent will remember: [[334 163   0   0 -18   3  44  38 -43 -10   0]]
act called from main loop: [[329 163   0   0  -5   0  42  46  -7  -8   0]]
act called 

act called from main loop: [[0 0 0 0 4 1 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 1 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 1 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 1 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 1 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 1 0]]
act called from main loop: [[ 482  150    0    0   20   -6 -162   19   52    8    0]]
agent will remember: [[ 482  150    0    0   20   -6 -162   19   52    8    0]]
act called from main loop: [[ 484  150    0    0    2    0 -163   17    1    2    0]]
state that the model will use to predict action: [[ 484  150    0    0    2    0 -163   17    1    2    0]]
act called from main loop: [[0 0 0 0 2 0 0 0 1 2 0]]
act called from main loop: [[0 0 0 0 2 0 0 0

act called from main loop: [[ 0  0  0  0  3 -1  0  0  0 -4  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  0 -4  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  0 -4  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  0 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  0 -4  0]]
act called from main loop: [[530 187   0   0  41   0 -44  33  37 -12   0]]
agent will remember: [[530 187   0   0  41   0 -44  33  37 -12   0]]
act called from main loop: [[534 188   0   0   4  -1 -21  29  27   3   0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 27  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 27  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 27  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 27  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0 27  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0 27  3  0]]
act called 

act called from main loop: [[137 187 665 196   1   0 528   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act c

act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -3  0  0]]
act called from 

act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -5  2  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -5  2

act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[ 0  0  0  0  4  1  0  0  9 -1  0]]
act called from main loop: [[  30   30    0    0 -646  250  670  229   48    5    0]]
state that the model will use to predict action: [[  30   30    0    0 -646  250  670  229   48    5  

act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0 -2  6  0]]
act called from main loop: [[ 0  0  0 

act called from main loop: [[  0   0   0   0   3   1   0   0 -38   7   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0 -38   7   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -38   7   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0 -38   7   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -38   7   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0 -38   7   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -38   7   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0 -38   7   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -38   7   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -38   7   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -38   7   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0 -38 

act called from main loop: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0 -7 -5  0]]
act called from main loop: [[ 420  173    0    0 -266   32  -22   18   65  -12    0]]
agent will remember: [[ 420  173    0    0 -266   32  -22   18   65  -12    0]]
act called from main loop: [[421 171   0   0   1   2 -29 -15  -6  35   0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0 -6 35  0]]
act called from main loop: [[ 0  0  0  0  1  2  0  0 

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[167 179   0   0  30   8 437  14 -63   3   0]]
state that the model will use to predict action: [[167 179   0   0  30   8 437  14 -63   3   0]]
agent will remember: [[167 179   0   0  30   8 437  14 -63   3   0]]
act called from main loop: [[171 179   0   0   4   0 430  15  -3  -1   0]]
state that the model will use to predict action: [[171 179   0   0   4   0 430  15  -3  -1   0]]
act called from main loop: [[ 0

act called from main loop: [[ 0  0  0  0 -3 -1  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0  1 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0 -3 -1  0  0  1 -1  0]]
act called from main loop: [[ 0  0  0  0 -3 -1  0  0  1 -1  0]]
act called from main loop: [[321 196   0   0 -29 -16 129   2  -6  -7   0]]
state that the model will use to predict action: [[321 196   0   0 -29 -16 129   2  -6  -7   0]]
agent will remember: [[321 196   0   0 -29 -16 129   2  -6  -7   0]]
act called from main loop: [[318 194   0   0  -3   2 138   6   6  -2   0]]
state that the model will use to predict action: [[318 194   0   0  -3   2 138   6   6  -2   0]]
act called from main loop: [[ 0  0  0  0 -3  2  0  0  6 -2  0]]
act called from main loop: [[ 0  0  0  0 -3  2  0  0  6 -2  0]]
act called from main loop: [[ 0  0  0  0 -3  2  0  0  6 -2  0]]
state that the model will use to predict action: [[ 0

act called from main loop: [[ 0  0  0  0  4  0  0  0  2 -4  0]]
act called from main loop: [[432 163   0   0  38  12  51  51  22 -24   0]]
agent will remember: [[432 163   0   0  38  12  51  51  22 -24   0]]
act called from main loop: [[434 161   0   0   2   2  51  59   2  -6   0]]
state that the model will use to predict action: [[434 161   0   0   2   2  51  59   2  -6   0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0  2 -6  0]]
state that the model will use to predict a

act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  1  0  0 19  4  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 19  4

act called from main loop: [[  0   0   0   0   5   0   0   0 -15   0   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -15   0   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -15   0   0]]
state that the model will use to predict action: [[  0   0   0   0   5   0   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -15   0   0]]
act called from main loop: [[  0   0   0   0   5   0   0   0 -15   0   0]]
act called from main loop: [[223 175   0   0  53   6 345  14 -30   2   0]]
agent will remember: [[223 175   0   0  53   6 345  14 -30   2   0]]
act called from main loop: [[224 174   0   0   1   1 341  15  -3   0   0]]
state that the model will use to predict

act called from main loop: [[  0   0   0   0  -1  -7   0   0 -10 -23   0]]
act called from main loop: [[  0   0   0   0  -1  -7   0   0 -10 -23   0]]
state that the model will use to predict action: [[  0   0   0   0  -1  -7   0   0 -10 -23   0]]
act called from main loop: [[422 235   0   0  24 -10  22  38  -6 -15   0]]
agent will remember: [[422 235   0   0  24 -10  22  38  -6 -15   0]]
act called from main loop: [[422 236   0   0   0  -1  26  39   4  -2   0]]
state that the model will use to predict action: [[422 236   0   0   0  -1  26  39   4  -2   0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  4 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  4 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  4 -2  0]]
act calle

act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
act called from main loop: [[ 0  0  0  0 -2 -1  0  0  1 -8  0]]
state that the m

agent will remember: [[256 488   0   0 -16 -33  56 -70 -24   6   0]]
act called from main loop: [[256 489   0   0   0  -1  55 -71  -1   0   0]]
state that the model will use to predict action: [[256 489   0   0   0  -1  55 -71  -1   0   0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0 -1  0  0]]
Game is finished, 
 your final reward is: 121.3741666666667, duration was 260 timesteps
 0.2825 - 0.95 - [[271 162   0   0   2   1 242  19 -14   5   0]]
not done yet, target : nan
 0.26 - 0.95 - [[ 632  190    0    0    5   -2 -304   16   -2    1    0]]
not done yet, target : nan
 0.2991666666666667 - 0.95 - [[272 203   0   0   2  -1 249  -8  -4   1   0]]
not done yet, target : nan
 0.4575 - 0.95 - [[546 172   0  

act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  2 -1  0  0 -1  0  0]]
act called from main loop: [[270 204   0   0  44 -12 247 -10 -39   4   0]]
state that the model will use to predict action: [[270 204   0   0  44 -12 247 -10 -39   4   0]]
agent will remember: [[270 204   0   0  44 -12 247 -10 -39   4   0]]
act called from main loop: [[273 205   0   0   3  -1 241 -13  -3   2   0]]
state that the model will use to predict action: [[273 205   0   0   3  -1 241 -13  -3

act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
state that the model will use to predict action: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
state that the model will use to predict action: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
state that the model will use to predict action: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
state that the model will use to predict action: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called from main loop: [[  0   0   0   0 104 -48   0   0   9   0   0]]
act called f

act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[0 0 0 0 4 1 0 0 3 0 0]]
state that the model will use to predict action: [[0 0 0 0 4 1 0 0 3 0 0]]
act called from main loop: [[486 270   0   0  44  -5 -25 -12  46  -5   0]]
state that the model

agent will remember: [[222 172   0   0  51   6 336  21 -52   0   0]]
act called from main loop: [[224 171   0   0   2   1 331  21  -3   1   0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -3  1  0]]
act called from main loop: [[ 0  0  0  0  2 

act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0  6 49  0]]
state that the model will use to predict action: [[ 0  0  

act called from main loop: [[ 677  188    0    0   60   -7 -181 -108   54   12    0]]
agent will remember: [[ 677  188    0    0   60   -7 -181 -108   54   12    0]]
act called from main loop: [[ 526   93    0    0 -151   95  -36  -12   -6   -1    0]]
state that the model will use to predict action: [[ 526   93    0    0 -151   95  -36  -12   -6   -1    0]]
act called from main loop: [[   0    0    0    0 -151   95    0    0   -6   -1    0]]
act called from main loop: [[   0    0    0    0 -151   95    0    0   -6   -1    0]]
state that the model will use to predict action: [[   0    0    0    0 -151   95    0    0   -6   -1    0]]
act called from main loop: [[   0    0    0    0 -151   95    0    0   -6   -1    0]]
act called from main loop: [[   0    0    0    0 -151   95    0    0   -6   -1    0]]
state that the model will use to predict action: [[   0    0    0    0 -151   95    0    0   -6   -1    0]]
act called from main loop: [[   0    0    0    0 -151   95    0    0   -6   -1  

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 653 196   1   0 516   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main 

act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0 0 0 4 0]]
act called from main loop: [[0 0 0 0 3 1 0 0 0 4 0]]
state that the model will use to predict action: [[0 0 0 0 3 1 0

act called from main loop: [[ 604  200    0    0   56   -5 -180  -61   -5   23    0]]
agent will remember: [[ 604  200    0    0   56   -5 -180  -61   -5   23    0]]
act called from main loop: [[ 606  200    0    0    2    0 -179  -64    3    3    0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 3 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 3 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 3 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 3 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 3 0]]
act called from main loop: [[0 0 0 0 2 0 0 0 3 3 0]]
state that the model will use to predict action: [[0 0 0 0 2 0 0 0 3 3 0]]
ac

act called from main loop: [[626 204   0   0  34 -18 -18 -17  54 -17   0]]
agent will remember: [[626 204   0   0  34 -18 -18 -17  54 -17   0]]
act called from main loop: [[628 205   0   0   2  -1 -39 -17 -19  -1   0]]
Game is finished, 
 your final reward is: 104.12, duration was 226 timesteps
 0.4866666666666667 - 0.95 - [[ 398   57    0    0    1    1 -107   91   -5   -9    0]]
not done yet, target : nan
 0.4791666666666667 - 0.95 - [[595 222   0   0   2  -3 -21  -4   1  -2   0]]
not done yet, target : nan
 0.4891666666666667 - 0.95 - [[583 240   0   0   3   0 -27  14  -8   0   0]]
not done yet, target : nan
 0.2966666666666667 - 0.95 - [[279 186   0   0   2   0 238   6  -4  -4   0]]
not done yet, target : nan
 0.4658333333333333 - 0.95 - [[656 190   0   0   4   0 -27 -14   6   1   0]]
not done yet, target : nan
 0.13333333333333333 - 0.95 - [[168 182   0   0   4   0 438   2  -6   0   0]]
not done yet, target : nan
 0.4666666666666667 - 0.95 - [[296 272   0   0   0   2  71 -31  -4  

act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -5 -8  0]]
act called from main loop: [[268 164   0   0  46   4 257  40 -34   3   0]]
agent will remember: [[268 164   0   0  46   4 257  40 -34   3   0]]
act called from main loop: [[270 162   0   0   2   2 253  43  -2  -1   0]]
act called from main loop: [[ 0  0  0  0  2  2  0  0 

act called from main loop: [[  0   0   0   0   2   0   0   0 -58   8   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0 -58   8   0]]
state that the model will use to predict action: [[  0   0   0   0   2   0   0   0 -58   8   0]]
act called from main loop: [[482 116   0   0  44   8 -96  45  38  11   0]]
agent will remember: [[482 116   0   0  44   8 -96  45  38  11   0]]
act called from main loop: [[486 116   0   0   4   0 -85  44  15   1   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 15  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 15  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 15  1  0]]
act called from main loop: [[ 0  0  0  0  

act called from main loop: [[ 0  0  0  0  3  0  0  0  0 19  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0  0 19  0]]
Game is finished, 
 your final reward is: 91.99833333333335, duration was 201 timesteps
 0.4625 - 0.95 - [[496 211   0   0   5  -2 -19  64   0   0   0]]
not done yet, target : nan
 0.11583333333333333 - 0.95 - [[166 185   0   0   5  -1 448  13  14 -10   0]]
not done yet, target : nan
 0.4525 - 0.95 - [[325  57   0   0   3   0 -35  92   4   0   0]]
not done yet, target : nan
 0.42916666666666664 - 0.95 - [[507 186   0   0   2   2 -55 140  -1 -13   0]]
not done yet, target : nan
 0.435 - 0.95 - [[299 140   0   0  -1   2  70   8 -14 -11   0]]
not done yet, target : nan
 0.42916666666666664 - 0.95 - [[359 171   0   0   0   0  57  28  -6   2   0]]
not done yet, target : nan
 0.4875 - 0.95 - [[514 108   0   0   2   0 -59  44   0 -10   0]]
not done yet, target : nan
 0.3825 - 0.95 - [[ 767  216    0    0    3    0 -158   17   10   -5    0]]
not done yet, target : nan

act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  1  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -6  1  0]]
act called from main loop: [[273 171   0   0  49   0 248  30 -41  -7   0]]
agent will remember: [[273 171   0   0  49   0 248  30 -41  -7   0]]
act called from main loop: [[276 170   0   0   3   1 242  29  -3   2   0]]
state that the model will use to predict action: [[276 170   0   0   3   1 242  29  -3   2   0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
act called from main loop: [[ 0  0  0  0  3  1  0  0 -3  2  0]]
state that the model will use to predict act

act called from main loop: [[  0   0   0   0   4   0   0   0 -10  -1   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -10  -1   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -10  -1   0]]
state that the model will use to predict action: [[  0   0   0   0   4   0   0   0 -10  -1   0]]
act called from main loop: [[  0   0   0   0   4   0   0   0 -10  -1   0]]
state that the model will use to predict action: [[  0   0   0   0   4   0   0   0 -10  -1   0]]
act called from main loop: [[537 141   0   0  86  13 -64 106  45  19   0]]
agent will remember: [[537 141   0   0  86  13 -64 106  45  19   0]]
act called from main loop: [[542 141   0   0   5   0 -74  99  -5   7   0]]
state that the model will use to predict action: [[542 141   0   0   5   0 -74  99  -5   7   0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -5  7  0]]
act called from main loop: [[ 0  0  0  0  5  0  0  0 -5  7  0]]
state that the model will use to predict action: [[ 0  0  0  0

act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0  11 -10   0]]
act called from main loop: [[  0  

act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4 -1  0  0  6  3  0]]
act called from main loop: [[224 180   0   0  57   3 333   1 -61   0   0]]
agent will remember: [[224 180   0   0  57   3 333   1 -61   0   0]]
act called from main loop: [[226 180   0   0   2   0 330  -4  -1   5   0]]
state that the model will use to predict action: [[22

act called from main loop: [[  0   0   0   0 -25   9   0   0  -3 -10   0]]
act called from main loop: [[  0   0   0   0 -25   9   0   0  -3 -10   0]]
act called from main loop: [[  0   0   0   0 -25   9   0   0  -3 -10   0]]
state that the model will use to predict action: [[  0   0   0   0 -25   9   0   0  -3 -10   0]]
act called from main loop: [[  0   0   0   0 -25   9   0   0  -3 -10   0]]
state that the model will use to predict action: [[  0   0   0   0 -25   9   0   0  -3 -10   0]]
act called from main loop: [[ 450  204    0    0   68  -15 -106    2  -29  -25    0]]
state that the model will use to predict action: [[ 450  204    0    0   68  -15 -106    2  -29  -25    0]]
agent will remember: [[ 450  204    0    0   68  -15 -106    2  -29  -25    0]]
act called from main loop: [[ 451  205    0    0    1   -1 -107   -4    0    5    0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  5  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  0  5  0]]
state that the mod

act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[ 0  0  0  0  1 -1  0  0  6  3  0]]
act called from main loop: [[578 224   0   0  37 -12 -36 -17  23 -14   0]]
state that the model will use to predict action: [[578 224   0   0  37 -12 -36 -17  23 -14   0]]
agent will remember: [[578 224   0   0  37 -12 -36 -17  23

act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[161 185   0   0  24   2 447   4 -43   6   0]]
agent will remember: [[161 185   0   0  24   2 447   4 -43   6   0]]
act called from main loop: [[166 185   0   0   5   0 442   4   0   0   0]]
state that the model will use to predict action: [[166 185   0   0   5   0 442   4   0   0   0]]
act called from main loop: [[0 0 0 0 5 0 0 0 0 0 0]]
state that the model will use to predict action: [[0 0 0 0 5 0 0 0 0 0 0]]


act called from main loop: [[ 0  0  0  0 -1  1  0  0 -8 -6  0]]
act called from main loop: [[ 0  0  0  0 -1  1  0  0 -8 -6  0]]
act called from main loop: [[334 237   0   0 -17 -24 110  -8   7  -5   0]]
agent will remember: [[334 237   0   0 -17 -24 110  -8   7  -5   0]]
act called from main loop: [[332 237   0   0  -2   0 120  -8   8   0   0]]
state that the model will use to predict action: [[332 237   0   0  -2   0 120  -8   8   0   0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  8  0  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  8  0  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2  0  0  0  8  0  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  8  0  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2  0  0  0  8  0  0]]
act called from main loop: [[ 0  0  0  0 -2  0  0  0  8  0  0]]
state that the model will use to predict action: [[ 0  0  0  0 -2  0  0  0  8  0  0]]
act called from main loop: [[ 0  0  0  0 -

act called from main loop: [[313 256   0   0 -34 -14 154  11  35 -30   0]]
agent will remember: [[313 256   0   0 -34 -14 154  11  35 -30   0]]
act called from main loop: [[312 259   0   0  -1  -3 159  10   4  -2   0]]
state that the model will use to predict action: [[312 259   0   0  -1  -3 159  10   4  -2   0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
act called from main loop: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0 -1 -3  0  0  4 -2  0]]


act called from main loop: [[300 212   0   0 -21  10  44  -2 -44   7   0]]
agent will remember: [[300 212   0   0 -21  10  44  -2 -44   7   0]]
act called from main loop: [[300 211   0   0   0   1  50 -12   6  11   0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called from main loop: [[ 0  0  0  0  0  1  0  0  6 11  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  1  0  0  6 11  0]]
act called 

act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 -2  0]]
state that the m

act called from main loop: [[137 187 667 196   1   0 530   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will u

agent will remember: [[321 117   0   0  36  16  98  75 -62   8   0]]
act called from main loop: [[324 115   0   0   3   2  93  79  -2  -2   0]]
state that the model will use to predict action: [[324 115   0   0   3   2  93  79  -2  -2   0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
act called from main loop: [[ 0  0  0  0  3  2  0  0 -2 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  2  0  0 

act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  1 13  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  1 13  0]]
act called from main loop: [[ 460   30    0    0   22    0 -129   75   35   19    0]]
state that the model will use to predict action: [[ 460   

act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0  1 -4  0]]
act called from 

act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
state that the model will use to predict action: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
act called from main loop: [[ 0  0  0  0  3 -1  0  0  3  5  0]]
state that the model will use to predict action: [[ 0  0  

act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
state that the model will use to predict action: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
state that the model will use to predict action: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
state that the model will use to predict action: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0   0   0   0   2   0   0   0   2 -18   0]]
act called from main loop: [[  0  

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 667 196   1   0 530   9  -1   0   0]]
state that the model will use to predict action: [[137 187 667 196   1   0 530   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0 

act called from main loop: [[  0   0   0   0   3   1   0   0 -13   0   0]]
act called from main loop: [[  0   0   0   0   3   1   0   0 -13   0   0]]
state that the model will use to predict action: [[  0   0   0   0   3   1   0   0 -13   0   0]]
act called from main loop: [[366 159   0   0  52   5  61  55 -31  -3   0]]
agent will remember: [[366 159   0   0  52   5  61  55 -31  -3   0]]
act called from main loop: [[368 158   0   0   2   1  54  58  -5  -2   0]]
state that the model will use to predict action: [[368 158   0   0   2   1  54  58  -5  -2   0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -5 -2  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -5 -2  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  

act called from main loop: [[ 424  157    0    0 -118   -9  -15  -11   49    1    0]]
agent will remember: [[ 424  157    0    0 -118   -9  -15  -11   49    1    0]]
act called from main loop: [[427 157   0   0   3   0 -23 -10  -5  -1   0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
act called from main loop: [[ 0  0  0  0  3  0  0  0 -5 -1  0]]
state that the model will use to predict action: [[ 0  0  0  0  3  0  0  0 

act called from main loop: [[137 187 652 196   1   0 515   9  -1   0   0]]
state that the model will use to predict action: [[137 187 652 196   1   0 515   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0

act called from main loop: [[346 158   0   0  38   3  75  20 -29  -1   0]]
agent will remember: [[346 158   0   0  38   3  75  20 -29  -1   0]]
act called from main loop: [[346 158   0   0   0   0  74  17  -1   3   0]]
state that the model will use to predict action: [[346 158   0   0   0   0  74  17  -1   3   0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0 -1  3  0]]


act called from main loop: [[  0   0   0   0 -31   0   0   0 -12   0   0]]
act called from main loop: [[ 387  126    0    0   77   12 -124  124  -42   -4    0]]
agent will remember: [[ 387  126    0    0   77   12 -124  124  -42   -4    0]]
act called from main loop: [[ 388  125    0    0    1    1 -123  128    2   -3    0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
act called from main loop: [[ 0  0  0  0  1  1  0  0  2 -3  0]]
state that the model will use to predict action: [[ 0

act called from main loop: [[ 610  110    0    0   42    4 -281   46   37   12    0]]
state that the model will use to predict action: [[ 610  110    0    0   42    4 -281   46   37   12    0]]
agent will remember: [[ 610  110    0    0   42    4 -281   46   37   12    0]]
act called from main loop: [[ 611  110    0    0    1    0 -264   42   18    4    0]]
state that the model will use to predict action: [[ 611  110    0    0    1    0 -264   42   18    4    0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 18  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 18  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 18  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 18  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 18  4  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 18  4  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 18  4  0]]
state that the mod

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
state that the model will use to predict action: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 652 196   1   0 515   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called

act called from main loop: [[369 192   0   0  64  -6  60  -6 -46   6   0]]
agent will remember: [[369 192   0   0  64  -6  60  -6 -46   6   0]]
act called from main loop: [[373 192   0   0   4   0  55  -7  -1   1   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 -1  1  0]]
state that the model will use to 

act called from main loop: [[ 599  217    0    0   51  -11 -227  -39  -54  -20    0]]
agent will remember: [[ 599  217    0    0   51  -11 -227  -39  -54  -20    0]]
act called from main loop: [[ 600  217    0    0    1    0 -228  -28    0  -11    0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   0 -11   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   0 -11   0]]
state that the model will use to predict action: [[  0   0   0   0   1   0   0   0   0 -11   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   0 -11   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   0 -11   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   0 -11   0]]
state that the model will use to predict action: [[  0   0   0   0   1   0   0   0   0 -11   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   0 -11   0]]
act called from main loop: [[  0   0   0   0   1   0   0   0   0 -11   0]]
act called from main loop: [[

agent will remember: [[587 250   0   0  45 -17 -43 -13  35  -3   0]]
act called from main loop: [[591 250   0   0   4   0 -27 -16  20   3   0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
state that the model will use to predict action: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main loop: [[ 0  0  0  0  4  0  0  0 20  3  0]]
act called from main l

act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
state that the model will use to predict action: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
state that the model will use to predict action: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
state that the model will use to predict action: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0   0   0   0   0   1   0   0 -12  -4   0]]
act called from main loop: [[  0  

act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
state that the model will use to predict action: [[ 0  0  0  0  0  0  0  0  2 -5  0]]
act called from main loop: [[494 185   0   0  46   7 -19  62 -27  19   0]]
state that the model will use to predict action: [[494 185   0   0  46   7 -19  62 -27  19   0]]
agent will remember: [[494 185   0   0  46   7 -19  62 -27  19   0]]
act called from main loop: [[497 184   0   0   3   1   2  64  24  -1   0]]


act called from main loop: [[ 715  150    0    0   53   -9 -202  105   32   22    0]]
agent will remember: [[ 715  150    0    0   53   -9 -202  105   32   22    0]]
act called from main loop: [[ 717  150    0    0    2    0 -201  112    3   -7    0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
act called from main loop: [[ 0  0  0  0  2  0  0  0  3 -7  0]]
act called from main

act called from init: [[0 0 0 0 0 0 0 0 0 0 0]]
act called from main loop: [[137 187 667 196   1   0 530   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1

act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from main loop: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  2  1  0  0 -2  0  0]]
act called from 

act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
state that the model will use to predict action: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8  0]]
act called from main loop: [[ 0  0  0  0  0 -1  0  0  0 -8

act called from main loop: [[137 187 653 196   1   0 516   9  -1   0   0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will use to predict action: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
act called from main loop: [[ 0  0  0  0  1  0  0  0 -1  0  0]]
state that the model will u