In [None]:
from settings import s, e
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
import pickle

from IPython.display import HTML, clear_output, display, update_display
import matplotlib.pyplot as plt


from simple import Game

from agent_code.tensor_agent.agent import TensorAgent
from agent_code.tensor_agent.hyperparameters import hp
from agent_code.tensor_agent.X import RelativeX2 as game_state_X
from agent_code.tensor_agent.model import FullModel

# Wipe anything the imports above wrote to this cell's output area.
clear_output()

In [None]:
# --- Environment knobs, passed to Game / Game.create_arena by the loops below ---
crate_density = 0.5
aux_reward_crates = 0.2

# --- Overrides written onto the shared tensor_agent hyperparameter object ---
hp.buffer_size = 100
hp.target_network_period = 10
hp.epsilon = 0.0
hp.learning_rate = 0.001
hp.discount_factor = 0.99
hp.peaceful = False

# Constant amount subtracted from every reward handed to reward_update in the
# training loop; the leading factor is a scale knob on (1 - discount_factor).
hurry_up = 1 * (1 - hp.discount_factor)

In [None]:
# Action vocabulary: an action's position in this list is the integer index
# the agents return from act().
choices = ['RIGHT', 'LEFT', 'UP', 'DOWN', 'BOMB', 'WAIT']
action_y_map = {label: idx for idx, label in enumerate(choices)}
D = len(choices)

# Reset the Keras backend session before building the network.
K.clear_session()
model = FullModel(game_state_X.shape, D)

# alt: weights='tensor_agent-model.h5'
tensor_agent = TensorAgent(game_state_X.shape, D, weights=None, model=model)

# The original agent plus three clones, keyed by the name used inside the game.
agents = {'tensor_agent': tensor_agent}
for copy_no in (1, 2, 3):
    agents[f'tensor_agent-copy{copy_no}'] = tensor_agent.clone()

# Training is off for everybody until a later cell switches it on.
train = dict.fromkeys(agents, False)
clear_output()

In [None]:
#model.target.summary()

In [None]:

def get_valid_actions(x, y, b, game):
    """Build a 0/1 mask over the six actions for an agent standing at (x, y).

    Mask slots follow ['RIGHT', 'LEFT', 'UP', 'DOWN', 'BOMB', 'WAIT'].
    A move is masked out when ``game.tile_is_free`` rejects the neighbouring
    tile, BOMB is masked out when ``b < 1``, and WAIT is never masked.

    Returns a length-6 numpy array holding ones (allowed) and zeros (masked).
    """
    # (mask slot, dx, dy) for each move, probed in the order UP, DOWN, LEFT, RIGHT
    probes = ((2, 0, -1), (3, 0, 1), (1, -1, 0), (0, 1, 0))

    mask = np.ones(6)
    for slot, dx, dy in probes:
        if not game.tile_is_free(x + dx, y + dy):
            mask[slot] = 0

    if b < 1:
        mask[4] = 0  # BOMB not available

    return mask

In [None]:
# Status line that is rewritten in place through its display handle.
d = display('Starting...', display_id='progress')

n_steps = 2_000_000
game = None  # None means "build a new episode on the next step"

# Switch training on for every agent (the original and all clones).
train = dict.fromkeys(agents.keys(), True)
#train['tensor_agent'] = True

moving_reward = 0
episode_count = 0

for step in range(n_steps):
    # Lazily create a fresh arena whenever no episode is in progress.
    if game is None:
        episode_count += 1
        arena = Game.create_arena(agents.keys(), crate_density=crate_density)
        game = Game(*arena, aux_reward_crates=aux_reward_crates)

    # Every agent currently listed by the game picks an action from its own
    # observation, restricted to the moves get_valid_actions allows.
    actions, Xs = {}, {}
    for agent in game.agents:
        x, y, name, b, _ = agent

        game_state = game.get_game_state(agent)
        Xs[name] = game_state_X.get(game_state)
        valid_actions = get_valid_actions(x, y, b, game)
        actions[name] = agents[name].act(
            Xs[name], train=train[name], valid_actions=valid_actions
        )

    # Agents return indices into `choices`; game.step is given the action names.
    actions_as_string = {n: choices[idx] for n, idx in actions.items()}
    rewards = game.step(actions_as_string)
    # Exponentially decayed running sum of the original agent's reward.
    moving_reward = 0.99 * moving_reward + rewards['tensor_agent']

    # Hand (observation, action, reward - hurry_up) to each training agent that
    # game.agents still lists after the step.
    for _, _, name, _, _ in game.agents:
        if not train[name]:
            continue
        transition = [Xs[name], actions[name], rewards[name] - hurry_up]
        agents[name].reward_update(transition)

    d.update(f'Episode {episode_count} Step: {step+1}/{n_steps} Trained: {model.steps} Moving Reward: {moving_reward:.2f}')

    if game.terminated:
        # Episode over: run each agent's end-of-episode hook, then drop the game
        # so the next iteration builds a new one.
        for name, a in agents.items():
            a.end_of_episode(save='tensor_agent-model.h5')  # alt: save=None

        game = None

d.update('Complete')

'Step: 3200/2000000 Trained: 775 Moving Reward: 0.0'

In [None]:
# Status line for the evaluation run, rewritten in place through its handle.
d = display('Starting...', display_id='progress_test')

n_steps = 1000
game = None

# Evaluation only: every agent acts with train=False.
train = {n: False for n in agents.keys()}
moving_reward = 0

# One 17x17 RGB frame per step, consumed by the animation / export cells.
imgs = []

for step in range(n_steps):
    if game is None:
        game = Game(*Game.create_arena(agents.keys(), crate_density=0.75), aux_reward_crates=aux_reward_crates)

    actions = {}
    Xs = {}
    img = np.zeros((17, 17, 3))

    for agent in game.agents:
        x, y, name, b, _ = agent

        game_state = game.get_game_state(agent)

        # Red channel: arena cells equal to -1 at 0.75, cells equal to 1 at 1.0.
        img[:,:,0] = (game_state['arena'] == -1) * 0.75
        img[:,:,0] += game_state['arena'] == 1

        # Green channel: coins at 0.75, this agent's own position at 1.
        coins = game_state['coins']
        for coin in coins:
            img[coin[0], coin[1], 1] = 0.75
        img[x,y,1] = 1

        # Blue channel: explosion map scaled by its maximum. When the maximum
        # is 0 (nothing exploding) the division would be 0/0 and fill the
        # channel with NaN, so the channel is set to 0 instead.
        explosions = game_state['explosions']
        peak = np.max(explosions)
        img[:,:,2] = (explosions / peak) if peak != 0 else 0

        # Bombs are drawn on top of the explosion map; the value depends on
        # the bomb's third entry relative to s.bomb_timer.
        bombs = game_state['bombs']
        for bomb in bombs:
            img[bomb[0], bomb[1], 2] = 0.75 - bomb[2] / (s.bomb_timer) / 2

        Xs[name] = game_state_X.get(game_state)
        valid_actions = get_valid_actions(x, y, b, game)
        actions[name] = agents[name].act(Xs[name], train=train[name], valid_actions=valid_actions)

    imgs.append(img)

    # Agents return indices into `choices`; game.step is given the action names.
    actions_as_string = {n: choices[actions[n]] for n in actions.keys()}
    print(actions_as_string)
    rewards = game.step(actions_as_string)
    # Plain (undecayed) running total of the original agent's reward.
    moving_reward = moving_reward + rewards['tensor_agent']

    d.update(f'Step: {step+1}/{n_steps} Moving Reward: {moving_reward:.2f}')

    if game.terminated:
        #for name, a in agents.items():
            #a.end_of_episode(save='tensor_agent-model.h5') # alt: save=None

        # Only a single episode is recorded.
        break


In [None]:

def animation(imgs, interval=50):
    """Turn a sequence of image arrays into a matplotlib animation.

    The frames are drawn on the current pyplot figure with its axes hidden.

    Parameters
    ----------
    imgs : sequence
        Frames in playback order; each one is passed to ``imshow``.
    interval : int, optional
        Delay between frames in milliseconds, forwarded to ``FuncAnimation``.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        The animation object (e.g. for ``to_jshtml()``).
    """
    import matplotlib.animation

    steps = len(imgs)

    # Pin the figure and axes once so later pyplot calls elsewhere cannot
    # redirect the frames to a different axes.
    fig = plt.gcf()
    ax = fig.gca()
    ax.axis('off')

    shown = []  # the image artist of the frame currently on the axes

    def animate(t):
        # Remove the previous frame's artist before drawing the next one;
        # otherwise every call stacks another image on the axes and each
        # redraw becomes more expensive than the last.
        while shown:
            shown.pop().remove()
        shown.append(ax.imshow(imgs[t]))

    ani = matplotlib.animation.FuncAnimation(fig, animate, frames=steps, interval=interval)
    return ani

In [None]:
# Animate only the first 20 recorded frames (interval=300) and embed the
# result in the notebook through its JavaScript/HTML representation.
anim = animation(imgs[:20], interval=300)
HTML(anim.to_jshtml())

In [None]:
import glob
import os

from PIL import Image

# Write every recorded frame to anim/NNN.png, upscaled from 17x17 to 170x170.
frames_dir = 'anim'

# Create the folder on first use and clear files left over from an earlier run
# (done in Python rather than `rm` so it also works when the folder is missing).
os.makedirs(frames_dir, exist_ok=True)
for stale in glob.glob(os.path.join(frames_dir, '*')):
    if os.path.isfile(stale):
        os.remove(stale)

for i, frame in enumerate(imgs):
    # Sanitise before the uint8 cast: NaN becomes 0 and values outside [0, 1]
    # are clipped, so the cast can neither wrap around nor hit undefined input.
    pixels = np.uint8(np.clip(np.nan_to_num(frame), 0.0, 1.0) * 255)
    Image.fromarray(pixels).resize((17*10, 17*10)).save('anim/{:0>3d}.png'.format(i))

In [None]:
# Combine the exported PNG frames into movie.gif using the external `convert`
# command-line tool (presumably ImageMagick; confirm it is installed on PATH).
! convert anim/*.png movie.gif