In [14]:
from VisualModule import AgentEnvironment
from DQN_Agent import NeurosmashAgent
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize
import gym
import math
import random
import pickle
from collections import namedtuple
from itertools import count
from PIL import Image
import os
from stopwatch import Stopwatch

model_output_dir = "output/model_output/"

if not os.path.exists(model_output_dir):
    os.makedirs(model_output_dir)

max_distance = 600
show_images = False
skip_frames = 15
state_size =  13 # agent_pos, enemy_pos, vec_agent, vec_enemy, rel_pos_enemy, done
action_size = 3
episode_count = 1000
batch_size = 32
size       = 768         # Please check the Updates section above for more details
timescale  = 6           # Please check the Updates section above for more details
environment = AgentEnvironment(size=size, timescale=timescale)
agent = NeurosmashAgent(state_size = state_size, action_size = action_size, batch_size=batch_size) 

def compute_reward(standard_reward, distance):
    distance_reward = (max_distance-distance)/max_distance
    total_reward = (distance_reward + standard_reward)/ 2
    return total_reward


def direction(agent_path, enemy_path):
    A_X = (np.array(agent_path[-1]) - np.array(agent_path[-2]))[0]
    A_Y = -(np.array(agent_path[-1]) - np.array(agent_path[-2]))[1]
    E_X = (np.array(enemy_path[-1]) - np.array(enemy_path[-2]))[0]
    E_Y = -(np.array(enemy_path[-1]) - np.array(enemy_path[-2]))[1]
    return [A_X,A_Y],[E_X,E_Y]

def do_action(action, total_steps, eval_pic):
    stopwatch = Stopwatch() 
    stopwatch.start()
    # Normalize the values 
    info, reward, agent_coord, enemy_coord, _ = environment.actionLoop(action, 0, 1)
    # Replace folowing_state with the representation

    
    stopwatch.stop()
    #print(f"Total time for do action: {stopwatch.duration}")
    if len(environment.agent_path) < 2:
        distance = 500 # Initial distance, only for initialisation
        agent_direction = [1,0] # By definition of facing each other
        enemy_direction = [-1,0]
        
    else:
        distance = np.sqrt(np.square(np.array(list(np.array(agent_coord)- np.array(enemy_coord))).sum(axis=0)))
        # Extract all variables 
        agent_direction, enemy_direction  = direction(environment.agent_path, environment.enemy_path)
    
    complete_reward = compute_reward(reward, distance)

    rel_pos_enemy = np.array(enemy_coord) - np.array(agent_coord)
    # Return a value in range 0,1 for following_state
    # Nstatus = d[0]
    # Ncomplete_reward = d[1]
    # Nagent_posx = d[2][0]/700
    # Nagent_posy = d[2][1]/700
    # Nenemy_posx = d[3][0]/700
    # Nenemy_posy = d[3][1]/700
    # Nagent_dirx = d[4][0]/60+0.5
    # Nagent_diry = d[4][1]/60+0.5
    # Nenemy_dirx = d[5][0]/60+0.5
    # Nenemy_diry = d[5][1]/60+0.5
    # Nenemy_pos_relx = d[7][0]/1400+0.5
    # Nenemy_pos_rely = d[7][1]/1400+0.5
    # Ndistance = d[6]/700
   
    d=info, complete_reward, np.array(agent_coord), np.array(enemy_coord), agent_direction, enemy_direction, distance, rel_pos_enemy
    following_state = d[0],d[1],d[2][0]/700,d[2][1]/700,d[3][0]/700,d[3][1]/700,\
     d[4][0]/60+0.5,d[4][1]/60+0.5,d[5][0]/60+0.5,d[5][1]/60+0.5,\
     d[7][0]/1400+0.5,d[7][1]/1400+0.5,d[6]/700
    
    return list(following_state)


def init_environment(env, agent_here):
    info, reward, state = env.reset() 
    agent_trajectories = []
    enemy_trajectories = []
    
    small_init_state = [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], False]
    
    #for i in range(3):
    action = agent_here.act(small_init_state) # get next action
    # action = 3 (if above does not work)
    
    #pre_state_img = np.flip(np.array(state).reshape(3,256,256).transpose(1,2,0),0)
    #step_number_now = i+1
    info, complete_reward, agent_pos, enemy_pos, agent_direction, enemy_direction, distance, relative_pos_enemy, next_state = do_action(action, 1, True)  
    
    #post_state_img = np.flip(np.array(next_state).reshape(3,256,256).transpose(1,2,0),0)

    #agent_pos, enemy_pos = env_feat.coord(pre_state_img, post_state_img)
    agent_trajectories.append(list(agent_pos))
    enemy_trajectories.append(list(enemy_pos))
        
    return 


def view(agent_path,enemy_path):

    x_a,y_a,x_e,y_e = xyvalues(agent_path,enemy_path)
    A_dir=[]
    E_dir=[]
    Initial_a_dir = [1,0] # By definition they are facing each other
    Initial_e_dir = [-1,0]
    Game_1 = pd.DataFrame([x_a,y_a,x_e,y_e]).T
    Game = Game_1.rename(columns={0: "A_x_comp", 1: "A_y_comp",2: "E_x_comp", 3: "E_y_comp"})
    interval=1 # The interval difference to consider a trajectory 
    for i in range(len(Game)-interval):
    
        Agent_dir = pd.DataFrame(Game.iloc[i]-Game.iloc[i-interval]).iloc[0:2].values
        Enemy_dir = pd.DataFrame(Game.iloc[i]-Game.iloc[i-interval]).iloc[2:4].values
        
        Agent_dir=(Initial_a_dir[0]+normalize(Agent_dir,axis=0)[0][0])/2,(Initial_a_dir[1]+normalize(Agent_dir,axis=0)[1][0])/2
        
        Enemy_dir=(Initial_e_dir[0]+normalize(Enemy_dir,axis=0)[0][0])/2,(Initial_e_dir[1]+normalize(Enemy_dir,axis=0)[1][0])/2
    
        A_dir.append(Agent_dir)
        E_dir.append(Enemy_dir) # The direction vector is stored in A_dir, E_dir
        
    for time in range(len(A_dir)):
        V = np.array([E_dir[time]])
        origin = Game[["E_x_comp","E_y_comp"]].iloc[time].values[0],Game[["E_x_comp","E_y_comp"]].iloc[time].values[1] # origin point
        plt.quiver(*origin, V[:,0], V[:,1], color=['b'], scale=30)
    
        VA = np.array([ A_dir[time]])
        origin = Game[["A_x_comp","A_y_comp"]].iloc[time].values[0],Game[["A_x_comp","A_y_comp"]].iloc[time].values[1] # origin point
        plt.quiver(*origin, VA[:,0], VA[:,1], color=['r'], scale=30)
    
    plt.ylim(300, 768)
    plt.xlim(0, 768)
    plt.savefig('result_{}.png'.format(int(len(full_games_y))))
    plt.show()


# Uncoment the model to you want to use and run :)

In [16]:
### Test the model with 
NumberOfGames=10
epsilon = 0 # zero means model, 1 random.
percentage=[]
 
# To load a decent trained model 
#82.3529411764706 # Model 'nn_82%_50games_8X150n.sav'  loss in training 0.0228, b=100, epoch=2
nn_3 = pickle.load(open('nn_82%_50games_8X150n.sav' , 'rb'))
#78  #Model 'nn_78%_games_11X150n.sav' loss in training 0.228 b=10000 epoch=1
#nn_3 = pickle.load(open('nn_78%_games_11X150n.sav' , 'rb'))
#72  #Model 'nn_72%_games_11X150n.sav' loss in training 0.0171 b=100, epoch=1
#nn_3 = pickle.load(open('nn_72%_games_11X150n.sav' , 'rb'))
#64  #Model 'nn_60%_50games_8X150n.sav' loss in training 0.0208 b=100, epoch=10
#nn_3 = pickle.load(open('nn_60%_50games_8X150n.sav' , 'rb'))
#66  #Model 'nn_%_games_8X150n.sav' loss in training 0.0192 b=100, epoch=20

#nn_3 = pickle.load(open('nn_66%_games_8X150n.sav' , 'rb'))

model_game=nn_3

In [13]:
u=0
steps = 0
for i in range(NumberOfGames):
    environment.reset() 
    F_next_state=[]
    steps = 0
    F_reward=[]
    prd=[]
    u+=1
    j=1
    status=0
    action=0
    rand=True
    while status==0 or j<10:  
        if np.random.rand() <= epsilon:
            rand=True
        else:
            rand=False
            
        
        if rand:
            action=random.randrange(3)



        next_state = do_action(action,1,True)
        # action=NewAgent.act(1)

        status=next_state[0] # Normalized status 
        reward=next_state[1] # reward
        next_state = next_state[2:] # State variables
        next_state.append(action/2)

        F_next_state.append(next_state)
        F_reward.append(reward)
        exp_value=[]
        if rand:
                action=random.randrange(3)
        else:
            for k in range(3): # For each action 
                ex_reward=F_next_state[-1][:-1] 
                ex_reward.append(k/2)# Normalized action 
      
                prediction=model_game.predict(np.array(ex_reward).reshape(1, -1))  # Will evaluate the value of the next action 
                ex_reward=[]
                exp_value.append(prediction) # First element is first action and so on 
                
            action = np.argmax(exp_value) # Choose the action with the highest value 
        
        steps += 1
        if len(F_next_state)>3000:
            status=1
        
        j=10
        
        
    print(percentage)
    percentage.append([0,1,1,1,1,1][int(F_reward[-1])])
    print(f"F_reward = {F_reward[-1]}")
    print("Game number",u,["Fail :(","win","win","win","win","win"][int(F_reward[-1])])
    print(f"Timesteps: {steps}")

F_reward = 5.491666666666666
Game number 1 win
Timesteps: 1855
F_reward = 5.354166666666667
Game number 2 win
Timesteps: 905
F_reward = 0.49083333333333334
Game number 3 Fail :(
Timesteps: 3001
F_reward = 5.345833333333333
Game number 4 win
Timesteps: 1427
F_reward = 0.41083333333333333
Game number 5 Fail :(
Timesteps: 3001
F_reward = 0.4166666666666667
Game number 6 Fail :(
Timesteps: 2744
F_reward = 0.3641666666666667
Game number 7 Fail :(
Timesteps: 2622
F_reward = 0.4658333333333333
Game number 8 Fail :(
Timesteps: 2344
F_reward = 5.381666666666667
Game number 9 win
Timesteps: 2191
F_reward = 5.420833333333333
Game number 10 win
Timesteps: 821
F_reward = 0.43916666666666665
Game number 11 Fail :(
Timesteps: 3001
F_reward = 4.723333333333334
Game number 12 win
Timesteps: 1068
F_reward = 0.36583333333333334
Game number 13 Fail :(
Timesteps: 3001
F_reward = 0.3983333333333333
Game number 14 Fail :(
Timesteps: 1322
F_reward = 5.4825
Game number 15 win
Timesteps: 2238
F_reward = 0.31916

IndexError: index out of range