In [1]:
import sys
sys.path.append('..')
from tournament import tournament, Agent, CustomPlayerComp, improved_score_fast_x2
from data_utils import get_depths
discount_factor = 0.99

def run_tournament(trainee_, num_rounds = 10):
    result = tournament(num_rounds, test_agents = [trainee_], time_limit=float('inf'))
    #print(result)
    nice_data = get_depths(result, [trainee_], lambda x:x, discount_factor)
    #print(nice_data)
    # TODO: nicer handling of final states, so wins/losses also propagate from final values via Q, not just via G
    states = [state for game in nice_data[trainee_.name] for state in game]
    print('imported',len(states), 'states')
    return states, result

In [2]:
from neural.keras_utils import deep_model_fun
from neural.neural_agent import NeuralAgent

def load_neural_agent(filename, name = None):
    deep_model, deep_Q_model = deep_model_fun(num_features =32, num_res_modules = 8, drop_rate = 0.1, activation = 'sigmoid')
    deep_Q_model.load_weights(filename)#,custom_objects={'ConvByMoveLayer': ConvByMoveLayer})
    deep_model.compile(optimizer = 'adam',  loss='mean_squared_error')
    my_agent = NeuralAgent(deep_Q_model, name = name)
    return Agent(my_agent,name), deep_model, deep_Q_model

Using TensorFlow backend.


In [3]:
# Run this to generate the initial simulation data to pre-fit the model
trainee = Agent(CustomPlayerComp(score_fn=improved_score_fast_x2, 
                                 name = "Trainee", 
                                 method ='alphabeta', 
                                 iterative = True), 
                "Trainee")

states, result = run_tournament(trainee)
# import pickle
# with open('../data/4x4tiny.pickle', 'wb') as handle:
#     pickle.dump(states, handle)


This script evaluates the performance of the custom heuristic function by
comparing the strength of an agent using iterative deepening (ID) search with
alpha-beta pruning against the strength rating of agents using other heuristic
functions.  The `ID_Improved` agent provides a baseline by measuring the
performance of a basic agent using Iterative Deepening and the "improved"
heuristic (from lecture) on your hardware.  The `Student` agent then measures
the performance of Iterative Deepening and the custom heuristic against the
same opponents.


*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 30 to 10
  Match 2:   Trainee   vs   MM_Null   	Result: 32 to 8
  Match 3:   Trainee   vs   MM_Open   	Result: 19 to 21
  Match 4:   Trainee   vs MM_Improved 	Result: 23 to 17
  Match 5:   Trainee   vs   AB_Null   	Result: 24 to 16
  Match 6:   Trainee   vs   AB_Open   	Result: 22 to 18
  Match

In [4]:
from tournament import tournament, Agent, RandomPlayer
filename = '../data/deep_Q_model_weights.h5'
my_agent, deep_model, deep_Q_model = load_neural_agent(filename, name = 'Trainee')
states, result = run_tournament(my_agent,1)



This script evaluates the performance of the custom heuristic function by
comparing the strength of an agent using iterative deepening (ID) search with
alpha-beta pruning against the strength rating of agents using other heuristic
functions.  The `ID_Improved` agent provides a baseline by measuring the
performance of a basic agent using Iterative Deepening and the "improved"
heuristic (from lecture) on your hardware.  The `Student` agent then measures
the performance of Iterative Deepening and the custom heuristic against the
same opponents.


*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 2 to 2
  Match 2:   Trainee   vs   MM_Null   	Result: 0 to 4
  Match 3:   Trainee   vs   MM_Open   	Result: 2 to 2
  Match 4:   Trainee   vs MM_Improved 	Result: 2 to 2
  Match 5:   Trainee   vs   AB_Null   	Result: 0 to 4
  Match 6:   Trainee   vs   AB_Open   	Result: 2 to 2
  Match 7:   Train

In [5]:
%load_ext autoreload
%autoreload 2
nice_data = get_depths(result, [my_agent], lambda x:x, discount_factor)
print(nice_data)

{'Trainee': [[{'game': array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  0.,
        1.,  1.,  1.]), 'pos': (12, 9), 'move': 10, 'n_score': 0.76711982, 'active_player': None, 'game_': None, 'winner': 1.0, 'next_state': {'game': array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  1.,  0.,
        1.,  1.,  1.]), 'pos': array([ 9, 10], dtype=int64)}, 'G': 0.9521910375044023}, {'game': array([ 1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  1.,  0.,
        1.,  1.,  1.]), 'pos': (10, 2), 'move': 3, 'n_score': 0.69035238, 'active_player': None, 'game_': None, 'winner': 1.0, 'next_state': {'game': array([ 1.,  1.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  1.,  0.,
        1.,  1.,  1.]), 'pos': array([2, 3], dtype=int64)}, 'G': 0.96137234721396}, {'game': array([ 1.,  1.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,
        1.,  1.,  1.]), 'pos': (3, 11), 'move': 5, 'n_score': 0.89628375, 'active_player': None, 'game_': None, 'winner': 1.0,

In [8]:
from data_utils import prepare_data_for_model
def generate_target(states, deep_Q_model, alpha = 0.5):
    board_full, player_pos,legal_moves,next_move,G = prepare_data_for_model(states,'G')
    # get list of next states for approx 
    next_states = [state['next_state'] for state in states]
    board_full_n, player_pos_n,legal_moves_n,_,_ = prepare_data_for_model(states,None)
    next_Q_values = deep_Q_model.predict([board_full_n, player_pos_n,legal_moves_n])
    # one minus because next turn the adversary chooses the best for them, which is the worst for us
    best_next_Q = 1-next_Q_values.max(axis=1)
    # TODO: handle end state rewards better in reporting
    target_from_Q = 0.5 + discount_factor*(best_next_Q - 0.5)
    target_from_G = G
    target = alpha*target_from_Q + (1-alpha)*target_from_G
    return target

In [9]:
target = generate_target(states, deep_Q_model)

In [10]:

print(target)

[[ 0.59387123]
 [ 0.63646178]
 [ 0.53920965]
 [ 0.51930993]
 [ 0.52189106]
 [ 0.42672468]
 [ 0.26928979]
 [ 0.21840306]
 [ 0.14262352]
 [ 0.20098695]
 [ 0.68572165]
 [ 0.5415381 ]
 [ 0.51568537]
 [ 0.28547258]
 [ 0.32172087]
 [ 0.0502102 ]
 [ 0.14816361]
 [ 0.10072461]
 [ 0.22906866]
 [ 0.37709146]
 [ 0.18044183]
 [ 0.08638223]
 [ 0.37306801]
 [ 0.10892272]
 [ 0.17377666]
 [ 0.21748478]
 [ 0.34374085]
 [ 0.07388907]
 [ 0.13975218]
 [ 0.10992236]
 [ 0.19795361]
 [ 0.42075125]
 [ 0.37481305]
 [ 0.16112808]
 [ 0.05329455]
 [ 0.18973294]
 [ 0.44426868]
 [ 0.40460545]
 [ 0.52888817]
 [ 0.67285579]
 [ 0.57862441]
 [ 0.64691594]
 [ 0.51987764]
 [ 0.54828473]
 [ 0.61984547]
 [ 0.74419455]
 [ 0.70312927]
 [ 0.59213912]
 [ 0.58165265]
 [ 0.35715226]
 [ 0.3407489 ]
 [ 0.16347421]
 [ 0.15512674]
 [ 0.24286337]
 [ 0.15056124]
 [ 0.09171085]
 [ 0.06029906]
 [ 0.09404757]
 [ 0.06734766]
 [ 0.12922767]
 [ 0.2664992 ]
 [ 0.35551107]
 [ 0.51945666]
 [ 0.63437303]
 [ 0.73516871]
 [ 0.61501944]
 [ 0.60689

In [None]:
# import math
# math.sqrt(0.12)

In [None]:
# # this bit here is display only
# %load_ext autoreload
# %autoreload 2
# game = result[1]
# print(game['winner'].name)
# print(game['moves'][1].keys())
# print(nice_data['Trainee'][0][0])