In [1]:
import sys
sys.path.append('..')

In [2]:
from tournament import tournament, Agent, CustomPlayerComp, improved_score_fast_x2
from data_utils import get_depths
discount_factor = 0.99

def run_tournament(trainee_, num_rounds = 10, time_limit=float('inf') ):
    result = tournament(num_rounds, test_agents = [trainee_], time_limit = time_limit)
    #print(result)
    nice_data = get_depths(result, [trainee_], lambda x:x, discount_factor)
    #print(nice_data)
    # TODO: nicer handling of final states, so wins/losses also propagate from final values via Q, not just via G
    states = [state for game in nice_data[trainee_.name] for state in game]
    print('imported',len(states), 'states')
    return states, result

In [3]:
mem_size = 100000
batch_size = 10000
train_batch_size = 128
num_rounds = 100
num_init_rounds = 100

In [4]:
from neural.reinforcement import Memory
memory = Memory(mem_size)
dummy_loss = 1.0 # later want to oversample high losses

In [5]:
# Run this to generate the initial simulation data to pre-fit the model
trainee = Agent(CustomPlayerComp(score_fn=improved_score_fast_x2, 
                                 name = "Trainee", 
                                 method ='alphabeta', 
                                 iterative = True), 
                "Trainee")

states, result = run_tournament(trainee,num_init_rounds, time_limit = 150)

for state in states:
    memory.add((state, dummy_loss))

import pickle
with open('../data/initial_run.pickle', 'wb') as handle:
    pickle.dump(states, handle)


*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 312 to 88
  Match 2:   Trainee   vs   MM_Null   	Result: 253 to 147
  Match 3:   Trainee   vs   MM_Open   	Result: 208 to 192
  Match 4:   Trainee   vs MM_Improved 	Result: 216 to 184
  Match 5:   Trainee   vs   AB_Null   	Result: 254 to 146
  Match 6:   Trainee   vs   AB_Open   	Result: 246 to 154
  Match 7:   Trainee   vs AB_Improved 	Result: 228 to 172


Results:
----------
Trainee             61.32%
[0.966032673953495, 0.97549502495, 0.9851495, 0.9476691271293582, 0.995, 0.9567586237418204, 0.047808962495597795, 0.03862765278603997, 0.0292599252995, 0.019701995000000028, 0.009950000000000014, 0.0, 0.9387605114994839, 0.06562709361551089, 0.056807564141935385]
imported 13842 states


In [6]:
from neural.keras_utils import deep_model_fun
from neural.neural_agent import NeuralAgent
def model_fun():
    return deep_model_fun(num_features =32, num_res_modules = 8, drop_rate = 0.1, activation = 'sigmoid')

def create_neural_agent(model_fun, name = None):
    deep_model, deep_Q_model = model_fun() 
    #deep_Q_model.load_weights(filename)#,custom_objects={'ConvByMoveLayer': ConvByMoveLayer})
    deep_model.compile(optimizer = 'adam',  loss='mean_squared_error')
    my_agent = NeuralAgent(deep_Q_model, name = name)
    return Agent(my_agent,name), deep_model, deep_Q_model

Using TensorFlow backend.


In [7]:
from tournament import tournament, Agent, RandomPlayer
my_agent, deep_model, deep_Q_model = create_neural_agent(model_fun, name = 'Trainee')

filename = '../data/deep_Q_model_weights.h5'
deep_Q_model.load_weights(filename)

In [None]:
from neural.reinforcement import generate_target

for state in states:
    memory.add((state, dummy_loss))
print(len(memory.buffer))


idx,batch_states = memory.sample(batch_size)
batch_states =[b[0] for b in batch_states]

board_full, player_pos, legal_moves, next_move, target = generate_target(batch_states, deep_Q_model, alpha=1.0, discount_factor=0.99)
deep_model.fit([board_full, player_pos, legal_moves, next_move], 
               target, 
               batch_size = train_batch_size, 
               epochs=1, 
               verbose =1, 
               validation_split = 0.2, 
               shuffle = True)

states, result = run_tournament(my_agent,num_rounds)

41526
Train on 8000 samples, validate on 2000 samples
Epoch 1/1

*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 278 to 122
  Match 2:   Trainee   vs   MM_Null   	Result: 195 to 205
  Match 3:   Trainee   vs   MM_Open   