In [1]:
import sys
sys.path.append('..')
import pickle

In [2]:
from tournament import tournament, Agent, CustomPlayerComp, improved_score_fast_x2
from data_utils import get_depths
#discount_factor = 0.99

def run_tournament(trainee_, num_rounds = 10, time_limit=float('inf'), discount_factor=0.99 ):
    """Run a tournament for ``trainee_`` and flatten the recorded states.

    Args:
        trainee_: the agent under evaluation; its ``name`` attribute is the
            key used to look up its games in the output of ``get_depths``.
        num_rounds: passed to ``tournament`` as its first argument.
        time_limit: passed to ``tournament``; defaults to ``float('inf')``.
        discount_factor: passed through to ``get_depths``.

    Returns:
        A ``(states, result)`` tuple: ``states`` is one flat list holding the
        states of every game recorded for the trainee, and ``result`` is the
        raw value returned by ``tournament``.
    """
    result = tournament(num_rounds, test_agents = [trainee_], time_limit = time_limit)
    # The third argument is an identity function (values are passed through unchanged).
    games_by_agent = get_depths(result, [trainee_], lambda x: x, discount_factor)
    # TODO: nicer handling of final states, so wins/losses also propagate from final values via Q, not just via G
    states = []
    for game in games_by_agent[trainee_.name]:
        states.extend(game)
    print('imported',len(states), 'states')
    return states, result

In [22]:
# --- Run configuration / hyper-parameters ---
mem_size = 10 ** 6        # size handed to Memory(...)
batch_size = 10 ** 4      # number of entries sampled from memory per training iteration
train_batch_size = 16     # mini-batch size passed to deep_model.fit
num_rounds = 10           # tournament rounds played after every training iteration
num_init_rounds = 1000    # tournament rounds played for the initial simulation
initial_sim = False       # True: regenerate the initial simulation data; False: load it from disk
# Discount factor (0.99) is not configured here; it is passed inline where it is used.
learning_rate = 1e-4      # learning rate given to the Adam optimizer

In [4]:
from neural.reinforcement import Memory
# Placeholder loss stored alongside every state; later want to oversample high losses.
dummy_loss = 1.0
# Replay memory that the training loop fills and samples from.
memory = Memory(mem_size)

In [5]:
if initial_sim:
    # Generate the initial simulation data used to pre-fit the model: an
    # iterative alpha-beta player is run through num_init_rounds tournament
    # rounds and the resulting states are cached on disk.
    trainee = Agent(CustomPlayerComp(score_fn=improved_score_fast_x2, 
                                     name = "Trainee", 
                                     method ='alphabeta', 
                                     iterative = True), 
                    "Trainee")

    states, result = run_tournament(trainee,num_init_rounds, time_limit = 150)

    # The states are intentionally NOT added to `memory` here. The training
    # loop adds the current `states` to memory at the start of each iteration,
    # so adding them here as well stored every initial state twice -- and only
    # on this branch, which made it inconsistent with the load branch below.

    with open('../data/initial_run.pickle', 'wb') as handle:
        pickle.dump(states, handle)
else:
    # Reuse the states cached by an earlier run with initial_sim = True.
    # SECURITY: pickle.load can execute arbitrary code; only load a file that
    # was produced locally by the branch above, never an untrusted one.
    with open('../data/initial_run.pickle', 'rb') as handle:
        states = pickle.load(handle)

In [6]:
from neural.keras_utils import deep_model_fun
from neural.neural_agent import NeuralAgent
from keras.optimizers import Adam
def model_fun():
    """Build the network(s) with this notebook's fixed architecture settings.

    Returns:
        Whatever ``deep_model_fun`` returns for these settings.
    """
    architecture = dict(
        num_features=8,
        num_res_modules=8,
        drop_rate=0.1,
        activation='sigmoid',
    )
    return deep_model_fun(**architecture)

def create_neural_agent(model_fun, name = None):
    """Create a tournament agent backed by freshly built models.

    Args:
        model_fun: zero-argument callable returning a pair
            ``(deep_model, deep_Q_model)``.
        name: name given both to the ``NeuralAgent`` and to the wrapping
            ``Agent``.

    Returns:
        ``(agent, deep_model, deep_Q_model)``. Only ``deep_model`` is compiled
        (Adam with the module-level ``learning_rate``, mean-squared-error
        loss); the ``NeuralAgent`` is built around ``deep_Q_model``.
    """
    built_models = model_fun()
    deep_model, deep_Q_model = built_models
    optimizer = Adam(lr=learning_rate)
    deep_model.compile(optimizer = optimizer, loss='mean_squared_error')
    neural_player = NeuralAgent(deep_Q_model, name = name)
    wrapped_agent = Agent(neural_player, name)
    return wrapped_agent, deep_model, deep_Q_model

Using TensorFlow backend.


In [17]:
from tournament import tournament, Agent, RandomPlayer
# Build the trainee: the wrapped tournament agent plus the two models returned by create_neural_agent.
my_agent, deep_model, deep_Q_model = create_neural_agent(model_fun, name = 'Trainee')

In [8]:
# # Option 1: load pre-trained weights
# filename = '../data/deep_Q_model_weights.h5'
# deep_Q_model.load_weights(filename)

In [9]:
# # Option 2: train the agent on the spot on the simulation data
# board_full, player_pos, legal_moves, next_move, target = prepare_data_for_model(states,'G')
# deep_model.fit([board_full, player_pos, legal_moves, next_move], 
#                target, 
#                batch_size = 128, 
#                epochs=1, 
#                verbose = 1, 
#                validation_split = 0.1, 
#                shuffle = True)
# deep_Q_model.save_weights('../data/deep_Q_model_weights_prefitted.h5')


In [23]:
# Training-iteration counter; the training loop increments it at the top of every pass.
i = 0

In [None]:
from neural.reinforcement import generate_target

# Training loop: there is no exit condition, so it runs until the kernel is interrupted.
while True:
    i += 1
    print('*** Iteration', i, '***')

    # Store the current states (the initial data on the first pass, afterwards
    # the states produced by the previous pass's tournament).
    for fresh_state in states:
        memory.add((fresh_state, dummy_loss))
    print(len(memory.buffer))

    # Draw a sample and keep only the first element of each sampled entry
    # (entries are stored above as (state, loss) tuples).
    idx, sampled = memory.sample(batch_size)
    batch_states = [entry[0] for entry in sampled]

    board_full, player_pos, legal_moves, next_move, target = generate_target(
        batch_states, deep_Q_model, alpha=1.0, discount_factor=0.99)
    model_inputs = [board_full, player_pos, legal_moves, next_move]
    deep_model.fit(model_inputs,
                   target,
                   batch_size = train_batch_size,
                   epochs=1,
                   verbose = 1,
                   validation_split = 0.1,
                   shuffle = True)

    # Save the weights after every iteration, one file per iteration number.
    weights_path = '../data/deep_Q_model_weights_' + str(i) + '.h5'
    deep_Q_model.save_weights(weights_path)

    # The agent's temperature shrinks as 1/i before the next tournament.
    my_agent.player.temperature = 1/i
    states, result = run_tournament(my_agent,num_rounds)

*** Iteration 1 ***
295223
Train on 9000 samples, validate on 1000 samples
Epoch 1/1

*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 24 to 16
  Match 2:   Trainee   vs   MM_Null   	Result: 7 to 33
  Match 3:   Trainee   vs   MM_Open   	Result: 5 to 35
  Match 4:   Trainee   vs MM_Improved 	Result: 5 to 35
  Match 5:   Trainee   vs   AB_Null   	Result: 11 to 29
  Match 6:   Trainee   vs   AB_Open   	Result: 3 to 37
  Match 7:   Trainee   vs AB_Improved 	Result: 5 to 35


Results:
----------
Trainee             21.43%
imported 6283 states
*** Iteration 2 ***
301506
Train on 9000 samples, validate on 1000 samples
Epoch 1/1

*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 22 to 18
  Match 2:   Trainee   vs   MM_Null   	Result: 7 to 33
  Match 3:   Trainee   vs   MM_Open   	Result:


*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 32 to 8
  Match 2:   Trainee   vs   MM_Null   	Result: 15 to 25
  Match 3:   Trainee   vs   MM_Open   	Result: 14 to 26
  Match 4:   Trainee   vs MM_Improved 	Result: 17 to 23
  Match 5:   Trainee   vs   AB_Null   	Result: 14 to 26
  Match 6:   Trainee   vs   AB_Open   	Result: 14 to 26
  Match 7:   Trainee   vs AB_Improved 	Result: 12 to 28


Results:
----------
Trainee             42.14%
imported 22218 states
*** Iteration 13 ***
464821
Train on 9000 samples, validate on 1000 samples
Epoch 1/1

*************************
   Evaluating: Trainee   
*************************

Playing Matches:
----------
  Match 1:   Trainee   vs   Random    	Result: 24 to 16
  Match 2:   Trainee   vs   MM_Null   	Result: 17 to 23
  Match 3:   Trainee   vs   MM_Open   	Result: 10 to 30
  Match 4:   Trainee   vs MM_Improved 	Result: 15 to 25
  Match 5:  