In [1]:
# generate the symmetry-corrected indices for move-based convolution
import sys
import numpy as np
sys.path.append('..')

In [2]:
# test the coefficient generation logic the naive way
# `cell` is the base index for the check; the display cell that follows adds 1 to it
# before using it to pick an entry of `all_inds`.
cell = 22

In [3]:
# display-only code, to visually check the coeffs are in the correct locations on the board
from neural.neural_ import to_pair, generate_all_moves_by_index, move_convolution_indices

all_inds, num_coeffs = move_convolution_indices()
num_coeffs -= 10 # the first 10 in the above function are biases, don't need them
num_biases = 10
num_fields = 7*7

# NOTE(review): this rebinds the notebook-level `cell`, so re-running this cell without
# re-running the cell that sets `cell` moves the inspected index forward by one each time.
cell = cell+1
tmp = all_inds[cell]
# 7x7 grid of zeros; one value is written per (index, coefficient) entry of `tmp`.
a = np.zeros([7,7])
# The first entry of `tmp` is skipped.
for (ind, coeff) in tmp[1:]:
    pair = to_pair(ind)
    # NOTE(review): the offset is 9 while num_biases is 10 -- presumably so that the first
    # non-bias coefficient shows as 1 and stays distinguishable from the empty 0 cells; confirm.
    a[pair[0],pair[1]] = coeff - 9

print(to_pair(cell))
print(a)

(2, 3)
[[  0.   0.  37.   0.  37.   0.   0.]
 [  0.  38.   0.   0.   0.  38.   0.]
 [  0.   0.   0.   9.   0.   0.   0.]
 [  0.  36.   0.   0.   0.  36.   0.]
 [  0.   0.  39.   0.  39.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.]]


In [4]:
# try calling conv_stack directly from Tensorflow
import numpy as np
import tensorflow as tf

# Start from an empty default graph so re-running this cell does not pile up nodes.
tf.reset_default_graph()

from neural.tensorflow_utils import conv_stack

with tf.Session() as sess:
    # Two input rows of `num_fields` ones (num_fields is set in the coefficient display cell);
    # entries 3 and 5 of the first row are zeroed.
    in_fields_np = np.ones([2,num_fields])
    in_fields_np[0,3] = 0
    in_fields_np[0,5] = 0
    my_pos = np.array([24, 24])
    other_pos =  np.array([33,33])
    # Append the two position vectors as extra columns, giving shape (2, num_fields + 2).
    inputs_np = np.concatenate([in_fields_np, my_pos[:,None], other_pos[:,None]],
                              1)
    #print(inputs_np.shape)
    inputs =tf.constant(inputs_np)# tf.placeholder(shape =[None, num_fields+2], dtype = tf.float32) #
    #print(sess.run(inputs))
    out = conv_stack(inputs, 5,sess)
    
    # `dummy` is only referenced by the commented-out line below.
    dummy = np.array([50,50])[:,None]
    #print(sess.run(get_random_index(inputs, tf.constant(dummy))))
    
    # Variables are initialised only after conv_stack has been called
    # (presumably because conv_stack creates them -- confirm), then the output is evaluated.
    sess.run(tf.global_variables_initializer())
    stack_result = sess.run(out)#, feed_dict={inputs:inputs_np})
    print(stack_result)

[2 3 2]
[[[  4.31470346 -10.12623596]
  [  1.9507395   36.62452698]
  [  4.29634714  17.55342102]]

 [[  4.38607407 -18.0837574 ]
  [  1.8685627   42.87147522]
  [  4.78937006  17.91110229]]]


In [5]:
# load game simulation data
import glob
import sys
import pickle
from neural.data_utils import load_simulation_data

# The flattened list of states is cached in this pickle so the raw result files
# only have to be parsed when the cache is missing or unreadable.
fn = '../data/states.pickle'
try:
    # pickle.load can execute arbitrary code: only load cache files written by this notebook.
    with open(fn, 'rb') as f:
        states = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    # Only a missing, unreadable or truncated/corrupt cache triggers a rebuild.
    # Anything else (KeyboardInterrupt, MemoryError, genuine bugs) now propagates
    # instead of silently starting the expensive re-parse.
    files = glob.glob('../data/ID_x2_1000ms/result_ID*.pickle')
    if not files:
        # Fail with a clear message rather than an obscure error further down.
        raise FileNotFoundError(
            'no simulation result files match ../data/ID_x2_1000ms/result_ID*.pickle')
    depths =load_simulation_data(files)
    keys = list(depths.keys())
    # Only the games stored under the first key are used.
    games = depths[keys[0]]
    # Flatten: one list entry per state, across all games.
    states = [state for game in games for state in game]
    print(len(states))
    with open(fn, 'wb') as f:
        pickle.dump(states,f)

In [6]:
from neural.data_utils import prepare_data_for_model
# Convert the flat list of states into the three arrays used for fitting and print
# their shapes as a sanity check. No target name is passed in this call.
board_full, player_pos, y = prepare_data_for_model(states)
print(board_full.shape, player_pos.shape, y.shape)


984694


In [8]:
# fit the naive score as a first test of our network
from neural.keras_utils import deep_model_fun
# Linear output activation combined with a mean-squared-error loss below:
# this network is set up as a regressor.
model = deep_model_fun(num_features = 8, num_res_modules = 4, drop_rate = 0.1, activation = 'linear')
model.summary()
model.compile(optimizer = 'adam',  loss='mean_squared_error')

Using TensorFlow backend.


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_2 (InputLayer)             (None, 49, 1)         0                                            
____________________________________________________________________________________________________
input_1 (InputLayer)             (None, 49, 2)         0                                            
____________________________________________________________________________________________________
concatenate_1 (Concatenate)      (None, 49, 3)         0           input_2[0][0]                    
                                                                   input_1[0][0]                    
____________________________________________________________________________________________________
conv_by_move_layer_1 (ConvByMove (None, 49, 8)         1064        concatenate_1[0][0]     

In [9]:
# Inputs are passed in the order [player_pos, board_full].
# NOTE(review): the captured output ends in KeyboardInterrupt during epoch 2, so `model`
# as used further down is only partially trained.
model.fit(x = [player_pos, board_full],y = y, batch_size = 256, epochs=10, verbose =1)

Epoch 1/10
Epoch 2/10
 54784/984694 [>.............................] - ETA: 206s - loss: 1.3160

KeyboardInterrupt: 

In [32]:
# Now let's get all those games where tree search actually completed
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# A state counts as complete when its stored score is +inf or -inf.
decided_scores = (float('inf'), float('-inf'))
complete_states = [s for s in states if s['score'] in decided_scores]
print(len(complete_states))
board_full_c, player_pos_c, y_c = prepare_data_for_model(complete_states,'score')
# Recode the targets in place: +inf becomes 1, -inf becomes 0.
y_c[y_c == decided_scores[0]] = 1
y_c[y_c == decided_scores[1]] = 0

365201


In [33]:
# Sanity check: show the distinct target values present in y_c.
print(set(np.ravel(y_c)))

{0.0, 1.0}


In [34]:
from neural.keras_utils import deep_model_fun
# Larger network with a sigmoid output, compiled with binary cross-entropy and an
# accuracy metric; it is fitted against the 0/1 targets in y_c.
deep_model = deep_model_fun(num_features = 16, num_res_modules = 16, drop_rate = 0.1, activation = 'sigmoid')
deep_model.summary()
deep_model.compile(optimizer = 'adam',  loss='binary_crossentropy', metrics =['acc'])

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_4 (InputLayer)             (None, 49, 1)         0                                            
____________________________________________________________________________________________________
input_3 (InputLayer)             (None, 49, 2)         0                                            
____________________________________________________________________________________________________
concatenate_3 (Concatenate)      (None, 49, 3)         0           input_4[0][0]                    
                                                                   input_3[0][0]                    
____________________________________________________________________________________________________
conv_by_move_layer_10 (ConvByMov (None, 49, 16)        2128        concatenate_3[0][0]     

____________________________________________________________________________________________________
batch_normalization_24 (BatchNor (None, 49, 16)        64          conv_by_move_layer_25[0][0]      
____________________________________________________________________________________________________
activation_25 (Activation)       (None, 49, 16)        0           batch_normalization_24[0][0]     
____________________________________________________________________________________________________
conv_by_move_layer_26 (ConvByMov (None, 49, 16)        10656       activation_25[0][0]              
____________________________________________________________________________________________________
dropout_12 (Dropout)             (None, 49, 16)        0           conv_by_move_layer_26[0][0]      
____________________________________________________________________________________________________
add_12 (Add)                     (None, 49, 16)        0           add_11[0][0]            

In [35]:
# One epoch on the completed-search states; inputs are passed as [player_pos_c, board_full_c]
# and validation_split = 0.1 holds a tenth of the samples out for validation.
deep_model.fit([player_pos_c, board_full_c], y_c, batch_size = 256, epochs=1, verbose =1, validation_split = 0.1)

Train on 328680 samples, validate on 36521 samples
Epoch 1/1


<keras.callbacks.History at 0x18c976bea20>

In [12]:
from collections import namedtuple
from copy import copy
from neural.neural_agent import apply_move, get_best_move_from_model

# Hand-built starting position: a board vector of 49 ones and no player positions yet.
board = np.ones(49)
#print(list(board))
# The result of this sum is discarded (it is not the cell's last expression).
board.sum()
my_pos = None
other_pos = None
game = {'pos': np.array([my_pos, other_pos]), 'game': board}
# Apply moves 0, 1 and 15 in sequence; each result is fed into the next call.
game1 = apply_move(game, 0)
game2 = apply_move(game1, 1)
game3 = apply_move(game2, 15)
# board, pos, _ = possible_moves_for_model(game3)
# print(board.shape, pos.shape)
# NOTE(review): this evaluates `model` (the mean-squared-error network), not `deep_model`.
get_best_move_from_model(game3, model)


[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]


(16, array([ 32.19636917], dtype=float32))

In [15]:
# Reload edited project modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

# NOTE(review): the only visible assignment of `my_agent` is in the tournament cell further
# down the file, so on a fresh top-to-bottom run this line fails with NameError.
my_agent.get_move(game3)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


(16,
 {'game': array([ 0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
          1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
          1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
          1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]),
  'move': 16,
  'n_score': array([ 32.19636917], dtype=float32),
  'pos': array([ 1, 15])})

In [39]:
from tournament import tournament, Agent, RandomPlayer
from neural.neural_agent import NeuralAgent

# Wrap the sigmoid classifier network in an agent and run it through the tournament.
my_agent = NeuralAgent(deep_model)
tournament(num_matches=20, time_limit=500, 
           test_agents=[Agent(my_agent,"Neural Agent")])
# NOTE(review): this rebinds `a`, which earlier held the 7x7 coefficient display grid;
# presumably it is only here so the cell does not end on the tournament call -- confirm.
a=1


This script evaluates the performance of the custom heuristic function by
comparing the strength of an agent using iterative deepening (ID) search with
alpha-beta pruning against the strength rating of agents using other heuristic
functions.  The `ID_Improved` agent provides a baseline by measuring the
performance of a basic agent using Iterative Deepening and the "improved"
heuristic (from lecture) on your hardware.  The `Student` agent then measures
the performance of Iterative Deepening and the custom heuristic against the
same opponents.


*************************
Evaluating: Neural Agent 
*************************

Playing Matches:
----------
  Match 1: Neural Agent vs ID_Improved timeout!




	Result: 29 to 51


Results:
----------
Neural Agent        36.25%


In [None]:
# Bucket every state by the number of moves already made, computed as
# 49 minus the sum of the state's board vector.
states_by_num_moves = [list() for _ in range(49)]

for state in states:
    moves_made = 49 - state['game'].sum()
    bucket = states_by_num_moves[int(moves_made)]
    bucket.append(state)

# Report how many states landed in each bucket.
for n, bucket in enumerate(states_by_num_moves):
    print(n, len(bucket))

In [None]:
# Iteratively populate all values in layer n that are not +/-inf by evaluating the model on layer n+1,
# then include these in the fitting set. After each pass, refresh the values for the earlier layers.
def recursively_fill_scores(states, model = deep_model):
    """Build a target score for every state in `states`.

    A stored score of +inf maps to 1 and -inf maps to 0. For any other state the
    value is the second element returned by get_best_move_from_model(state, model).
    Despite the name, the function itself does not recurse.

    Args:
        states: sequence of state dicts, each with a 'score' entry.
        model: model handed to get_best_move_from_model. The default is whatever
            `deep_model` referred to when this definition was executed.

    Returns:
        1-D numpy array with one score per state, in input order.
    """
    total = len(states)
    print(total)
    scores = np.zeros([total])
    for idx, entry in enumerate(states):
        outcome = entry['score']
        if outcome == float('inf'):
            scores[idx] = 1
        elif outcome == float('-inf'):
            scores[idx] = 0
        else:
            # Undecided state: fall back to the model's value for its best move.
            best_move, best_value = get_best_move_from_model(entry, model)
            scores[idx] = best_value
        # Coarse progress indicator.
        if idx % 1000 == 0:
            print(idx)
    return scores

# One slot per move count; slots stay None where no data was prepared.
prepared_data = [None] * 49

for n in range(18, 49):
    layer_states = states_by_num_moves[n]
    if not layer_states:
        continue
    # prepare_data_for_model returns (board, player_pos, score); the score part is ignored here.
    board, pos, _ = prepare_data_for_model(layer_states, None)
    scores = recursively_fill_scores(layer_states)
    prepared_data[n] = (pos, board, scores)
    # Number of samples and number of distinct score values in this layer.
    print(len(scores), len(set(scores)))
# TODO: is my position always first in those dumps???
    

            


            
