In [1]:
from othello_rules import *
from othello_net import *
from tensorflow.python.framework import ops
from datetime import datetime
from example_states import *
from feature_extractor import *
from training_utils import *
import numpy as np
np.set_printoptions(suppress=True)

In [2]:
def value_error(data_path, sess, diagnostics=False, max_matches=30):
    """Return the mean value-network loss over positions of validation matches.

    Each match is replayed move by move; every position reached is fed
    through the network and scored against the match's winner label.

    NOTE: besides its arguments this function reads the notebook-level graph
    handles `img_data`, `ground_truths`, `keep_prob`, `loss` and `score_out`,
    so the cell that builds the network must have been run first.

    Args:
        data_path: Directory handed to `get_all_matches`.
        sess: Session used to evaluate `loss` (and `score_out` for diagnostics).
        diagnostics: When True, print per-game information and the network's
            prediction for positions after the 43rd evaluated one.
        max_matches: Evaluate only the first `max_matches` matches (a full
            pass is slow). Pass None to evaluate every match.

    Returns:
        The sum of the per-position losses divided by their count, or NaN
        when no position was evaluated.
    """
    errors = []
    validation_matches = get_all_matches(data_path)
    if max_matches is not None:
        validation_matches = validation_matches[0:max_matches]

    for i, match in enumerate(validation_matches):
        if diagnostics:
            print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>new game")
        raw_match_movelist = match[8:]
        unpacked_movelist = unpack('b'*60, raw_match_movelist)
        black_score = match[6]
        unpacked_black_score = unpack('b', black_score)
        # Black is -1. A score of exactly 32 is labelled as a win for player 1.
        winner = -1 if unpacked_black_score[0] > 32 else 1
        board = initialize_game()
        player = -1

        if diagnostics:
            print(unpacked_black_score)
            print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>winner is " + str(winner))

        # The target depends only on the winner, so it is the same for every
        # position of this match.
        if winner == -1:
            r = [0, 1]
        else:
            r = [1, 0]
        label_batch = [r]

        movenr = 0
        for move in unpacked_movelist:
            if move == 0:
                # A zero entry ends the recorded move list.
                break
            feature_path = 'cache/validation/features/features_' + str(i) + "_" + str(move) + ".npy"
            label_path = 'cache/validation/labels/labels_' + str(i) + "_" + str(move) + ".npy"
            # Cached features are only used when the matching label file exists too.
            if os.path.isfile(feature_path) and os.path.isfile(label_path):
                features = np.load(feature_path)
            else:
                features = board_to_input(board, player)
            input_batch = [features]

            if diagnostics and movenr > 42:
                predictions = sess.run(score_out, feed_dict={img_data: input_batch, keep_prob: 1.0})
                prediction = list_softmax(predictions[0][0][0])
                print("-new state-")
                print("actual value: " + str(r))
                print("predictions: " + str(prediction))
            movenr += 1

            error = sess.run(loss, feed_dict={img_data: input_batch, ground_truths: label_batch, keep_prob: 1.0})
            errors.append(error)

            board = make_move(board, move, player)
            # Hand the turn to the opponent; if they have no legal move the
            # turn passes straight back.
            player = -player
            if len(find_legal_moves(board, player)) == 0:
                player = -player

        if np.count_nonzero(board) == 64:
            # NOTE(review): `input_batch` still holds the position *before*
            # the final move, so that position is scored a second time rather
            # than the filled board. Kept as-is to leave the reported metric
            # unchanged; confirm whether the final board was intended.
            error = sess.run(loss, feed_dict={img_data: input_batch, ground_truths: label_batch, keep_prob: 1.0})
            errors.append(error)
            if diagnostics:
                predic = sess.run(score_out, feed_dict={img_data: input_batch, keep_prob: 1.0})
                prediction = list_softmax(predic[0][0][0])
                print("target is ")
                print(r)
                print("prediction is:")
                print(prediction)

    if not errors:
        # Nothing was evaluated (no matches, or only empty move lists).
        return float("nan")
    return np.sum(errors) / len(errors)

def list_softmax(x):
    """Compute softmax over axis 0 of `x` in a numerically stable way.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps `np.exp` from overflowing
    to inf (and the quotient from becoming NaN) for large scores.

    Args:
        x: Array-like of scores. For a 1-D input the result sums to 1; for a
            2-D input each column sums to 1.

    Returns:
        A numpy array of the same shape as `x` holding the softmax values.
        An empty input yields an empty array.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        return np.exp(scores)
    shifted = scores - np.max(scores, axis=0)
    sf = np.exp(shifted)
    return sf / np.sum(sf, axis=0)

def max_pool_2x2(x):
  """Apply `tf.nn.max_pool` to `x` with a 2x2 window, stride 2 and SAME padding."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky ReLU: the elementwise maximum of `x` and `leak * x`.

    `name` is accepted for call-site compatibility but is not used.
    """
    leaked = leak * x
    return tf.maximum(x, leaked)

def conv_nonparams2(tensor_input, conv_weights, conv_biases, keep_prob):
    """Convolve, add the bias, apply leaky ReLU, then dropout.

    Uses the `conv` helper with its default padding. `keep_prob` is passed
    straight to `tf.nn.dropout`.
    """
    pre_activation = conv(tensor_input, conv_weights) + conv_biases
    activated = lrelu(pre_activation)
    return tf.nn.dropout(activated, keep_prob)

def conv_weights2(i, o):
    """Create a trainable 2x2 convolution kernel with `i` input and `o` output channels.

    The kernel is initialised from a truncated normal with stddev 0.1.
    """
    kernel_size = 2
    init_values = tf.truncated_normal([kernel_size, kernel_size, i, o], stddev=0.1)
    return tf.Variable(init_values)

def conv_nonparams3(tensor_input, conv_weights, conv_biases, keep_prob):
    """Same pipeline as `conv_nonparams2`, but the convolution uses "VALID" padding.

    Convolve, add the bias, apply leaky ReLU, then dropout with `keep_prob`.
    """
    pre_activation = conv(tensor_input, conv_weights, "VALID") + conv_biases
    activated = lrelu(pre_activation)
    return tf.nn.dropout(activated, keep_prob)

def create_value_net():
    """Build the value network and its training ops in the default graph.

    The network takes an 8x8x28 feature stack, applies four convolutional
    layers with 32 output channels each (the last two using the 2x2 kernels
    from `conv_weights2`), a 1x1 convolution down to 2 channels and finally
    an 8x8 "VALID" convolution that produces 2 logits per input.

    Returns:
        A tuple `(graph, img_data, train_step, optimizer, label, loss,
        predictions, keep_prob, learn_rate, score_out)` where
        - graph: the default graph the ops were added to,
        - img_data: float32 placeholder of shape [None, 8, 8, 28],
        - train_step: Adam minimisation op for `loss`,
        - optimizer: the `tf.train.AdamOptimizer` instance,
        - label: float32 placeholder of shape [None, 2] ("ground_truths"),
        - loss: mean softmax cross-entropy between the flattened logits and
          `label`,
        - predictions: result of the `softmax` helper applied to `score_out`
          (not used for training),
        - keep_prob: dropout keep-probability placeholder,
        - learn_rate: learning-rate placeholder ("eta"),
        - score_out: the unflattened output of the last convolution.
    """
    with tf.variable_scope("value_network"):
        img_data = tf.placeholder(tf.float32, shape=[None, 8, 8, 28], name="img_data")
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")

        # Convolutional layers. Variables are created in the same order as
        # before so that auto-generated variable names (and therefore saved
        # checkpoints) keep matching.
        i = 28
        o = 32
        conv1_weights = conv_weights(i, o)
        conv1_biases = conv_biases(o)
        conv1_out = conv_nonparams2(img_data, conv1_weights, conv1_biases, keep_prob)

        i = o
        o = 32
        conv2_weights = conv_weights(i, o)
        conv2_biases = conv_biases(o)
        conv2_out = conv_nonparams2(conv1_out, conv2_weights, conv2_biases, keep_prob)

        i = o
        o = 32
        conv3_weights = conv_weights2(i, o)
        conv3_biases = conv_biases(o)
        conv3_out = conv_nonparams2(conv2_out, conv3_weights, conv3_biases, keep_prob)

        i = o
        o = 32
        conv4_weights = conv_weights2(i, o)
        conv4_biases = conv_biases(o)
        conv4_out = conv_nonparams2(conv3_out, conv4_weights, conv4_biases, keep_prob)

        # 1x1 convolution reducing the channel count to 2.
        k = 1
        i = o
        o = 2
        s_weights = weight_variable([k, k, i, o])
        s_biases = bias_variable([o])
        s_out = conv_nonparams(conv4_out, s_weights, s_biases, keep_prob)

        # 8x8 "VALID" convolution over the 2-channel map, giving 2 logits.
        k = 8
        i = o
        o = 2
        score_weights = weight_variable([k, k, i, o])
        score_biases = bias_variable([o])
        score_out = conv_nonparams3(s_out, score_weights, score_biases, keep_prob)
        score_out_flat = tf.reshape(score_out, [-1, o])

        # Final layer; not actually used by the training ops below.
        predictions = softmax(score_out, o)

        # Training block.
        label = tf.placeholder(tf.float32, shape=[None, 2], name="ground_truths")
        learn_rate = tf.placeholder(tf.float32, name="eta")

        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(score_out_flat, label))
        optimizer = tf.train.AdamOptimizer(learn_rate)
        train_step = optimizer.minimize(loss)

        # Return the default graph directly; opening a throwaway Session just
        # to read `.graph` left a session that was never closed.
        graph = tf.get_default_graph()
        return graph, img_data, train_step, optimizer, label, loss, predictions, keep_prob, learn_rate, score_out

    # MAKE SURE that the topology of this network is the same as the policy network!

In [4]:
# Build the value network in a fresh graph and open the session used for
# both training and validation.
ops.reset_default_graph()
graph, img_data, train_step, optimizer, ground_truths, loss, pred_up, keep_prob, learn_rate, score_out = create_value_net()
saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
init_op = tf.initialize_all_variables()
sess.run(init_op)

# Model history:
# - nopool7 could tell who wins games, ~0.2 loss.
# - Try noends9 for sanity checks.
# - "models/sl-v/noends9.ckpt.b" has 0.238 end-state loss and does not use
#   only end states.
# - >>>>> "models/sl-v/players_g.ckpt" has 0.544 all-state loss and uses
#   absolutely everything. Trained mostly at 1e-5 or so.

current_model = "models/sl-v/players_g.ckpt.b"
if os.path.isfile(current_model):
    saver.restore(sess, current_model)
validation_path = "validation/"
matches = get_all_matches('training/')
print(len(matches))

# Start the training.
print("start training")
diagn = False
if diagn:
    iterations = 1
else:
    iterations = 100
prev_stop = 21000
# Never index past the end of the match list.
last_step = min(prev_stop + iterations, len(matches))
probs = 1
eta = 1e-6
# Fraction of positions to drop at random; 0.0 keeps (practically) all.
skip_threshold = 0.0
input_batch = []
label_batch = []
for i in range(prev_stop, last_step):
    # Matches in this index range are skipped entirely (including the
    # periodic validation below).
    if i > 35000 and i < 40000:
        continue
    # TODO: write a prepare_train_batch(train_ids, batch_size, do_flips,
    # do_rots, data_path) helper that replaces the inline batching below.
    current_match = matches[i]
    raw_match_movelist = current_match[8:]
    unpacked_movelist = unpack('b'*60, raw_match_movelist)
    black_score = current_match[6]
    unpacked_black_score = unpack('b', black_score)

    # XXX: Temporary check for the value network, do this properly later.
    # Black is -1; a score of exactly 32 is labelled as a win for player 1.
    if unpacked_black_score[0] > 32:
        winner = -1
    else:
        winner = 1

    board = initialize_game()
    player = -1

    # Every position of a match shares the same target: the match winner.
    if winner == -1:
        r = [0, 1]
    else:
        r = [1, 0]

    for move in unpacked_movelist:
        if move == 0:
            # A zero entry ends the recorded move list.
            break

        if np.random.rand() > skip_threshold:
            feature_path = 'cache/training/features/features_' + str(i) + "_" + str(move) + ".npy"
            label_path = 'cache/training/labels/labels_' + str(i) + "_" + str(move) + ".npy"
            # Cached features are only used when the matching label file
            # exists too. The cached move label itself is not needed here:
            # the value network trains on the winner label `r`.
            if os.path.isfile(feature_path) and os.path.isfile(label_path):
                try:
                    features = np.load(feature_path)
                except Exception:
                    # Unreadable cache file: fall back to recomputing.
                    # (`Exception` rather than a bare except, so that
                    # KeyboardInterrupt still stops the run.)
                    print("data corruption in match " + str(i))
                    features = board_to_input(board, player)
            else:
                features = board_to_input(board, player)
            features = np.array(features)

            # The position itself plus its flips across one diagonal, the
            # other diagonal, and both, all with the same target.
            augmented = [
                features,
                flip_features(features, 'upright'),
                flip_features(features, 'upleft'),
                flip_features(features, 'both'),
            ]
            for sample in augmented:
                input_batch.append(sample)
                label_batch.append(r)

        board = make_move(board, move, player)
        # Hand the turn to the opponent; if they have no legal move the turn
        # passes straight back.
        player = -player
        if len(find_legal_moves(board, player)) == 0:
            player = -player

    # Train once enough samples have accumulated, then start a new batch.
    if len(input_batch) > 128:
        _, loss_ = sess.run([train_step, loss],
                            feed_dict={img_data: input_batch,
                                       ground_truths: label_batch,
                                       keep_prob: probs,
                                       learn_rate: eta})
        input_batch = []
        label_batch = []

    # Validate and checkpoint every 10th step, on the final step, and on
    # multiples of 1000 (which also covers step 0).
    is_periodic = (i % 10 == 0) and (i > 0)
    is_last = (i + 1) == last_step
    if is_periodic or is_last or (i % 1000 == 0):
        value_loss = value_error(validation_path, sess, diagn)
        print('%s, Step %d, Loss = %.6f' % (datetime.now().strftime("%d. %b %H:%M:%S"), i,
                                                                          value_loss))
        # NOTE(review): this writes to `current_model + '.b'`, which is not
        # the path restored above - confirm that is intended.
        save_path = saver.save(sess, current_model + '.b')
        if value_loss < 0.24:
            break

print("done")

117298
start training
15. Oct 13:29:06, Step 21000, Loss = 0.543892


KeyboardInterrupt: 

In [None]:
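# number of doom: 0.693147 (= ln 2, the cross-entropy loss of a 50/50 guess over two classes, i.e. the network has learned nothing)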

In [None]:
# Scratch check: extract the features of the starting position and append
# one extra all-ones 8x8 plane to them.
board1 = initialize_game()
# Despite its name, `zeros` holds the number of NON-zero squares on the board.
zeros = np.count_nonzero(board1)
print(zeros)
f = board_to_input(board1, -1)
black = np.ones((8,8,1))
# Was `featurez.shape`, a name that is never defined (NameError).
print(f.shape)
addone = np.dstack((f, black))
print(addone.shape)


(?, 2)
(?, 1, 1, 2)
(?, 2)
117298
start training

First 1e-3 down to 0.4, then:

start = 1e-4

start training
14. Oct 16:55:09, Step 0, Loss = 0.349850
14. Oct 16:55:15, Step 100, Loss = 0.338470
14. Oct 16:55:20, Step 200, Loss = 0.296196
14. Oct 16:55:25, Step 300, Loss = 0.283280
14. Oct 16:55:31, Step 400, Loss = 0.310574
14. Oct 16:55:36, Step 500, Loss = 0.308730
14. Oct 16:55:41, Step 600, Loss = 0.269805
14. Oct 16:55:47, Step 700, Loss = 0.255041
14. Oct 16:55:52, Step 800, Loss = 0.255202
14. Oct 16:55:57, Step 900, Loss = 0.233139
14. Oct 16:56:03, Step 1000, Loss = 0.230880
14. Oct 16:56:08, Step 1100, Loss = 0.192403

"models/sl-v/nopool12.ckpt"

"fewer"params and no leaky relu

4e-4

117298
start training
14. Oct 18:42:53, Step 0, Loss = 0.669618
14. Oct 18:42:59, Step 100, Loss = 0.640104
14. Oct 18:43:05, Step 200, Loss = 0.627391
14. Oct 18:43:11, Step 300, Loss = 0.682312
14. Oct 18:43:19, Step 400, Loss = 0.622758
14. Oct 18:43:24, Step 500, Loss = 0.418528
14. Oct 18:43:29, Step 600, Loss = 0.381199
14. Oct 18:43:36, Step 700, Loss = 0.289413
14. Oct 18:43:43, Step 799, Loss = 0.283211
done

1e-4

117298
start training
14. Oct 18:44:43, Step 100, Loss = 0.294863
14. Oct 18:44:49, Step 200, Loss = 0.256158
14. Oct 18:44:57, Step 300, Loss = 0.362265
14. Oct 18:45:05, Step 400, Loss = 0.305164
14. Oct 18:45:12, Step 500, Loss = 0.302925
14. Oct 18:45:19, Step 600, Loss = 0.208198
14. Oct 18:45:27, Step 700, Loss = 0.212496
14. Oct 18:45:34, Step 800, Loss = 0.217972
14. Oct 18:45:35, Step 807, Loss = 0.203382
done