In [1]:
from othello_rules import *
from othello_net import *
from tensorflow.python.framework import ops
from datetime import datetime
from example_states import *
from feature_extractor import *
from training_utils import *
import numpy as np


In [2]:
def avg_error(data_path, sess, len_games=2):
    """Return the mean validation loss over the first `len_games` matches.

    Every recorded move of every selected match is replayed on a fresh board;
    the position before the move is fed to the network together with the
    label of the move that was actually played, and the resulting `loss`
    values are averaged.

    Args:
        data_path: Path handed to `get_all_matches` to load validation matches.
        sess: TensorFlow session used to evaluate `loss`.
        len_games: Number of matches (from the start of the list) to evaluate.
            Defaults to 2, the value that used to be hard-coded; pass None to
            evaluate every match.

    Returns:
        The average loss over all evaluated positions, or NaN when no
        position was evaluated.

    Relies on the notebook-level graph handles `loss`, `img_data`,
    `ground_truths` and `keep_prob`.
    """
    validation_matches = get_all_matches(data_path)
    # XXX: Evaluate more matches (len_games=None) when testing is faster.
    validation_matches = validation_matches[0:len_games]

    errors = []
    for i, match in enumerate(validation_matches):
        # The move list starts at byte 8 and is read as 60 signed bytes.
        raw_match_movelist = match[8:]
        unpacked_movelist = unpack('b'*60, raw_match_movelist)
        board = initialize_game()
        player = -1
        for move in unpacked_movelist:
            # A zero entry ends the recorded game.
            if move == 0:
                break
            feature_path = 'cache/validation/features/features_' + str(i) + "_" + str(move) + ".npy"
            label_path = 'cache/validation/labels/labels_' + str(i) + "_" + str(move) + ".npy"
            if os.path.isfile(feature_path) and os.path.isfile(label_path):
                features = np.load(feature_path)
                label = np.load(label_path)
            else:
                features = board_to_input(board, player)
                label = prepare_data(move_to_label(move))

            error = sess.run(loss, feed_dict={img_data: [features],
                                              ground_truths: [label],
                                              keep_prob: 1.0})
            errors.append(error)

            board = make_move(board, move, player)
            # Players are encoded as +1 / -1, so negation hands over the turn.
            # (The original compared ints with `is`, which only works by
            # accident of small-int caching.)
            player = -player
            if len(find_legal_moves(board, player)) == 0:
                # The side to move has no legal move: the turn passes back.
                player = -player

    if not errors:
        return float('nan')
    return np.sum(errors) / len(errors)

def prediction_accuracy(data_path, len_games=2, session=None):
    """Return the mean number of correctly predicted moves per match.

    Each selected validation match is replayed move by move. For every
    position the network output `pred_up` is evaluated, illegal squares are
    zeroed out, and the arg-max square is compared with the move that was
    actually played. When the board equals one of its diagonal reflections,
    the correspondingly reflected move is accepted as well. A position
    contributes at most one success.

    Args:
        data_path: Path handed to `get_all_matches` to load validation matches.
        len_games: Number of matches (from the start of the list) to evaluate.
            Fewer are evaluated if fewer are available.
        session: TensorFlow session to run the prediction in. Defaults to the
            notebook-level `sess`.

    Returns:
        The mean (over matches) of the per-match success count, or NaN when
        no match was evaluated. Callers divide by 60 to obtain a fraction.

    Relies on the notebook-level names `pred_up`, `img_data`,
    `ground_truths`, `keep_prob` and `training_stability`.
    """
    if session is None:
        session = sess

    validation_matches = get_all_matches(data_path)
    # XXX: Evaluate more matches when testing is faster.
    validation_matches = validation_matches[0:len_games]

    # Kept from the original (was re-applied on every move): later cells may
    # rely on this global print precision.
    np.set_printoptions(precision=2)

    successes = []
    # Iterating the slice (instead of range(len_games)) avoids an IndexError
    # when fewer than len_games matches exist.
    for match_index, test_match in enumerate(validation_matches):
        board = initialize_game()
        player = -1
        success = 0
        # The move list starts at byte 8 and is read as 60 signed bytes.
        raw_match_movelist = test_match[8:]
        unpacked_movelist = unpack('b'*60, raw_match_movelist)
        for move in unpacked_movelist:
            # A zero entry ends the recorded game.
            if move == 0:
                break

            feature_path = 'cache/validation/features/features_' + str(match_index) + "_" + str(move) + ".npy"
            label_path = 'cache/validation/labels/labels_' + str(match_index) + "_" + str(move) + ".npy"
            if os.path.isfile(feature_path) and os.path.isfile(label_path):
                features = np.load(feature_path)
                label = np.load(label_path)
            else:
                # NOTE(review): `training_stability` is a notebook global that
                # only exists once the training cell has started - confirm
                # this is intended (avg_error omits the argument).
                features = board_to_input(board, player, training_stability)
                label = prepare_data(move_to_label(move))

            prediction = session.run(pred_up, feed_dict={img_data: [features],
                                                         ground_truths: [label],
                                                         keep_prob: 1.0})
            prediction = np.transpose(prediction[0])
            prediction = np.transpose(prediction[1])
            legal_moves = find_legal_moves(board, player)
            cleaned_predictions = zero_illegal_moves(prediction, legal_moves)

            # Use dedicated names here: the original unpacked into `i, j`,
            # overwriting the match index that the cache paths are built from.
            row, col = np.unravel_index(cleaned_predictions.argmax(),
                                        cleaned_predictions.shape)
            move_argmax = str((row + 1) * 10 + (col + 1))

            # Collect every move that counts as correct for this position.
            # Flip results are normalised with str() so the comparison works
            # regardless of the helpers' return type.
            accepted_moves = set([str(move)])
            board_upright = np.transpose(np.array(board))
            board_upleft = np.rot90(np.rot90(board_upright))
            board_both_flips = np.transpose(board_upleft)
            if np.array_equal(board, board_upright):
                accepted_moves.add(str(flip_move_upright(move)))
            if np.array_equal(board, board_upleft):
                accepted_moves.add(str(flip_move_upleft(move)))
            if np.array_equal(board, board_both_flips):
                accepted_moves.add(str(flip_move_upright(flip_move_upleft(move))))

            # Membership test counts each position at most once.
            if move_argmax in accepted_moves:
                success += 1

            board = make_move(board, move, player)
            # Players are encoded as +1 / -1, so negation hands over the turn.
            player = -player
            if len(find_legal_moves(board, player)) == 0:
                # The side to move has no legal move: the turn passes back.
                player = -player
        successes.append(success)

    if not successes:
        return float('nan')
    return np.mean(successes)

In [3]:
# Set up the TensorFlow graph and session.
ops.reset_default_graph()
graph, img_data, train_step, optimizer, ground_truths, loss, pred_up, keep_prob, learn_rate, score_out = create_othello_net()
saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
init_op = tf.initialize_all_variables()
sess.run(init_op)

# Checkpoint file written by the periodic saves below. This used to be
# commented out while still being referenced by saver.save(), so the cell
# only worked with leftover kernel state.
# NOTE(review): path taken from the previously commented-out line - confirm.
current_model = "models/tiny-selfplay-2.ckpt"
model_dir = os.path.dirname(current_model)
if model_dir and not os.path.isdir(model_dir):
    os.makedirs(model_dir)
# Uncomment to resume from an existing checkpoint instead of starting fresh.
#if os.path.isfile(current_model):
#    saver.restore(sess, current_model)

validation_path = "validation/"
matches = get_all_matches('training/')
lenmatches = len(matches)
print(lenmatches)

# Begin the training
print("start training")
print("starting error:" + str(avg_error(validation_path, sess)))
#print('%s: Step %d: Prediction accuracy = %.2f' % (datetime.now(), 0,
#                                                      prediction_accuracy()/float(60)))
iterations = 1000
prev_stop = 0
probs = 1
# Constant learning rate; hoisted out of the loop because it never changes.
start = 6e-4
eta = start
for i in range(prev_stop, prev_stop+iterations):
    #TODO: Write this function
    #input_batch, label_batch = prepare_train_batch(train_ids, batch_size, do_flips, do_rots, data_path)
    current_match = matches[i]
    # The move list starts at byte 8 and is read as 60 signed bytes.
    raw_match_movelist = current_match[8:]
    unpacked_movelist = unpack('b'*60, raw_match_movelist)

    board = initialize_game()
    # Read as a global by prediction_accuracy(); reset for every match.
    training_stability = np.zeros((8,8))
    player = -1

    # One training batch is all the data from one match
    input_batch = []
    label_batch = []

    for move in unpacked_movelist:
        # A zero entry ends the recorded game.
        if move == 0:
            break
        # TODO: Check which rotations are legal and then add them
        feature_path = 'cache/training/features/features_' + str(i) + "_" + str(move) + ".npy"
        label_path = 'cache/training/labels/labels_' + str(i) + "_" + str(move) + ".npy"

        # Prefer the cached arrays; fall back to recomputing when the cache
        # is missing or unreadable.
        cached = None
        if os.path.isfile(feature_path) and os.path.isfile(label_path):
            try:
                cached = (np.load(feature_path), np.load(label_path))
            except Exception:
                # Narrower than the original bare `except:` so Ctrl-C still
                # interrupts the run.
                print("data corruption in match " + str(i))
        if cached is not None:
            features, label = cached
        else:
            features = board_to_input(board, player)
            label = prepare_data(move_to_label(move))
        input_batch.append(features)
        label_batch.append(label)

        # Augment with 3 reflections of the game state: across one diagonal,
        # across the other diagonal, and across both.
        reflections = (
            ('upright', flip_move_upright(move)),
            ('upleft', flip_move_upleft(move)),
            ('both', flip_move_upright(flip_move_upleft(move))),
        )
        for direction, flipped_move in reflections:
            input_batch.append(flip_features(features, direction))
            label_batch.append(prepare_data(move_to_label(flipped_move)))

        board = make_move(board, move, player)

        # Players are encoded as +1 / -1, so negation hands over the turn.
        player = -player
        if len(find_legal_moves(board, player)) == 0:
            # The side to move has no legal move: the turn passes back.
            player = -player

    # Skip the optimizer step for a match without any recorded move.
    if input_batch:
        train_step.run(session=sess, feed_dict={img_data:input_batch,
                                                ground_truths: label_batch,
                                                keep_prob:probs,
                                                learn_rate:eta})

    # Report and checkpoint every 100 steps early on, every 1000 steps, and
    # on the final step.
    is_last_step = (i + 1) == (iterations + prev_stop)
    if (i % 100 == 0 and 0 < i < 2000) or is_last_step or (i % 1000 == 0):
        print('%s, Step %d, Accuracy = %.3f, Loss = %.3f' % (datetime.now().strftime("%d. %b %H:%M:%S"), i,
                                                      prediction_accuracy(validation_path)/float(60),
                                                                          avg_error(validation_path, sess)))
        save_path = saver.save(sess, current_model)

print("done")

117298
start training
starting error:43.824617513
09. Oct 21:34:32, Step 0, Accuracy = 0.183, Loss = 41.729
09. Oct 21:34:56, Step 100, Accuracy = 0.342, Loss = 3.596
data corruption in match 177
09. Oct 21:35:20, Step 200, Accuracy = 0.508, Loss = 2.974
09. Oct 21:35:46, Step 300, Accuracy = 0.575, Loss = 2.759
09. Oct 21:36:10, Step 400, Accuracy = 0.642, Loss = 2.641
09. Oct 21:36:35, Step 500, Accuracy = 0.683, Loss = 2.582
09. Oct 21:37:00, Step 600, Accuracy = 0.725, Loss = 2.563
09. Oct 21:37:24, Step 700, Accuracy = 0.733, Loss = 2.510
09. Oct 21:37:51, Step 800, Accuracy = 0.742, Loss = 2.501
09. Oct 21:38:21, Step 900, Accuracy = 0.742, Loss = 2.477
09. Oct 21:38:49, Step 999, Accuracy = 0.758, Loss = 2.473
done


In [4]:
# Inspect the output of the trained network on the validation set.
import matplotlib.pyplot as plt
np.set_printoptions(threshold=np.inf)
validation_path = "validation/"
# prediction_accuracy() yields the mean number of correct guesses per match;
# dividing by 60 (presumably the maximum number of moves in a game) turns it
# into a fraction.
print("Prediction accuracy: %s" % (prediction_accuracy(validation_path) / 60.0))

Prediction accuracy: 0.758333333333


In [5]:
## Training run with all flips, eta = 2e-3, 15 features, 64 inner filters
#start training
#starting error:75.0670833333
#2016-10-03 17:55:50.460447: Step 100: Validation error = 9.7
#2016-10-03 17:57:47.204687: Step 200: Validation error = 7.2
#2016-10-03 17:59:52.135957: Step 300: Validation error = 5.9
#2016-10-03 18:01:51.819990: Step 400: Validation error = 5.2
#2016-10-03 18:03:51.186046: Step 499: Validation error = 4.9
#done

#Ditto training, except now 28 features
#starting error:40.3807617188
#2016-10-03 20:17:49.791017: Step 100: Validation error = 10.2
#2016-10-03 20:19:56.679434: Step 200: Validation error = 7.2
#2016-10-03 20:21:56.938464: Step 300: Validation error = 5.6
#2016-10-03 20:24:09.127213: Step 400: Validation error = 5.1
#2016-10-03 20:26:15.017866: Step 499: Validation error = 4.7
#done

#Ditto training, except now 64 - 48 - 36 - 27 inner filters
#starting error:59.61125
#2016-10-03 20:59:00.119465: Step 100: Validation error = 10.7
#2016-10-03 21:00:42.969046: Step 200: Validation error = 7.8
#2016-10-03 21:02:28.783615: Step 300: Validation error = 6.1
#2016-10-03 21:04:17.698858: Step 400: Validation error = 5.5
#2016-10-03 21:06:44.768467: Step 499: Validation error = 5.0
#done
