In [None]:
from __future__ import division

In [None]:
import os
import random
import math
import datetime
import time

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd

In [None]:
import tensorflow as tf
from keras import backend as K
from keras import regularizers

In [None]:
from keras.layers import Dense
from keras.objectives import categorical_crossentropy
from keras.metrics import categorical_accuracy as accuracy
from keras.regularizers import l2
from keras.layers import Conv2D, Flatten
from keras.layers.core import Reshape
from keras.layers.merge import Concatenate

In [None]:
%load_ext autoreload

%autoreload 1

%aimport training

In [8]:
# Smoke test that the kernel executes cells. The parenthesised form prints the
# same text under the Python 2 print statement and the Python 3 print function.
print("Foo")

Foo


# Architectures

## Arch 1: Normal dense fully-connected neural network

In [None]:
# Create a graph to hold the model.
graph_dense = tf.Graph()

# Create model in the graph.
with graph_dense.as_default():

    # Keras layers can be called on TensorFlow tensors.
    # `board` carries 42 float values per example, `outcome` carries 3.
    board = tf.placeholder(tf.float32, shape=(None, 42), name='board')
    outcome = tf.placeholder(tf.float32, shape=(None, 3), name='outcome')

    # Configuration shared by every hidden layer (same pattern as the
    # `dense_args` dicts used by the convolutional architectures).
    #
    # NOTE(review): `loss` below is only the mean cross-entropy. The penalties
    # declared here through kernel_regularizer / bias_regularizer are never
    # added to it in this cell, so they presumably do not influence
    # `train_step` (Keras collects them on each layer's `.losses`) -- confirm
    # before relying on the regularization strength.
    dense_args = dict(
        activation='relu',
        kernel_regularizer=regularizers.l2(0.1),
        bias_regularizer=regularizers.l2(0.1),
        kernel_initializer='random_uniform',
        bias_initializer='zeros',
    )

    # Fully connected layers: 2048 -> 1024 -> 512 -> 48 units.
    # The layers are created in this order, so their auto-generated names
    # are assigned in the same sequence as before.
    x = board
    for units in (2048, 1024, 512, 48):
        x = Dense(units, **dense_args)(x)

    # Output layer with 3 units (one per column of `outcome`) and a softmax
    # activation.
    preds = Dense(3, activation='softmax', name='preds')(x)

    with tf.name_scope('evaluation') as scope:
        # Mean categorical cross-entropy over the fed batch.
        loss = tf.reduce_mean(categorical_crossentropy(outcome, preds), name='loss')
        tf.summary.scalar('holdout_loss', loss)

        # Keras' categorical_accuracy output, exposed under a stable op name;
        # the summary records its mean.
        acc_value = tf.identity(accuracy(outcome, preds), name='accuracy')
        tf.summary.scalar('holdout_accuracy', tf.reduce_mean(acc_value))

    with tf.name_scope('training') as scope:
        train_step = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss, name='train_step')

    # Initialize all variables
    init_op = tf.global_variables_initializer()

    # Single op that evaluates every summary registered in this graph.
    all_summaries = tf.summary.merge_all()

## Arch 2: Pure ConvNet

In [None]:
# Dedicated graph for the purely convolutional model.
graph_cov_pure = tf.Graph()

# Everything below is created inside that graph.
with graph_cov_pure.as_default():

    # Placeholders fed at run time: 42 floats per board, 3 floats per outcome.
    board = tf.placeholder(tf.float32, shape=(None, 42), name='board')
    outcome = tf.placeholder(tf.float32, shape=(None, 3), name='outcome')

    # The flat input is laid out column by column:
    # [col0=[row_0, row_1, ...], col1=[row_0, row_1, ...], ...]
    # so it is reshaped to 7 columns x 6 rows x 1 channel.
    rs = Reshape((7, 6, 1), input_shape=(42,))(board)

    # Keyword arguments shared by all convolution layers.
    # NOTE(review): `input_shape` is also passed to c2 and c3, whose inputs
    # are not (7, 6, 1); it looks redundant when layers are called on
    # tensors -- confirm before removing.
    conv_args = {
        'use_bias': True,
        'activation': 'relu',
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l2(0.01),
        'input_shape': (7, 6, 1),
        'padding': 'valid',
    }

    # The convolutions are stacked sequentially (rs -> c1 -> c2 -> c3);
    # each one consumes the previous layer's output.
    c1 = Conv2D(8, kernel_size=(1, 2), **conv_args)(rs)
    c2 = Conv2D(16, kernel_size=(2, 1), **conv_args)(c1)
    c3 = Conv2D(32, kernel_size=(3, 3), **conv_args)(c2)
    # Disabled fourth stage, kept for reference:
    #c4 = (Conv2D(64, kernel_size=(6, 6), **conv_args)(c3))

    # Keyword arguments for the hidden dense layer.
    dense_args = {
        'use_bias': True,
        'activation': 'relu',
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l2(0.01),
        'bias_regularizer': regularizers.l2(0.01),
    }

    # Flatten the last feature map and feed it to a 64-unit dense layer.
    flat = Flatten()(c3)
    d = Dense(64, **dense_args)(flat)

    # Output layer: 3 units with softmax, matching the width of `outcome`.
    preds = Dense(3, activation='softmax', name='preds')(d)

    # NOTE(review): only the cross-entropy is minimised; the regularizer
    # penalties declared above are never added to `loss` in this cell, so
    # they presumably have no effect on training -- confirm.
    with tf.name_scope('evaluation') as scope:
        xent = categorical_crossentropy(outcome, preds)
        loss = tf.reduce_mean(xent, name='loss')
        tf.summary.scalar('holdout_loss', loss)

        acc_value = tf.identity(accuracy(outcome, preds), name='accuracy')
        mean_acc = tf.reduce_mean(acc_value)
        tf.summary.scalar('holdout_accuracy', mean_acc)

    with tf.name_scope('training') as scope:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        train_step = optimizer.minimize(loss, name='train_step')

    # Op that initialises every variable created above.
    init_op = tf.global_variables_initializer()

    # Merged op covering all summaries registered in this graph.
    all_summaries = tf.summary.merge_all()

## Arch 3: Complicated ConvNet

In [13]:
# Dedicated graph for the multi-branch convolutional model.
graph_cov_comp = tf.Graph()

# Everything below is created inside that graph.
with graph_cov_comp.as_default():

    # Placeholders fed at run time: 42 floats per board, 3 floats per outcome.
    board = tf.placeholder(tf.float32, shape=(None, 42), name='board')
    outcome = tf.placeholder(tf.float32, shape=(None, 3), name='outcome')

    # The flat input is laid out column by column:
    # [col0=[row_0, row_1, ...], col1=[row_0, row_1, ...], ...]
    # so it is reshaped to 7 columns x 6 rows x 1 channel.
    rs = Reshape((7, 6, 1), input_shape=(42,))(board)

    # Keyword arguments shared by all convolution branches.
    conv_args = {
        'use_bias': True,
        'activation': 'relu',
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l1_l2(1.0),
        'bias_regularizer': regularizers.l1_l2(1.0),
        'padding': 'valid',
    }

    def _conv_branch(filters, kernel_size):
        """Apply one Conv2D to the reshaped board `rs` and flatten the result.

        Uses the shared `conv_args`; each call creates a new, independent
        Conv2D layer and Flatten layer.
        """
        return Flatten()(Conv2D(filters, kernel_size=kernel_size, **conv_args)(rs))

    # Several parallel convolutions over the same input, combined further
    # down. The creation order below is kept unchanged because it determines
    # the auto-generated layer names.
    ca = _conv_branch(12, (5, 5))
    cb = _conv_branch(12, (4, 4))
    cc = _conv_branch(12, (3, 3))

    cg = _conv_branch(8, (6, 1))
    ch = _conv_branch(8, (1, 6))

    cd = _conv_branch(4, (1, 4))
    ce = _conv_branch(4, (4, 1))
    cf = _conv_branch(4, (2, 2))

    # Keyword arguments shared by all hidden dense layers.
    dense_args = {
        'use_bias': True,
        'activation': 'relu',
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l1_l2(1.0),
        'bias_regularizer': regularizers.l1_l2(1.0),
    }

    # A dense branch that looks at the raw, un-reshaped board.
    d1 = Dense(512, **dense_args)(board)

    # Join the dense branch with every flattened convolution branch.
    # The list order fixes the column layout seen by the next layer.
    merged = Concatenate()([d1, ca, cb, cc, cd, ce, cf, cg, ch])

    # Dense tower on top of the merged features: 512 -> 256 -> 128 -> 12.
    x = merged
    for width in (512, 256, 128, 12):
        x = Dense(width, **dense_args)(x)

    # Output layer: 3 units with softmax, matching the width of `outcome`.
    preds = Dense(3, activation='softmax', name='preds')(x)

    # NOTE(review): only the cross-entropy is minimised; the l1_l2 penalties
    # declared above are never added to `loss` in this cell, so they
    # presumably have no effect on training -- confirm.
    with tf.name_scope('evaluation') as scope:
        xent = categorical_crossentropy(outcome, preds)
        loss = tf.reduce_mean(xent, name='loss')
        tf.summary.scalar('holdout_loss', loss)

        acc = tf.identity(accuracy(outcome, preds), name='accuracy')
        mean_acc = tf.reduce_mean(acc)
        tf.summary.scalar('holdout_accuracy', mean_acc)

    with tf.name_scope('training') as scope:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        train_step = optimizer.minimize(loss, name='train_step')

    # Op that initialises every variable created above.
    init_op = tf.global_variables_initializer()

    # Merged op covering all summaries registered in this graph.
    all_summaries = tf.summary.merge_all()

-----------------------

# Gen 1

In [19]:
# Generation-1 training data.
# Simulation runs noted so far (presumably stored under simulations/ -- confirm):
#   'random-2017-10-21-13:41:47'
#   'random-2017-10-28-17:13:04'   <- the one loaded below

# 95% / 5% train/test fractions.
gen1_loader = training.DataLoader(frac_train=0.95, frac_test=0.05)
gen1_loader = gen1_loader.add_dataset('random-2017-10-28-17:13:04')
ds_gen1 = gen1_loader.load()

### Fully Connected

In [None]:
# Train the fully connected model on the gen-1 data; the model path is
# suffixed with the current timestamp.
dense_run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
dense_model_path = './models/dense_{}'.format(dense_run_stamp)

training.train(graph_dense, dense_model_path, ds_gen1,
               batch_size=500, num_batches=10000)

### ConvNet

In [None]:
# Train the purely convolutional model on the gen-1 data; the model path is
# suffixed with the current timestamp.
cov_pure_run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
cov_pure_model_path = './models/cov_pure_{}'.format(cov_pure_run_stamp)

training.train(graph_cov_pure, cov_pure_model_path, ds_gen1,
               batch_size=200, num_batches=20000)


### Advanced ConvNet

In [None]:
# Best run so far: gen1-cov2d_beta_2017_10_29_150829
#   Dataset: 'random-2017-10-28-17:13:04'
#   batch_size=500, learning_rate=0.001, regularization=(l1_l2, 1.0)
#   (a regularization of 0.1 seems to have the same effect...)
#   Includes all convolutions (adding cc, cf, cg, ch)
#   Includes 4 dense layers: 512, 256, 128, 12
#
# NOTE(review): the notes above record learning_rate=0.001 and the name
# 'gen1-cov2d_beta', whereas this call writes to 'gen1-cov2d_alpha' and
# graph_cov_comp is built with learning_rate=0.0001 -- confirm which
# settings produced the best run.

cov_comp_run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
cov_comp_model_path = './models/gen1-cov2d_alpha_{}'.format(cov_comp_run_stamp)

training.train(graph_cov_comp, cov_comp_model_path, ds_gen1,
               batch_size=500, num_batches=15000)

-----------------

# Gen 2

In [17]:
# Generation-2 training data: the random-play dataset plus games produced
# with the gen-1 model, using 95% / 5% train/test fractions.
gen2_loader = training.DataLoader(frac_train=0.95, frac_test=0.05)
# The second argument is presumably a cap on the rows taken from this
# dataset -- confirm against training.DataLoader.add_dataset.
gen2_loader = gen2_loader.add_dataset('random-2017-10-28-17:13:04', 100000)
gen2_loader = gen2_loader.add_dataset('gen1-cov2d_beta_2017_10_29_150829-2017-10-29-16:57:41')
ds_gen2 = gen2_loader.load()

# Use all the advanced data
# 10,000 rows of gen-1 vs gen-1 data
#key = 'gen-1-cov2d_beta_2017_10_22_142925'
#features, targets, features_train, target_train, features_test, target_test = load_data('training_data/gen-1-cov2d_beta_2017_10_22_142925')

In [20]:
# Train the multi-branch convolutional model on the gen-2 data; the model
# path is suffixed with the current timestamp.
gen2_run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
gen2_model_path = './models/gen2-cov2d_beta_{}'.format(gen2_run_stamp)

training.train(graph_cov_comp, gen2_model_path, ds_gen2,
               batch_size=250, num_batches=20000)

Running ./models/gen2-cov2d_beta_2017_10_30_201357
Batch        0 Hold-Out Accuracy: 0.4427 (84851/191675) Loss: 1.0982 Time taken: 0.0s
Batch     1000 Hold-Out Accuracy: 0.5884 (112789/191675) Loss: 0.7281 Time taken: 59.2s
Batch     2000 Hold-Out Accuracy: 0.5950 (114050/191675) Loss: 0.7190 Time taken: 59.3s
Batch     3000 Hold-Out Accuracy: 0.5969 (114412/191675) Loss: 0.7154 Time taken: 65.7s
Batch     4000 Hold-Out Accuracy: 0.6068 (116303/191675) Loss: 0.7089 Time taken: 65.9s
Batch     5000 Hold-Out Accuracy: 0.6146 (117804/191675) Loss: 0.7024 Time taken: 61.3s
Batch     6000 Hold-Out Accuracy: 0.6191 (118671/191675) Loss: 0.6996 Time taken: 68.4s
Batch     7000 Hold-Out Accuracy: 0.6234 (119481/191675) Loss: 0.6942 Time taken: 66.7s
Batch     8000 Hold-Out Accuracy: 0.6273 (120229/191675) Loss: 0.6907 Time taken: 60.7s
Batch     9000 Hold-Out Accuracy: 0.6291 (120575/191675) Loss: 0.6882 Time taken: 61.6s
Batch    10000 Hold-Out Accuracy: 0.6319 (121116/191675) Loss: 0.6855 T