In [1]:
from __future__ import division

In [2]:
import os
import random
import math
import datetime
import time

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
import numpy as np
import pandas as pd

In [5]:
import tensorflow as tf
from keras import backend as K
from keras import regularizers

Using TensorFlow backend.


In [33]:
from keras.layers import Dense
from keras.objectives import categorical_crossentropy
from keras.metrics import categorical_accuracy
from keras.regularizers import l2
from keras.layers import Conv2D, Flatten
from keras.layers.core import Reshape
from keras.layers.merge import Concatenate
from keras import optimizers
from keras.layers import InputLayer, Dense, Activation


In [7]:
%load_ext autoreload
%autoreload 1

%aimport training

In [8]:
# Marker so the cell output shows that every import above succeeded.
# The parenthesised form prints the same text on Python 2 and also parses on Python 3.
print("Done Importing")

Done Importing


In [None]:
# Open a TensorFlow session with device-placement logging switched on.
# NOTE(review): no `graph=` argument is passed and `sess` is not referenced by any
# later cell visible in this notebook -- confirm whether it is still needed.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

# Architectures

## Arch 1: Normal dense fully-connected neural network

In [None]:
# Architecture 1: plain fully-connected network, built in its own tf.Graph.
graph_dense = tf.Graph()

with graph_dense.as_default():

    # Graph inputs: a flat 42-value board and a 3-way one-hot outcome.
    # (Keras layers can be applied directly to TensorFlow tensors.)
    board = tf.placeholder(tf.float32, shape=(None, 42), name='board')
    outcome = tf.placeholder(tf.float32, shape=(None, 3), name='outcome')

    # Hidden stack: four ReLU Dense layers of decreasing width.  Each layer
    # gets its own L2(0.1) kernel/bias regularizer objects.
    x = board
    for units in (2048, 1024, 512, 48):
        x = Dense(units,
                  activation='relu',
                  kernel_regularizer=regularizers.l2(0.1),
                  bias_regularizer=regularizers.l2(0.1),
                  kernel_initializer='random_uniform',
                  bias_initializer='zeros')(x)

    # Output layer: 3 units (one per outcome class) with a softmax activation.
    preds = Dense(3, activation='softmax', name='preds')(x)

    with tf.name_scope('evaluation') as scope:

        # NOTE(review): `loss` is built from the cross-entropy term only; nothing
        # in this cell adds the layers' regularizer penalties to it.  Confirm
        # whether the regularizers above are expected to influence training.
        mean_xent = tf.reduce_mean(categorical_crossentropy(outcome, preds))
        loss = tf.identity(mean_xent, name='loss')

        mean_acc = tf.reduce_mean(categorical_accuracy(outcome, preds))
        accuracy = tf.identity(mean_acc, name='accuracy')

        # The same two scalars are exported twice under different tags:
        # once for the hold-out set and once for training batches.
        holdout_scalars = [
            tf.summary.scalar('holdout_loss', loss),
            tf.summary.scalar('holdout_accuracy', accuracy),
        ]
        holdout_summaries = tf.identity(tf.summary.merge(holdout_scalars),
                                        name='holdout_summaries')

        batch_scalars = [
            tf.summary.scalar('batch_loss', loss),
            tf.summary.scalar('batch_accuracy', accuracy),
        ]
        batch_summaries = tf.identity(tf.summary.merge(batch_scalars),
                                      name='batch_summaries')

    with tf.name_scope('training') as scope:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01, epsilon=0.1)
        train_step = optimizer.minimize(loss, name='train_step')

    # Op that initialises every variable created above.
    init_op = tf.global_variables_initializer()
    
    #all_summaries = tf.summary.merge_all()

## Arch 2: Pure ConvNet

In [None]:
# Architecture 2: purely convolutional network, built in its own tf.Graph.
graph_cov_pure = tf.Graph()

with graph_cov_pure.as_default():

    # Graph inputs: a flat 42-value board and a 3-way one-hot outcome.
    # (Keras layers can be applied directly to TensorFlow tensors.)
    board = tf.placeholder(tf.float32, shape=(None, 42), name='board')
    outcome = tf.placeholder(tf.float32, shape=(None, 3), name='outcome')

    # The input data is [col0=[row_0, row_1, ...], col1=[row_0, row_1], ...],
    # so the flat vector is viewed as a 7 x 6 grid with a single channel.
    rs = Reshape((7, 6, 1), input_shape=(42,))(board)

    conv_args = {
        'use_bias': True,
        'activation': 'relu',
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l2(0.01),
        'input_shape': (7, 6, 1),
        'padding': 'valid',
    }

    # Three convolutions stacked one after another (each consumes the output
    # of the previous one): 1x2, then 2x1, then 3x3.
    c1 = Conv2D(8, kernel_size=(1, 2), **conv_args)(rs)
    c2 = Conv2D(16, kernel_size=(2, 1), **conv_args)(c1)
    c3 = Conv2D(32, kernel_size=(3, 3), **conv_args)(c2)
    # c4 = (Conv2D(64, kernel_size=(6, 6), **conv_args)(c3))  # disabled

    dense_args = {
        'use_bias': True,
        'activation': 'relu',
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l2(0.01),
        'bias_regularizer': regularizers.l2(0.01),
    }

    # Flatten the final feature maps and pass them through one hidden Dense layer.
    flat = Flatten()(c3)
    d = Dense(64, **dense_args)(flat)

    # Output layer: 3 units (one per outcome class) with a softmax activation.
    preds = Dense(3, activation='softmax', name='preds')(d)

    with tf.name_scope('evaluation') as scope:

        # NOTE(review): `loss` is built from the cross-entropy term only; nothing
        # in this cell adds the layers' regularizer penalties to it.  Confirm
        # whether the regularizers above are expected to influence training.
        mean_xent = tf.reduce_mean(categorical_crossentropy(outcome, preds))
        loss = tf.identity(mean_xent, name='loss')

        mean_acc = tf.reduce_mean(categorical_accuracy(outcome, preds))
        accuracy = tf.identity(mean_acc, name='accuracy')

        # The same two scalars are exported twice under different tags:
        # once for the hold-out set and once for training batches.
        holdout_scalars = [
            tf.summary.scalar('holdout_loss', loss),
            tf.summary.scalar('holdout_accuracy', accuracy),
        ]
        holdout_summaries = tf.identity(tf.summary.merge(holdout_scalars),
                                        name='holdout_summaries')

        batch_scalars = [
            tf.summary.scalar('batch_loss', loss),
            tf.summary.scalar('batch_accuracy', accuracy),
        ]
        batch_summaries = tf.identity(tf.summary.merge(batch_scalars),
                                      name='batch_summaries')

    with tf.name_scope('training') as scope:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        train_step = optimizer.minimize(loss, name='train_step')

    # Op that initialises every variable created above.
    init_op = tf.global_variables_initializer()

    # Single op that evaluates every summary registered in this graph.
    all_summaries = tf.summary.merge_all()

## Arch 3: Complicated ConvNet

In [53]:
# Architecture 3: many parallel convolutions plus a dense branch, all merged
# and fed through a dense stack.  Built in its own tf.Graph.
graph_cov_comp = tf.Graph()

with graph_cov_comp.as_default():

    # Graph inputs: a flat 42-value board and a 3-way one-hot outcome.
    # (Keras layers can be applied directly to TensorFlow tensors.)
    board = tf.placeholder(tf.float32, shape=(None, 42), name='board')
    outcome = tf.placeholder(tf.float32, shape=(None, 3), name='outcome')

    # The input data is [col0=[row_0, row_1, ...], col1=[row_0, row_1], ...],
    # so the flat vector is viewed as a 7 x 6 grid with a single channel.
    rs = Reshape((7, 6, 1), input_shape=(42,))(board)

    conv_args = {
        'use_bias': True,
        'activation': "relu",
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l1_l2(0.1),
        'bias_regularizer': regularizers.l1_l2(0.1),
        'padding': 'valid',
    }

    dense_args = {
        'use_bias': True,
        'activation': "relu",
        'kernel_initializer': 'random_uniform',
        'bias_initializer': 'zeros',
        'kernel_regularizer': regularizers.l1_l2(1.0),
        'bias_regularizer': regularizers.l1_l2(1.0),
    }

    def conv_branch(filters, kernel_size):
        """Apply one Conv2D with the shared conv_args to `rs` and flatten the result."""
        feature_maps = Conv2D(filters, kernel_size=kernel_size, **conv_args)(rs)
        return Flatten()(feature_maps)

    # Parallel convolutions over the same reshaped board; every branch is
    # flattened so they can all be concatenated below.
    # Large square windows, 12 filters each.
    caa = conv_branch(12, (5, 5))
    cab = conv_branch(12, (4, 4))
    cac = conv_branch(12, (3, 3))

    # Long strips and 4x2 / 2x4 rectangles, 8 filters each.
    cba = conv_branch(8, (6, 1))
    cbb = conv_branch(8, (1, 6))
    cbc = conv_branch(8, (4, 2))
    cbd = conv_branch(8, (2, 4))

    # Short strips and a 2x2 window, 4 filters each.
    cca = conv_branch(4, (1, 4))
    ccb = conv_branch(4, (4, 1))
    ccc = conv_branch(4, (2, 2))

    # Dense branch that reads the raw, un-reshaped board.
    d1 = Dense(512, **dense_args)(board)

    branches = [d1,
                caa, cab, cac,
                cba, cbb, cbc, cbd,
                cca, ccb, ccc]
    merged = Concatenate()(branches)

    # Dense stack on top of the merged features.
    x = merged
    for units in (512, 256, 128, 48):
        x = Dense(units, **dense_args)(x)

    # Output layer: 3 units (one per outcome class) with a softmax activation.
    preds = Dense(3, activation='softmax', name='preds')(x)

    with tf.name_scope('evaluation') as scope:

        # NOTE(review): `loss` is built from the cross-entropy term only; nothing
        # in this cell adds the layers' regularizer penalties to it.  Confirm
        # whether the regularizers above are expected to influence training.
        mean_xent = tf.reduce_mean(categorical_crossentropy(outcome, preds))
        loss = tf.identity(mean_xent, name='loss')

        mean_acc = tf.reduce_mean(categorical_accuracy(outcome, preds))
        accuracy = tf.identity(mean_acc, name='accuracy')

        # The same two scalars are exported twice under different tags:
        # once for the hold-out set and once for training batches.
        holdout_scalars = [
            tf.summary.scalar('holdout_loss', loss),
            tf.summary.scalar('holdout_accuracy', accuracy),
        ]
        holdout_summaries = tf.identity(tf.summary.merge(holdout_scalars),
                                        name='holdout_summaries')

        batch_scalars = [
            tf.summary.scalar('batch_loss', loss),
            tf.summary.scalar('batch_accuracy', accuracy),
        ]
        batch_summaries = tf.identity(tf.summary.merge(batch_scalars),
                                      name='batch_summaries')

    with tf.name_scope('training') as scope:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_step = optimizer.minimize(loss, name='train_step')

    # Op that initialises every variable created above.
    init_op = tf.global_variables_initializer()

    # Single op that evaluates every summary registered in this graph.
    all_summaries = tf.summary.merge_all()

-----------------------

# Gen 1

In [12]:
# Generation-1 training data.
# Dataset keys noted for this generation:
#   'random-2017-10-21-13:41:47'
#   'random-2017-10-28-17:13:04'   <- the one loaded below
#     (also noted as simulations/random-2017-10-28-17:13:04)
# frac_train / frac_test are presumably the train / hold-out split fractions --
# confirm against training.DataLoader.
gen1_loader = training.DataLoader(frac_train=0.95, frac_test=0.05)
gen1_loader = gen1_loader.add_dataset('random-2017-10-28-17:13:04')
ds_gen1 = gen1_loader.load()

### Fully Connected

In [None]:
# Train the fully-connected graph on the gen-1 data.  The model path handed to
# training.train carries a timestamp so every run gets a distinct name.
run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
model_dir = './models/dense_{}'.format(run_stamp)
training.train(graph_dense, model_dir, ds_gen1,
               batch_size=200, num_batches=15000)

### ConvNet

In [None]:
# Train the pure convolutional graph on the gen-1 data.  The model path handed
# to training.train carries a timestamp so every run gets a distinct name.
run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
model_dir = './models/cov_pure_{}'.format(run_stamp)
training.train(graph_cov_pure, model_dir, ds_gen1,
               batch_size=200, num_batches=20000)


### Advanced ConvNet

In [None]:
# Best: gen1-cov2d_beta_2017_10_29_150829
# Dataset: 'random-2017-10-28-17:13:04'
# batch_size=500, learning_rate=0.001, regularization=(l1_l2, 1.0) (regularization of 0.1 seems to have the same effect...)
# Include all convolutions (adding cc, cf, cg, ch)
# Include 4 layers of dense: 512, 256, 128, 12
# Accuracy: ~ .64

In [54]:
# Exp:
# Removed a fully connected layer
# Added 4x2 and 2x4 COV layers
# Large Batches (1000)
# Moved learning from 0.001 to 0.01
# --
# Move back to 0.001, drop batch size to 250
# --
# Move CONV regularization from 1.0 to 0.1 (but keep dense the same)
# --
# Move activation=elu
# --
# Increase batch from 250 to 500, learning rate from 0.001 to 0.01
# --
# Add back other dense layer.  500 Batch, 0.001 learning rate.  Num epochs -> 30 to try to max out
# --
# Move 2nd to last dense layer from 12 to 24, increase batch to 800
# -- Learning rate 0.001 to 0.01, last layer 24-48, decrease batch 800 to 600
# 

# Train the multi-branch convolutional graph on the gen-1 data, using the
# epoch-based arguments (epoch_size / num_epochs) rather than num_batches.
# The model path carries a timestamp so every run gets a distinct name.
run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
model_dir = './models/gen1-cov2d_alpha_{}'.format(run_stamp)
training.train(graph_cov_comp, model_dir, ds_gen1,
               batch_size=600, epoch_size=240000, num_epochs=15)

Running ./models/gen1-cov2d_alpha_2017_11_04_204810
Epoch  0 Num Batches    0 Num Rows:          0 Hold-Out Accuracy: 0.5009 Loss: 1.0881 Time taken: 0.1s
Epoch  1 Num Batches  400 Num Rows:     240000 Hold-Out Accuracy: 0.5805 Loss: 0.7541 Time taken: 46.2s
Epoch  2 Num Batches  800 Num Rows:     480000 Hold-Out Accuracy: 0.5951 Loss: 0.7423 Time taken: 40.9s
Epoch  3 Num Batches 1200 Num Rows:     720000 Hold-Out Accuracy: 0.5966 Loss: 0.7376 Time taken: 42.1s
Epoch  4 Num Batches 1600 Num Rows:     960000 Hold-Out Accuracy: 0.6089 Loss: 0.7269 Time taken: 45.5s
Epoch  5 Num Batches 2000 Num Rows:    1200000 Hold-Out Accuracy: 0.6126 Loss: 0.7202 Time taken: 47.7s
Epoch  6 Num Batches 2400 Num Rows:    1440000 Hold-Out Accuracy: 0.6130 Loss: 0.7201 Time taken: 44.9s
Epoch  7 Num Batches 2800 Num Rows:    1680000 Hold-Out Accuracy: 0.6192 Loss: 0.7133 Time taken: 44.5s
Epoch  8 Num Batches 3200 Num Rows:    1920000 Hold-Out Accuracy: 0.6202 Loss: 0.7114 Time taken: 41.5s
Epoch  9 Num 

-----------------

# Gen 2

In [None]:
# Generation-2 training data: the gen-1 random dataset plus a dataset keyed by
# the gen-1 model name.
# The second positional argument to add_dataset (100000) is presumably a row
# limit for that dataset -- confirm against training.DataLoader.add_dataset.
gen2_loader = training.DataLoader(frac_train=0.95, frac_test=0.05)
gen2_loader = gen2_loader.add_dataset('random-2017-10-28-17:13:04', 100000)
gen2_loader = gen2_loader.add_dataset('gen1-cov2d_beta_2017_10_29_150829-2017-10-29-16:57:41')
ds_gen2 = gen2_loader.load()

# Use all the advanced data
# 10,000 rows of gen-1 vs gen-1 data
#key = 'gen-1-cov2d_beta_2017_10_22_142925'
#features, targets, features_train, target_train, features_test, target_test = load_data('training_data/gen-1-cov2d_beta_2017_10_22_142925')

In [None]:
# Train the multi-branch convolutional graph on the gen-2 data.  The model path
# handed to training.train carries a timestamp so every run gets a distinct name.
run_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
model_dir = './models/gen2-cov2d_beta_{}'.format(run_stamp)
training.train(graph_cov_comp, model_dir, ds_gen2,
               batch_size=250, num_batches=20000)