In [36]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import time
from sklearn.preprocessing import scale

In [16]:
# Read the training and testing tables from their CSV files in one pass.
dd_train, dd_test = (
    pd.read_csv(csv_name)
    for csv_name in ('Training Data.csv', 'Testing Data.csv')
)

In [21]:
# One category list shared by both splits. Encoding each split on its own
# would give the two one-hot matrices different column counts (or different
# column meanings) whenever a label is missing from one of the files.
label_categories = pd.concat(
    [dd_train.label, dd_test.label]).astype('category').cat.categories

# Training inputs: columns 1..128 of the table, cast to float.
in_train = dd_train.values[:,1:129].astype(np.float64)
# axis=1 standardizes every row (sample) across its own columns.
in_train = scale(in_train, axis = 1)
# Training targets: one-hot matrix with one column per shared category.
out_train = dd_train.label.astype('category').cat.set_categories(label_categories)
out_train = pd.get_dummies( out_train ).values.astype(np.float64)

# Validation inputs/targets are prepared exactly the same way.
in_valid = dd_test.values[:,1:129].astype(np.float64)
in_valid = scale(in_valid, axis = 1)
out_valid = dd_test.label.astype('category').cat.set_categories(label_categories)
out_valid = pd.get_dummies( out_valid ).values.astype(np.float64)

In [22]:
# Problem dimensions, read off the prepared arrays.
N_train, N_feat = in_train.shape   # training rows, input columns
N_cat = out_train.shape[1]         # width of the one-hot target matrix
N_valid = len(in_valid)            # validation rows

In [43]:
# ---- hyper-parameters -------------------------------------------------
# Training epochs.
EPOCHS = 100
# Width of the hidden layer.
N_nodes = 128
# Weight of the L2 regularization term (0 switches it off).
ALPHA = 0
# Rows per mini-batch.
BS = 2000
# Standard deviation used when initializing the weights.
STD = 0.1

In [44]:
# Mini-batch inputs and targets, fed at every optimizer step.
x_train = tf.placeholder(dtype=tf.float32, shape=[BS, N_feat])
y_train = tf.placeholder(dtype=tf.float32, shape=[BS, N_cat])

# The full training set, baked into the graph for evaluation.
x_train_f = tf.constant(in_train, dtype=tf.float32, shape=[N_train, N_feat])
y_train_f = tf.constant(out_train, dtype=tf.float32, shape=[N_train, N_cat])

# The validation set, likewise stored as graph constants.
x_valid = tf.constant(in_valid, dtype=tf.float32, shape=[N_valid, N_feat])
y_valid = tf.constant(out_valid, dtype=tf.float32, shape=[N_valid, N_cat])

In [45]:
# Hidden-layer parameters.
w1 = tf.Variable(tf.truncated_normal(shape=[N_feat, N_nodes], stddev=STD, seed=0))
b1 = tf.Variable(tf.truncated_normal(shape=[1, N_nodes], stddev=STD, seed=0))


def _hidden(inputs):
    """Apply the hidden layer: affine map with w1/b1 followed by ReLU."""
    return tf.nn.relu(tf.matmul(inputs, w1) + b1)


# Hidden activations for the mini-batch, the full training set and the
# validation set; all three share w1 and b1.
y1_train = _hidden(x_train)
y1_train_f = _hidden(x_train_f)
y1_valid = _hidden(x_valid)

# Output-layer parameters.
w2 = tf.Variable(tf.truncated_normal(shape=[N_nodes, N_cat], stddev=STD, seed=0))
b2 = tf.Variable(tf.truncated_normal(shape=[1, N_cat], stddev=STD, seed=0))


def _output(hidden):
    """Apply the output layer: affine map with w2/b2, no activation."""
    return tf.matmul(hidden, w2) + b2


# Unnormalized class scores for the same three inputs.
logits_train = _output(y1_train)
logits_train_f = _output(y1_train_f)
logits_valid = _output(y1_valid)

In [46]:
# Mean cross-entropy on the current mini-batch (the quantity being minimized).
# logits/labels are passed by keyword: the positional order of this function
# differs between TensorFlow releases and newer releases reject positional
# calls outright, so naming the arguments is the only form that is correct
# everywhere.
CE = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(
        logits=logits_train, labels=y_train) )
# L2 penalty over every weight and bias, scaled by ALPHA.
L2 = ALPHA*(tf.nn.l2_loss(w1) + tf.nn.l2_loss(b1) +
            tf.nn.l2_loss(w2) + tf.nn.l2_loss(b2))
# Mean cross-entropy on the full training set and on the validation set,
# used for reporting only.
CE_train_f = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(
        logits=logits_train_f, labels=y_train_f) )
CE_valid = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(
        logits=logits_valid, labels=y_valid) )

# Class probabilities for the mini-batch, full training set and validation set.
y_train_p = tf.nn.softmax( logits_train )
y_train_fp = tf.nn.softmax( logits_train_f )
y_valid_p = tf.nn.softmax( logits_valid )

# One Adam step on the regularized mini-batch loss.
optimizer = tf.train.AdamOptimizer().minimize(CE+L2)
# Op that initializes every variable created above.
init = tf.initialize_all_variables()

In [47]:
# Open a TensorFlow session and run the `init` op so that all variables
# hold their initial values before training starts.
sess = tf.Session()
sess.run(init)

In [48]:
t0 = time.time()
np.random.seed(0)

# The batch placeholders have a fixed leading dimension of BS, so only whole
# batches can be fed. Count them once; rows left over after the last whole
# batch of a shuffle are skipped for that epoch.
n_batches = N_train // BS
if n_batches == 0:
    raise ValueError('batch size %d is larger than the training set (%d rows)'
                     % (BS, N_train))
# Report roughly ten times per run; the clamp avoids a modulo-by-zero when
# EPOCHS is smaller than 10.
report_every = max(1, EPOCHS // 10)

print( 'Epochs =%3d, Training Set Size =%4d, Nodes = %5d, Alpha = %3.4f, Batch Size = %4d, STD = %5.3f' %
      (EPOCHS, N_train, N_nodes, ALPHA, BS, STD))
print()
print('%15s%24s%24s' % (' ','cross-entropy','error-rate'))
print('%15s%12s%12s%12s%12s%12s%12s' % 
      ('epoch','training','validation','training','validation','L2','time (min)'))
# range(EPOCHS+1) so the last reported row is labelled EPOCHS. Metrics in each
# row are measured after that iteration's parameter updates.
for i in range(EPOCHS+1):
    # Fresh shuffle each epoch; keep only as many indices as fill whole
    # batches, then view them as one row of indices per batch.
    ran = np.random.permutation(N_train)
    reran = ran[:n_batches*BS].reshape(n_batches, BS)
    for mini in reran:
        x_batch = in_train[mini,:]   # batch features
        y_batch = out_train[mini]    # batch one-hot labels
        # One optimizer step on this batch.
        sess.run([optimizer],feed_dict = {x_train:x_batch, y_train:y_batch})
    if i % report_every == 0:
        # Evaluate on the full training set and the validation set.
        ( ce_train, ce_valid, out_train_pf, out_valid_p, l2 ) = \
        sess.run( [CE_train_f, CE_valid, y_train_fp, y_valid_p,L2])
        # Error rate = 1 - accuracy of the arg-max class prediction.
        err_train = 1-accuracy_score( out_train.argmax(axis=1), 
                                   out_train_pf.argmax(axis=1))
        err_valid  = 1-accuracy_score( out_valid.argmax(axis=1),
                                    out_valid_p.argmax(axis=1))
        t = (time.time() - t0)/60   # elapsed minutes
        print('%7d %7d%12.5f%12.5f%12.3f%12.3f%12.3f%12.1f' % 
              (EPOCHS,i,ce_train,ce_valid,err_train,err_valid,l2,t))

Epochs =100, Training Set Size =2000, Nodes =   128, Alpha = 0.0000, Batch Size = 2000, STD = 0.100

                          cross-entropy              error-rate
          epoch    training  validation    training  validation          L2  time (min)
    100       0     2.43549     2.43479       0.918       0.911       0.000         0.0
    100      10     0.80790     0.81041       0.004       0.005       0.000         0.0
    100      20     0.18845     0.18972       0.000       0.000       0.000         0.0
    100      30     0.05470     0.05497       0.000       0.000       0.000         0.0
    100      40     0.02493     0.02501       0.000       0.000       0.000         0.0
    100      50     0.01548     0.01549       0.000       0.000       0.000         0.0
    100      60     0.01143     0.01141       0.000       0.000       0.000         0.0
    100      70     0.00922     0.00919       0.000       0.000       0.000         0.0
    100      80     0.00778     0.00775    