In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

In [2]:
# Load the feature table. A comma is already pandas' default field separator,
# so it does not need to be spelled out.
df = pd.read_csv("../features/features.csv")

In [3]:
#Global constants
seed = 42  # passed to the weight initialisers and to KFold
validation_size = 10  # number of leading rows held out as the validation set
feature_count = df.shape[1] - 2  # every column except the first two is treated as a feature

#feed forward neural net: width of each of the three hidden layers
n_nodes_hl1 = 25
n_nodes_hl2 = 25
n_nodes_hl3 = 25

#cycles of feed forward + backprop
hm_epochs = 200

n_classes = 2  # labels are one-hot encoded into two columns
batch_size = 8  # rows requested from next_batch() per optimisation step

# NOTE(review): keep_rate / keep_prob look like dropout settings, but nothing
# visible in this notebook reads them — confirm before removing.
keep_rate = 0.8
keep_prob = tf.placeholder(tf.float32)

In [4]:
# Build the feature matrix and the one-hot label matrix from the loaded table.
# Column 0 holds the class name and features start at column 2 (column 1 is
# not used here). `.iloc` replaces `.ix`, which is deprecated and was removed
# in pandas 1.0; the selection is positional, matching how `.ix` resolved
# these integer keys — assuming the CSV header gives string column labels.
X = np.asarray(df.iloc[:, 2:feature_count + 2])
Y_1 = np.asarray(df.iloc[:, 0])
# 1 for "purple", 0 for anything else. The loop variable is deliberately not
# called `y`: on Python 2 a list-comprehension variable leaks into the
# enclosing scope and `y` is also the name of the label placeholder.
Y_1 = [int(label == "purple") for label in Y_1]
#one hot Y
Y = np.zeros(shape=(len(Y_1), n_classes))
Y[np.arange(len(Y_1)), Y_1] = 1

# The first `validation_size` rows are held out; the rest is training data.
# NOTE(review): the split is taken in file order without shuffling — confirm
# the CSV rows are not sorted by class.
validation_features = X[:validation_size]
validation_labels = Y[:validation_size]

train_features = X[validation_size:]
train_labels = Y[validation_size:]

num_examples = train_features.shape[0]

In [5]:
def neural_network_model(data):
    """Assemble the feed-forward graph and return the output-layer logits.

    `data` is multiplied through three hidden layers (ReLU, ReLU, sigmoid)
    whose widths come from n_nodes_hl1..3, followed by a linear output layer
    with n_classes units. No softmax is applied to the returned tensor.
    Weights are drawn from a truncated normal (stddev 0.1, shared `seed`);
    biases start at 1.0.
    """

    def make_layer(n_inputs, n_outputs):
        # The weight variable is created before the bias variable.
        return {
            'weights': tf.Variable(tf.truncated_normal([n_inputs, n_outputs], stddev=0.1, seed=seed)),
            'biases': tf.Variable(tf.constant(1.0, shape=[n_outputs])),
        }

    def affine(inputs, layer):
        return tf.add(tf.matmul(inputs, layer['weights']), layer['biases'])

    # All parameters are created first, then the ops are wired together.
    hidden_1_layer = make_layer(feature_count, n_nodes_hl1)
    hidden_2_layer = make_layer(n_nodes_hl1, n_nodes_hl2)
    hidden_3_layer = make_layer(n_nodes_hl2, n_nodes_hl3)
    output_layer = make_layer(n_nodes_hl3, n_classes)

    l1 = tf.nn.relu(affine(data, hidden_1_layer))
    l2 = tf.nn.relu(affine(l1, hidden_2_layer))
    l3 = tf.nn.sigmoid(affine(l2, hidden_3_layer))

    return tf.matmul(l3, output_layer['weights']) + output_layer['biases']

In [6]:
epochs_completed = 0
index_in_epoch = 0


def next_batch(batch_size):
    """Return the next `batch_size` rows of the training data.

    The read position is kept in the module-level `index_in_epoch`. When the
    requested slice would run past `num_examples`, the remaining rows of the
    current pass are skipped, `epochs_completed` is incremented, the
    module-level `train_features` / `train_labels` are rebound to a jointly
    shuffled copy, and the batch is taken from the start of the new order.

    Returns a `(features, labels)` pair of slices.
    """
    global index_in_epoch, epochs_completed, train_features, train_labels

    first = index_in_epoch
    index_in_epoch += batch_size

    if index_in_epoch > num_examples:
        # The pass is over: count it and reshuffle both arrays with one
        # shared permutation so rows and labels stay aligned.
        epochs_completed += 1
        order = np.arange(num_examples)
        np.random.shuffle(order)
        train_features = train_features[order]
        train_labels = train_labels[order]
        # Restart from the head of the reshuffled data.
        first = 0
        index_in_epoch = batch_size
        assert batch_size <= num_examples

    last = index_in_epoch
    return train_features[first:last], train_labels[first:last]

In [7]:
def train_neural_network(x):
    """Train the classifier on mini-batches and print accuracy as it goes.

    Builds the model on the feature placeholder `x`, minimises softmax
    cross-entropy with Adam (default learning rate) for `hm_epochs` epochs of
    `num_examples // batch_size` mini-batches each, and prints training and
    validation accuracy on a schedule that thins out by a factor of ten
    (every epoch up to 10, then every 10th up to 100, ...), plus the final
    epoch.

    Besides `x`, the function reads the module-level label placeholder `y`,
    which therefore has to exist before the call. Returns None.
    """
    prediction = neural_network_model(x)

    # Named arguments: TensorFlow >= 1.0 rejects the positional form of this
    # call, and the keyword names are also valid on earlier releases.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))

    #metrics
    actual_class = tf.argmax(y, 1)
    correct_prediction = tf.equal(tf.argmax(prediction, 1), actual_class)
    false_prediction = tf.logical_not(correct_prediction)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    # Confusion-matrix counts, with class index 1 as the positive class.
    # A wrong prediction on a positive row is a false NEGATIVE and a wrong
    # prediction on a negative row is a false POSITIVE. The labels are
    # already one-hot, so argmax is applied to them directly.
    # These tensors are not evaluated below; they are available for
    # sess.run()/eval() if per-class reporting is added.
    actual_positive = tf.equal(actual_class, 1)
    actual_negative = tf.logical_not(actual_positive)
    true_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, actual_positive)))
    false_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, actual_positive)))
    true_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, actual_negative)))
    false_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, actual_negative)))

    #learning rate can be passed
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    display_step = 1
    batches_per_epoch = num_examples // batch_size

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            for _ in range(batches_per_epoch):
                epoch_x, epoch_y = next_batch(batch_size)
                sess.run(optimizer, feed_dict={x: epoch_x, y: epoch_y})

            # increase display_step after 10 iteration of same decimal
            if epoch % (display_step * 10) == 0 and epoch:
                display_step *= 10

            if epoch % display_step == 0 or (epoch + 1) == hm_epochs:
                train_accuracy = accuracy.eval(feed_dict={x: train_features, y: train_labels})
                validation_accuracy = accuracy.eval(feed_dict={x: validation_features, y: validation_labels})
                print('train accuracy => %.2f, validation accuracy => %.2f for epoch %d' % (train_accuracy, validation_accuracy, epoch))

                

In [8]:
# Graph inputs. `x` is handed to the training function explicitly, while `y`
# is read by it as a module-level name, so both must exist before the call.
x = tf.placeholder('float', [None, feature_count])
y = tf.placeholder('float', [None, n_classes])

train_neural_network(x)

train accuracy => 0.54, validation accuracy => 0.50 for epoch 0
train accuracy => 0.54, validation accuracy => 0.50 for epoch 1
train accuracy => 0.54, validation accuracy => 0.50 for epoch 2
train accuracy => 0.54, validation accuracy => 0.50 for epoch 3
train accuracy => 0.54, validation accuracy => 0.50 for epoch 4
train accuracy => 0.54, validation accuracy => 0.50 for epoch 5
train accuracy => 0.54, validation accuracy => 0.50 for epoch 6
train accuracy => 0.54, validation accuracy => 0.50 for epoch 7
train accuracy => 0.54, validation accuracy => 0.50 for epoch 8
train accuracy => 0.54, validation accuracy => 0.50 for epoch 9
train accuracy => 0.54, validation accuracy => 0.50 for epoch 10
train accuracy => 0.54, validation accuracy => 0.50 for epoch 20
train accuracy => 0.54, validation accuracy => 0.50 for epoch 30
train accuracy => 0.54, validation accuracy => 0.50 for epoch 40
train accuracy => 0.54, validation accuracy => 0.50 for epoch 50
train accuracy => 0.54, validation 

In [9]:
def train_neural_network_CV(x):
    """Train on K-fold partitions of the training data and print accuracies.

    Builds the model on the feature placeholder `x` and, for each of
    `hm_epochs` epochs, takes one Adam step on the training part of each of
    10 shuffled folds, scoring accuracy on that fold's training and test
    parts right after the step. The per-fold scores are averaged per epoch
    and printed together with the validation accuracy on the same thinning
    schedule as the plain training loop.

    Besides `x`, the function reads the module-level label placeholder `y`.
    Returns None.

    NOTE(review): the weights are initialised once and carried across folds
    and epochs, so every row has been used for a gradient step before it is
    scored as "test" in a later fold or epoch. The reported test accuracy is
    therefore not a held-out estimate; the validation set is the only data
    the optimiser never sees.
    """
    prediction = neural_network_model(x)

    # Named arguments: TensorFlow >= 1.0 rejects the positional form of this
    # call, and the keyword names are also valid on earlier releases.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))

    #metrics
    actual_class = tf.argmax(y, 1)
    correct_prediction = tf.equal(tf.argmax(prediction, 1), actual_class)
    false_prediction = tf.logical_not(correct_prediction)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    # Confusion-matrix counts, with class index 1 as the positive class.
    # A wrong prediction on a positive row is a false NEGATIVE and a wrong
    # prediction on a negative row is a false POSITIVE. The labels are
    # already one-hot, so argmax is applied to them directly.
    # These tensors are not evaluated below; they are available for
    # sess.run()/eval() if per-class reporting is added.
    actual_positive = tf.equal(actual_class, 1)
    actual_negative = tf.logical_not(actual_positive)
    true_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, actual_positive)))
    false_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, actual_positive)))
    true_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, actual_negative)))
    false_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, actual_negative)))

    #learning rate can be passed
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    display_step = 1

    # With a fixed integer random_state the splitter yields the same folds on
    # every call, so the index pairs are computed once instead of per epoch.
    kf = KFold(n_splits=10, random_state=seed, shuffle=True)
    folds = list(kf.split(train_features, train_labels))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            #train and test accuracy collected over all folds
            test_accs = []
            train_accs = []

            for train_index, test_index in folds:
                X_train, X_test = train_features[train_index], train_features[test_index]
                y_train, y_test = train_labels[train_index], train_labels[test_index]

                # One full-batch gradient step on this fold's training part.
                sess.run(optimizer, feed_dict={x: X_train, y: y_train})

                train_accs.append(accuracy.eval(feed_dict={x: X_train, y: y_train}))
                test_accs.append(accuracy.eval(feed_dict={x: X_test, y: y_test}))

            train_acc = np.mean(train_accs)
            test_acc = np.mean(test_accs)

            # increase display_step after 10 iteration of same decimal
            if epoch % (display_step * 10) == 0 and epoch:
                display_step *= 10

            if epoch % display_step == 0 or (epoch + 1) == hm_epochs:
                validation_accuracy = accuracy.eval(feed_dict={x: validation_features, y: validation_labels})
                print('train accuracy => %.2f, test acc =>  %.2f, validation accuracy => %.2f for epoch %d' % (train_acc, test_acc, validation_accuracy, epoch))

                

In [10]:
# Fresh graph inputs for the cross-validation run. `x` is passed explicitly;
# `y` is read by the training function as a module-level name.
# NOTE(review): no graph reset is visible before this cell, so this
# presumably adds a second model to the same default graph — confirm.
x = tf.placeholder('float', [None, feature_count])
y = tf.placeholder('float', [None, n_classes])

train_neural_network_CV(x)

train accuracy => 0.49, test acc =>  0.47, validation accuracy => 0.50 for epoch 0
train accuracy => 0.54, test acc =>  0.54, validation accuracy => 0.50 for epoch 1
train accuracy => 0.54, test acc =>  0.54, validation accuracy => 0.50 for epoch 2
train accuracy => 0.54, test acc =>  0.54, validation accuracy => 0.50 for epoch 3
train accuracy => 0.54, test acc =>  0.54, validation accuracy => 0.50 for epoch 4
train accuracy => 0.54, test acc =>  0.54, validation accuracy => 0.50 for epoch 5
train accuracy => 0.54, test acc =>  0.55, validation accuracy => 0.50 for epoch 6
train accuracy => 0.55, test acc =>  0.55, validation accuracy => 0.50 for epoch 7
train accuracy => 0.56, test acc =>  0.57, validation accuracy => 0.40 for epoch 8
train accuracy => 0.59, test acc =>  0.52, validation accuracy => 0.50 for epoch 9
train accuracy => 0.59, test acc =>  0.56, validation accuracy => 0.60 for epoch 10
train accuracy => 0.67, test acc =>  0.67, validation accuracy => 0.50 for epoch 20
tr