In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.utils import shuffle

In [2]:
#X = pd.read_csv('./data/'+ 'x_train' +'.csv', header=None)
#X

In [2]:
class Data:
    def __init__(self, batch_size):
        self.x = Data.normalize(Data.import_data('x_train'))
        self.y = Data.convert(Data.import_data('y_train'))
        self.val_x = Data.normalize(Data.import_data('x_validation'))
        self.val_y = Data.convert(Data.import_data('y_validation'))
        self.pointer = 0
        self.batch_size = batch_size
    
    def normalize(data):
        mean, std = data.mean(), data.std()
        data = (data - mean) / std
        return data
    
    def scale(data):
        return preprocessing.scale(data)
    
    def shuffle_data(self):
        self.x, self.y = shuffle(self.x, self.y, random_state=0)
    
    def next_batch(self):
        batch = (self.x[self.pointer:self.pointer + self.batch_size], self.y[self.pointer:self.pointer + self.batch_size])
        self.pointer += self.batch_size
        return batch[0], batch[1]
    
    def convert(data):
        return pd.get_dummies(data, columns = [0]).values
        
    def import_data(name):
        return pd.read_csv('./data/'+ name +'.csv', header=None)
    
    def valid_data(self):
        return self.val_x
    
    def label_data(self):
        return self.val_y
    
    def move_pointer(self):
        self.pointer = 0
        
    def get_pointer(self):
        return self.pointer
    
    def get_train_size(self):
        return self.x.shape[0]
    
    def get_train_data(self):
        return self.y

In [3]:
# Constants
learning_rate = 0.001
reg_constant = 0.001
#Parameters
epochs = 100
batch_size = 32
logs_path = "./logs"

data = Data(batch_size)

# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = 32 # data input (img shape: 8*4)
num_classes = 2 # total classes (0-1)

with tf.name_scope('input'):
    # Graph inputs
    X = tf.placeholder("float", [None, num_input])
    Y = tf.placeholder("float", [None, num_classes])
    #dropout_prob = tf.placeholder(tf.float32)

FileNotFoundError: File b'./data/x_train.csv' does not exist

In [5]:
# Store layers weight & bias
with tf.name_scope("weights"):
    weights = {
        'h1': tf.Variable(tf.truncated_normal([num_input, n_hidden_1])),
        'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.truncated_normal([n_hidden_2, num_classes]))
    }
with tf.name_scope("biases"):
    biases = {
        'b1': tf.Variable(tf.truncated_normal([n_hidden_1])),
        'b2': tf.Variable(tf.truncated_normal([n_hidden_2])),
        'out': tf.Variable(tf.truncated_normal([num_classes]))
    }

In [6]:
# Create model
def neural_net(x):
    # Hidden fully connected layer with 256 neurons
    with tf.name_scope('hidden_layer_1'):
        layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
        layer_1 = tf.nn.relu(layer_1)
        #dropout = tf.nn.dropout(layer_1, dropout_prob)
    # Hidden fully connected layer with 256 neurons
    with tf.name_scope('hidden_layer_2'):
        layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
        layer_2 = tf.nn.relu(layer_2)
        #dropout_2 = tf.nn.dropout(layer_2, dropout_prob)
        
    # Output fully connected layer with a neuron for each class
    with tf.name_scope('ouput_layer_1'):
        out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    
    return out_layer

In [7]:
# Construct model
logits = neural_net(X)

# Define loss and optimizer
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)
loss_op = tf.reduce_mean(entropy + reg_constant*tf.nn.l2_loss(weights['h1']) +
                         reg_constant*tf.nn.l2_loss(weights['h2']) + reg_constant*tf.nn.l2_loss(weights['out']))

optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss_op)

correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [8]:
# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(tf.global_variables_initializer())

    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('loss', loss_op)
    tf.summary.scalar('learning_rate', learning_rate)
    merged = tf.summary.merge_all()
    
    test_writer = tf.summary.FileWriter(logs_path + '/train/' + 'final_2', sess.graph)
    for step in range(1, epochs):
        while data.get_train_size() > batch_size + data.get_pointer():
            batch_x, batch_y = data.next_batch()
            train.run(feed_dict={X: batch_x, Y: batch_y})
            
        valid_x = data.valid_data()
        valid_y = data.label_data()
        summary, test_acc, loss_val = sess.run([merged, accuracy, loss_op], feed_dict={X: valid_x, Y: valid_y})
        test_writer.add_summary(summary, step)
        print("Epoch: ", step, "Testing Accuracy: ", test_acc, "Loss: ", loss_val)
        
        data.shuffle_data()
        data.move_pointer()
    print("Optimization Finished!")
    
    test_data = pd.read_csv('./data/x_test.csv', header=None)
    results = sess.run(logits, feed_dict={X: test_data})

Epoch:  1 Testing Accuracy:  0.83325 Loss:  35.4056
Epoch:  2 Testing Accuracy:  0.81595 Loss:  29.7051
Epoch:  3 Testing Accuracy:  0.8777 Loss:  25.1971
Epoch:  4 Testing Accuracy:  0.89175 Loss:  21.762
Epoch:  5 Testing Accuracy:  0.8614 Loss:  19.1768
Epoch:  6 Testing Accuracy:  0.8303 Loss:  17.3535
Epoch:  7 Testing Accuracy:  0.8082 Loss:  15.0265
Epoch:  8 Testing Accuracy:  0.88575 Loss:  12.1134
Epoch:  9 Testing Accuracy:  0.83735 Loss:  10.5693
Epoch:  10 Testing Accuracy:  0.9066 Loss:  9.16552
Epoch:  11 Testing Accuracy:  0.86975 Loss:  7.17789
Epoch:  12 Testing Accuracy:  0.90335 Loss:  5.50037
Epoch:  13 Testing Accuracy:  0.90125 Loss:  4.30814
Epoch:  14 Testing Accuracy:  0.8968 Loss:  3.28877
Epoch:  15 Testing Accuracy:  0.91235 Loss:  2.38052
Epoch:  16 Testing Accuracy:  0.92985 Loss:  1.65431
Epoch:  17 Testing Accuracy:  0.92465 Loss:  1.1878
Epoch:  18 Testing Accuracy:  0.93045 Loss:  0.820852
Epoch:  19 Testing Accuracy:  0.93165 Loss:  0.609227
Epoch:  

In [10]:
results.shape

(20000, 2)