In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw dataset from a CSV file resolved relative to the notebook's
# working directory. Later cells read a "target" column from this frame.
df = pd.read_csv("initial_dataset_v2.csv")

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

def get_train_test_split(data):
    """Split ``data`` into stratified train and test frames.

    A single 80/20 shuffle split is drawn with a fixed ``random_state`` so the
    partition is reproducible, and the class proportions of the ``"target"``
    column are preserved in both parts.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``"target"`` column to stratify on.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(data_train, data_test)`` row subsets of ``data``; the original
        index labels are kept.
    """
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    # n_splits=1, so the generator yields exactly one (train, test) pair.
    train_index, test_index = next(split.split(data, data["target"]))
    # split() returns *positional* row numbers. They must be consumed with
    # .iloc; .loc would interpret them as index labels and pick the wrong rows
    # (or raise KeyError) whenever the frame does not carry a default
    # 0..n-1 RangeIndex, e.g. after filtering or setting an index column.
    data_train = data.iloc[train_index]
    data_test = data.iloc[test_index]
    return data_train, data_test

In [4]:
# Draw the stratified split, then separate the feature columns from the
# "target" label column for each partition.
train, test = get_train_test_split(df)

X_train, y_train = train.drop('target', axis=1), train['target']
X_test, y_test = test.drop('target', axis=1), test['target']

In [5]:
# from sklearn.linear_model import LogisticRegression

# log_reg = LogisticRegression()
# log_reg.fit(X_train, y_train)

In [6]:
# predictions = log_reg.predict(X_test)

# Evaluation of Dataset 3

In [7]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import roc_auc_score

def print_complete_evaluation_statistics(original, predictions):
    """Print accuracy, precision, recall, F1 and the confusion matrix.

    Parameters
    ----------
    original : array-like
        Ground-truth labels (passed as the first argument to every metric).
    predictions : array-like
        Predicted labels, same length as ``original``.

    Returns
    -------
    None
        Everything is written to stdout.

    Notes
    -----
    Precision, recall and F1 are computed with the scikit-learn default
    arguments, and the legend printed under the matrix describes a 2x2
    layout.
    """
    acc = accuracy_score(original, predictions)
    matrix = confusion_matrix(original, predictions)
    prec = precision_score(original, predictions)
    rec = recall_score(original, predictions)
    f1_value = f1_score(original, predictions)

    summary = "Accuracy: {}\nPrecision: {}\nRecall: {}\nF1: {}".format(acc, prec, rec, f1_value)
    print(summary)
    print("Confusion Matrix:")
    print("{}".format(matrix))

    # Legend explaining how to read the matrix printed above.
    legend_lines = (
        "Format:",
        "True Negatives --- False Positives",
        "False Negatives --- True Positives",
    )
    for legend_line in legend_lines:
        print(legend_line)

In [8]:
# print_complete_evaluation_statistics(y_test, predictions)

In [5]:
import tensorflow as tf
import time

In [6]:
def get_accuracy(predictions, target):
    """Return the percentage of positions where ``predictions`` equals ``target``.

    Elements are compared pairwise by position, so any sequence type works
    (list, NumPy array, pandas Series regardless of its index labels).

    Parameters
    ----------
    predictions : sequence
        Predicted labels.
    target : sequence
        Expected labels; must contain at least ``len(predictions)`` elements.

    Returns
    -------
    float
        Accuracy in percent (0-100), rounded to 5 decimal places. ``0.0`` is
        returned for empty ``predictions``.

    Raises
    ------
    IndexError
        If ``target`` is shorter than ``predictions``.
    """
    total = len(predictions)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    if len(target) < total:
        raise IndexError("target has fewer elements than predictions")

    count_correct = sum(1 for p, t in zip(predictions, target) if p == t)
    # Scale to percent *before* rounding: rounding the ratio and multiplying
    # by 100 afterwards reintroduces binary floating-point noise
    # (e.g. 47.987649999999995 instead of 47.98765).
    return round(100.0 * count_correct / total, 5)

In [35]:
# Train a one-hidden-layer tanh MLP (TF1 graph API) with full-batch gradient
# descent on the training split and report training accuracy every 10 epochs.
tf.reset_default_graph()
# The graph-level seed must be set BEFORE any op is created: the weight
# initializers built by tf.layers.dense pick up the seed at construction
# time, so seeding after the layers exist leaves initialization unseeded.
tf.set_random_seed(20)

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 100
lr = 0.1
epochs = 100

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
# 4 output logits; assumes the labels fed through `target` are class ids 0..3 - TODO confirm against the dataset.
output = tf.layers.dense(hidden, 4, activation=None)

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
# The loss consumes the raw logits; softmax is applied inside the op.
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        # One full-batch gradient step; predictions are fetched in the same run.
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))

end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 100
Learning Rate: 0.1
Total Epochs: 100
Accuracy after epoch 0 = 14.28395%
Accuracy after epoch 10 = 46.59259%
Accuracy after epoch 20 = 47.987649999999995%
Accuracy after epoch 30 = 48.35802%
Accuracy after epoch 40 = 48.41975%
Accuracy after epoch 50 = 48.48148%
Accuracy after epoch 60 = 48.48148%
Accuracy after epoch 70 = 48.765429999999995%
Accuracy after epoch 80 = 48.77778%
Accuracy after epoch 90 = 48.77778%
Time taken = 0.73851 seconds


In [34]:
# Train a one-hidden-layer tanh MLP (TF1 graph API) with full-batch gradient
# descent on the training split and report training accuracy every 10 epochs.
tf.reset_default_graph()
# The graph-level seed must be set BEFORE any op is created: the weight
# initializers built by tf.layers.dense pick up the seed at construction
# time, so seeding after the layers exist leaves initialization unseeded.
tf.set_random_seed(20)

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 5
lr = 0.1
epochs = 200

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
# 4 output logits; assumes the labels fed through `target` are class ids 0..3 - TODO confirm against the dataset.
output = tf.layers.dense(hidden, 4, activation=None)

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
# The loss consumes the raw logits; softmax is applied inside the op.
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        # One full-batch gradient step; predictions are fetched in the same run.
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 5
Learning Rate: 0.1
Total Epochs: 200
Accuracy after epoch 0 = 39.74074%
Accuracy after epoch 10 = 45.09877%
Accuracy after epoch 20 = 45.58025%
Accuracy after epoch 30 = 46.62963%
Accuracy after epoch 40 = 46.76543%
Accuracy after epoch 50 = 46.888889999999996%
Accuracy after epoch 60 = 47.2716%
Accuracy after epoch 70 = 47.23457%
Accuracy after epoch 80 = 47.22222%
Accuracy after epoch 90 = 47.20988%
Accuracy after epoch 100 = 47.283950000000004%
Accuracy after epoch 110 = 47.518519999999995%
Accuracy after epoch 120 = 47.5679%
Accuracy after epoch 130 = 47.5679%
Accuracy after epoch 140 = 47.5679%
Accuracy after epoch 150 = 47.62963%
Accuracy after epoch 160 = 47.5679%
Accuracy after epoch 170 = 47.95062%
Accuracy after epoch 180 = 47.95062%
Accuracy after epoch 190 = 47.962959999999995%
Time taken = 0.81729 seconds


In [38]:
# Train a one-hidden-layer tanh MLP (TF1 graph API) with full-batch gradient
# descent on the training split, report training accuracy every 10 epochs,
# then score the held-out test split once.
tf.reset_default_graph()
# The graph-level seed must be set BEFORE any op is created: the weight
# initializers built by tf.layers.dense pick up the seed at construction
# time, so seeding after the layers exist leaves initialization unseeded.
tf.set_random_seed(20)

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 30
lr = 0.1
epochs = 100

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
# 4 output logits; assumes the labels fed through `target` are class ids 0..3 - TODO confirm against the dataset.
output = tf.layers.dense(hidden, 4, activation=None)

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
# The loss consumes the raw logits; softmax is applied inside the op.
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
test_x = X_test
test_y = y_test.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        # One full-batch gradient step; predictions are fetched in the same run.
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
    # Evaluation only: do NOT run train_op here, otherwise the model takes a
    # gradient step on the test data while it is being scored. `predictions`
    # depends solely on `x`, so the label and learning-rate placeholders do
    # not need to be fed.
    result = sess.run(predictions, feed_dict={x: test_x})
    print("Test Set: Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,test_y)))
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 30
Learning Rate: 0.1
Total Epochs: 100
Accuracy after epoch 0 = 20.59259%
Accuracy after epoch 10 = 46.18519%
Accuracy after epoch 20 = 47.81481%
Accuracy after epoch 30 = 48.25926%
Accuracy after epoch 40 = 48.48148%
Accuracy after epoch 50 = 48.55556%
Accuracy after epoch 60 = 48.69136%
Accuracy after epoch 70 = 48.765429999999995%
Accuracy after epoch 80 = 48.765429999999995%
Accuracy after epoch 90 = 48.75309%
Test Set: Accuracy after epoch 99 = 48.61797%
Time taken = 0.52712 seconds
