In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("initial_dataset_v2.csv")

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

def get_train_test_split(data):
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    for train_index, test_index in split.split(data, data["target"]):
        data_train = data.loc[train_index]
        data_test = data.loc[test_index]
    return data_train, data_test

In [4]:
train,test = get_train_test_split(df)

X_train = train.drop('target',axis=1)
y_train = train['target']
X_test = test.drop('target',axis=1)
y_test = test['target']

In [5]:
import tensorflow as tf
X_train_norm = tf.keras.utils.normalize(X_train.values,axis=1)
X_test_norm = tf.keras.utils.normalize(X_test.values,axis=1)

In [6]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression()
log_reg.fit(X_train_norm, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [7]:
predictions = log_reg.predict(X_test_norm)

# Evaluation of Dataset 3

In [8]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import roc_auc_score

def print_complete_evaluation_statistics(original, predictions):
    
    accuracy = accuracy_score(original, predictions)
    conf_matrix = confusion_matrix(original, predictions)
#     precision = precision_score(original, predictions)
#     recall = recall_score(original, predictions)
#     f1 = f1_score(original, predictions)
    precision = 0
    recall = 0
    f1 = 0
    
    print("Accuracy: {}\nPrecision: {}\nRecall: {}\nF1: {}".format(accuracy,precision,recall,f1))
    print("Confusion Matrix:")
    print("{}".format(conf_matrix))
    print("Format:")
    print("True Negatives --- False Positives")
    print("False Negatives --- True Positives")

In [9]:
print_complete_evaluation_statistics(predictions,y_test)

Accuracy: 0.4768015794669299
Precision: 0
Recall: 0
F1: 0
Confusion Matrix:
[[214 161 184]
 [  0   1   0]
 [341 374 751]]
Format:
True Negatives --- False Positives
False Negatives --- True Positives


In [10]:
import time

In [11]:
def get_accuracy(predictions,target):
    count_correct = 0
    total = len(predictions)
    for i in range(len(predictions)):
        p = predictions[i]
        t = target[i]
        if p == t: 
            count_correct += 1
    accuracy = round(count_correct/total,7)*100
    return accuracy

In [13]:
tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 100
lr = 0.1
epochs = 100

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
output = tf.layers.dense(hidden, 4, activation=None) # changed to 3 because of 3 possible classes

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
test_x = X_test
test_y = y_test.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
tf.set_random_seed(20)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
    result, _ = sess.run([predictions, train_op], feed_dict={x: test_x, target: test_y, learning_rate: lr})
    print("Test Set: Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,test_y)))
            
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 100
Learning Rate: 0.1
Total Epochs: 100
Accuracy after epoch 0 = 27.246910000000003%
Accuracy after epoch 10 = 47.37037%
Accuracy after epoch 20 = 48.41975%
Accuracy after epoch 30 = 48.37037%
Accuracy after epoch 40 = 48.50617%
Accuracy after epoch 50 = 48.790119999999995%
Accuracy after epoch 60 = 48.85185%
Accuracy after epoch 70 = 48.87654%
Accuracy after epoch 80 = 48.814809999999994%
Accuracy after epoch 90 = 48.92593%
Test Set: Accuracy after epoch 99 = 47.13722%
Time taken = 1.24629 seconds


In [14]:
tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 5
lr = 0.1
epochs = 200

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
output = tf.layers.dense(hidden, 4, activation=None) # changed to 3 because of 3 possible classes

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
test_x = X_test
test_y = y_test.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
tf.set_random_seed(20)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
    result, _ = sess.run([predictions, train_op], feed_dict={x: test_x, target: test_y, learning_rate: lr})
    print("Test Set: Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,test_y)))
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 5
Learning Rate: 0.1
Total Epochs: 200
Accuracy after epoch 0 = 25.333329999999997%
Accuracy after epoch 10 = 41.69136%
Accuracy after epoch 20 = 44.97531%
Accuracy after epoch 30 = 45.81481%
Accuracy after epoch 40 = 46.38272%
Accuracy after epoch 50 = 47.061730000000004%
Accuracy after epoch 60 = 47.32099%
Accuracy after epoch 70 = 47.58025%
Accuracy after epoch 80 = 47.87654%
Accuracy after epoch 90 = 47.92593%
Accuracy after epoch 100 = 47.91358%
Accuracy after epoch 110 = 47.864200000000004%
Accuracy after epoch 120 = 47.93827%
Accuracy after epoch 130 = 47.95062%
Accuracy after epoch 140 = 48.01235%
Accuracy after epoch 150 = 48.14815%
Accuracy after epoch 160 = 48.18519%
Accuracy after epoch 170 = 48.2716%
Accuracy after epoch 180 = 48.24691%
Accuracy after epoch 190 = 48.28395%
Test Set: Accuracy after epoch 199 = 48.173739999999995%
Time taken = 1.18051 seconds


In [24]:
tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 18
lr = 0.1
epochs = 100

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
output = tf.layers.dense(hidden, 4, activation=None) # changed to 3 because of 2 possible classes

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
test_x = X_test
test_y = y_test.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
tf.set_random_seed(20)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
    result, _ = sess.run([predictions, train_op], feed_dict={x: test_x, target: test_y, learning_rate: lr})
    print("Test Set: Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,test_y)))
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 18
Learning Rate: 0.1
Total Epochs: 100
Accuracy after epoch 0 = 28.30864%
Accuracy after epoch 10 = 45.32099%
Accuracy after epoch 20 = 46.14815%
Accuracy after epoch 30 = 47.75309%
Accuracy after epoch 40 = 47.92593%
Accuracy after epoch 50 = 48.12346%
Accuracy after epoch 60 = 48.24691%
Accuracy after epoch 70 = 48.23457%
Accuracy after epoch 80 = 48.296299999999995%
Accuracy after epoch 90 = 48.37037%
Test Set: Accuracy after epoch 99 = 48.61797%
Time taken = 0.55391 seconds
