In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("initial_dataset_v3.csv")

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

def get_train_test_split(data):
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    for train_index, test_index in split.split(data, data["target"]):
        data_train = data.loc[train_index]
        data_test = data.loc[test_index]
    return data_train, data_test

In [4]:
train,test = get_train_test_split(df)

X_train = train.drop('target',axis=1)
y_train = train['target']
X_test = test.drop('target',axis=1)
y_test = test['target']

In [5]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [6]:
predictions = log_reg.predict(X_test)

# Evaluation of Dataset 3

In [7]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import roc_auc_score

def print_complete_evaluation_statistics(original, predictions):
    
    accuracy = accuracy_score(original, predictions)
    conf_matrix = confusion_matrix(original, predictions)
    precision = precision_score(original, predictions)
    recall = recall_score(original, predictions)
    f1 = f1_score(original, predictions)
    
    print("Accuracy: {}\nPrecision: {}\nRecall: {}\nF1: {}".format(accuracy,precision,recall,f1))
    print("Confusion Matrix:")
    print("{}".format(conf_matrix))
    print("Format:")
    print("True Negatives --- False Positives")
    print("False Negatives --- True Positives")

In [8]:
print_complete_evaluation_statistics(predictions,y_test)

Accuracy: 0.6243830207305034
Precision: 0.5219251336898396
Recall: 0.6084788029925187
F1: 0.5618883131836501
Confusion Matrix:
[[777 447]
 [314 488]]
Format:
True Negatives --- False Positives
False Negatives --- True Positives


In [9]:
import tensorflow as tf
import time

In [10]:
def get_accuracy(predictions,target):
    count_correct = 0
    total = len(predictions)
    for i in range(len(predictions)):
        p = predictions[i]
        t = target[i]
        if p == t: 
            count_correct += 1
    accuracy = round(count_correct/total,7)*100
    return accuracy

In [11]:
tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 100
lr = 0.5
epochs = 500

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
output = tf.layers.dense(hidden, 2, activation=None) # changed to 2 because of 2 possible classes

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
tf.set_random_seed(20)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 100
Learning Rate: 0.5
Total Epochs: 500
Accuracy after epoch 0 = 39.888889999999996%
Accuracy after epoch 10 = 39.55556%
Accuracy after epoch 20 = 60.456790000000005%
Accuracy after epoch 30 = 60.48148%
Accuracy after epoch 40 = 60.456790000000005%
Accuracy after epoch 50 = 60.518519999999995%
Accuracy after epoch 60 = 39.80247%
Accuracy after epoch 70 = 41.25926%
Accuracy after epoch 80 = 40.20988%
Accuracy after epoch 90 = 41.17284%
Accuracy after epoch 100 = 40.259260000000005%
Accuracy after epoch 110 = 60.518519999999995%
Accuracy after epoch 120 = 60.456790000000005%
Accuracy after epoch 130 = 60.456790000000005%
Accuracy after epoch 140 = 60.456790000000005%
Accuracy after epoch 150 = 60.456790000000005%
Accuracy after epoch 160 = 60.456790000000005%
Accuracy after epoch 170 = 60.469139999999996%
Accuracy after epoch 180 = 60.469139999999996%
Accuracy after epoch 190 = 60.456790000000005%
Accuracy after epoch 200 = 60.456790000000005%
Accuracy after epoch 210

In [12]:
tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 100
lr = 0.1
epochs = 100

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
output = tf.layers.dense(hidden, 2, activation=None) # changed to 2 because of 2 possible classes

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
tf.set_random_seed(20)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 100
Learning Rate: 0.1
Total Epochs: 100
Accuracy after epoch 0 = 41.92593%
Accuracy after epoch 10 = 41.07407%
Accuracy after epoch 20 = 46.691359999999996%
Accuracy after epoch 30 = 53.04938%
Accuracy after epoch 40 = 56.24691000000001%
Accuracy after epoch 50 = 57.87654%
Accuracy after epoch 60 = 58.703700000000005%
Accuracy after epoch 70 = 59.617279999999994%
Accuracy after epoch 80 = 60.16049%
Accuracy after epoch 90 = 60.58025%
Time taken = 1.62451 seconds


In [15]:
tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, 5], name="x")
target = tf.placeholder(tf.int32, [None], name="target")
learning_rate = tf.placeholder(tf.float32, name="learning_rate")

hidden_layer_size = 200
lr = 0.01
epochs = 300

hidden = tf.layers.dense(x, hidden_layer_size, activation=tf.tanh)
output = tf.layers.dense(hidden, 2, activation=None) # changed to 2 because of 2 possible classes

probabilities = tf.nn.softmax(output)
predictions = tf.argmax(probabilities, axis=1)
loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=target)
loss = tf.reduce_mean(loss_)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

data_x = X_train
data_targets = y_train.tolist()
print("Hidden Layer Size: {}".format(hidden_layer_size))
print("Learning Rate: {}".format(lr))
print("Total Epochs: {}".format(epochs))
start_time = time.time()
tf.set_random_seed(20)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        result, _ = sess.run([predictions, train_op], feed_dict={x: data_x, target: data_targets, learning_rate: lr})
        if epoch % 10 == 0:
            print("Accuracy after epoch {} = {}%".format(epoch,get_accuracy(result,data_targets)))
end_time = time.time()
print("Time taken = {} seconds".format(round(end_time-start_time,5)))

Hidden Layer Size: 200
Learning Rate: 0.01
Total Epochs: 300
Accuracy after epoch 0 = 39.60494%
Accuracy after epoch 10 = 60.40740999999999%
Accuracy after epoch 20 = 60.62963%
Accuracy after epoch 30 = 60.814809999999994%
Accuracy after epoch 40 = 60.74074%
Accuracy after epoch 50 = 60.75309%
Accuracy after epoch 60 = 60.765429999999995%
Accuracy after epoch 70 = 60.72840000000001%
Accuracy after epoch 80 = 60.74074%
Accuracy after epoch 90 = 60.87654%
Accuracy after epoch 100 = 60.888889999999996%
Accuracy after epoch 110 = 60.888889999999996%
Accuracy after epoch 120 = 60.90123%
Accuracy after epoch 130 = 60.90123%
Accuracy after epoch 140 = 60.91358%
Accuracy after epoch 150 = 60.90123%
Accuracy after epoch 160 = 60.888889999999996%
Accuracy after epoch 170 = 60.90123%
Accuracy after epoch 180 = 60.864200000000004%
Accuracy after epoch 190 = 60.90123%
Accuracy after epoch 200 = 60.925929999999994%
Accuracy after epoch 210 = 60.925929999999994%
Accuracy after epoch 220 = 60.92592999