In [94]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.special import expit

In [95]:
def sigmoid(z):
    return expit(z)

In [96]:
def predict(features, weights):
    z = np.dot(features, weights)
    return sigmoid(z)

In [98]:
def update_weights(features, labels, weights, lr):
    N = len(features)

    predictions = predict(features, weights)

    gradient = lr * (np.dot(features.T, predictions - labels) / N)

    weights -= gradient

    return weights

In [99]:
def decision_boundary(prob):
    return 1 if prob >= 0.5 else 0

def classify(predictions):
    decision_boundarys = np.vectorize(decision_boundary)
    return decision_boundarys(predictions)

In [100]:
def train(features, labels, weights, lr, iters):
    for i in range(iters):
        weights = update_weights(features, labels, weights, lr)

    return weights

In [149]:
def accuracy(predicted_labels, actual_labels):
    diff = predicted_labels - actual_labels
    return 1.0 - (float(np.count_nonzero(diff)) / len(diff))

In [135]:
def precision(predicted_labels, actual_labels):
    connected_result = zip(predicted_labels, actual_labels.values)
    both_ones = 0
    first_one = 0
    for prediction, actual in connected_result:
        if (prediction[0] == 1) & (actual[0] == 1):
            both_ones += 1
        if (prediction[0] == 1) & (actual[0] != 1):
            first_one += 1

    TP = both_ones
    FP = first_one

    return (TP)/ (TP+FP)


In [137]:
def recall(predicted_labels, actual_labels):
    connected_result = zip(predicted_labels, actual_labels.values)
    both_ones = 0
    total_ones = 0
    for prediction, actual in connected_result:
        if (prediction[0] == 1) & (actual[0] == 1):
            both_ones += 1
        if (actual[0] == 1):
            total_ones += 1

    return both_ones / total_ones

In [139]:
def f1_score(predicted_labels, actual_labels):
    precision_score = precision(predicted_labels, actual_labels)
    recall_score = recall(predicted_labels, actual_labels)

    return (2 * precision_score * recall_score) / (precision_score + recall_score)

In [142]:
def specificity(predicted_labels, actual_labels):
    connected_result = zip(predicted_labels, actual_labels.values)
    both_zeroes = 0
    ones_zeroes = 0
    for prediction, actual in connected_result:
        if (prediction[0] == 0) & (actual[0] == 0):
            both_zeroes += 1
        if (prediction[0] == 1 & actual[0] == 0):
            ones_zeroes += 1

    return both_zeroes / (both_zeroes + ones_zeroes)

In [103]:
df = pd.read_csv("/home/ragemachine/DataspellProjects/ml-assignments/assignment-2/Diabetes.csv")
features = ['Pregnency', 'Glucose', 'Blood Pressure', 'Skin Thickness', 'Insulin', 'BMI', 'DFP', 'Age']
x = df.loc[:, features]
y = df.loc[:, ['Diabetes']]

X_train, X_test, Y_train, Y_test = train_test_split(x,y, random_state=0, train_size=0.7)

In [150]:
weights = np.zeros([8, 1])
weights = train(X_train, Y_train, weights, 0.1, 1000)
predictions = predict(X_test, weights)
classifications = classify(predictions)

accuracy_result = accuracy(classifications, Y_test)
precision_result = precision(classifications, Y_test)
recall_result = recall(classifications, Y_test)
f1_result = f1_score(classifications, Y_test)
specificity_result = specificity(classifications, Y_test)

print("Accuracy = {0}".format(str(accuracy_result)))
print("Precision = {0}".format(str(precision_result)))
print("Recall = {0}".format(str(recall_result)))
print("F1 Score = {0}".format(str(f1_result)))
print("Specificity Score = {0}".format(str(specificity_result)))


Accuracy = 0.7056277056277056
Precision = 0.5238095238095238
Recall = 0.6111111111111112
F1 Score = 0.5641025641025642
Specificity Score = 0.5
