In [1]:
import numpy as np
from numpy import matmul

In [2]:
# Read the comma-separated DS1 training and test splits into float arrays.
ds1_train, ds1_test = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ("DS1_train.csv", "DS1_test.csv")
)

In [3]:
# Estimate the parameters of a two-class linear discriminant (Gaussian classes
# sharing one covariance matrix) from ds1_train. The last column of ds1_train
# is used as the 0/1 class label; every other column is a feature.
train_features = ds1_train[:, :-1]
train_labels = ds1_train[:, -1]

# number of samples of each class (summing the 0/1 labels counts class 1)
N1 = train_labels.sum()
N0 = ds1_train.shape[0] - N1

# prior probability of each class
p1 = N1/(N1 + N0)
p0 = N0/(N1 + N0)

# calculate mean for class 0 and class 1
class0_features = train_features[train_labels == 0]
class1_features = train_features[train_labels == 1]
u0 = class0_features.sum(axis=0)/N0
u1 = class1_features.sum(axis=0)/N1

# Per-class covariance. D.T.dot(D) on the mean-centred rows D equals the sum
# of the outer products (x - u)(x - u)^T over the class, without a Python loop
# and without assuming a fixed number of feature columns.
centered0 = class0_features - u0
centered1 = class1_features - u1
s0 = centered0.T.dot(centered0)/N0
s1 = centered1.T.dot(centered1)/N1

# calculate covariance according to equation in notes
# (prior-weighted average of the two class covariances)
cov = p0*s0 + p1*s1
cov_inv = np.linalg.inv(cov)

# Weights and bias of the linear decision function w.x + wo.
w = cov_inv.dot(u0 - u1)
wo = -0.5*(matmul(matmul(u0.T, cov_inv), u0)) + 0.5*(matmul(matmul(u1.T, cov_inv), u1)) + np.log(p0/p1)
print("w: ", w)
print("wo: ", wo)

w:  [ 1.40542533e+01 -8.26578614e+00 -5.06632505e+00 -2.69201272e+00
 -9.37280780e+00 -4.38523686e+00  1.58689599e+01 -2.34300606e+01
 -2.78745134e+01  9.09608016e+00 -1.28541098e+01 -1.16922993e+01
  1.48472460e+01  1.22183007e+01 -5.71140687e+00  1.27236593e+01
  2.80432792e+01 -6.58936552e+00  9.58045183e-03 -4.93335203e+00]
wo:  26.41842219565859


In [4]:
# P(t(n) | x) = sigmoid(a)
def sigmoid(a):
    """Return the logistic sigmoid 1 / (1 + exp(-a)) of ``a``.

    Computed as exp(-log(1 + exp(-a))) via ``np.logaddexp`` so that very
    negative inputs do not overflow inside ``exp`` (the naive formula emits an
    overflow RuntimeWarning there); the result simply saturates to 0.0 or 1.0.

    Parameters
    ----------
    a : scalar or numpy array
        Input value(s); arrays are processed element-wise.

    Returns
    -------
    numpy float or numpy array of the same shape as ``a``, with values in [0, 1].
    """
    return np.exp(-np.logaddexp(0, -a))

In [19]:
# P(t(n) | x) = sigmoid(w*x + wo)
# Score the fitted linear discriminant on the test set, treating class 1 as
# the positive class.

# Last column of ds1_test is the label; the remaining columns are features.
test_features = ds1_test[:, :-1]
test_labels = ds1_test[:, -1]

# sigmoid(...) gives the probability of class 0, so 1 - p is the probability of
# class 1; rounding it (half-to-even, like the built-in round) gives the hard
# 0/1 prediction for every row at once.
prob_class0 = sigmoid(test_features.dot(w) + wo)
predictions = np.round(1 - prob_class0)

# Confusion-matrix counts.
predicted_pos = predictions == 1
predicted_neg = predictions == 0
tp = int(np.sum(predicted_pos & (test_labels == 1)))
fp = int(np.sum(predicted_pos & (test_labels != 1)))
tn = int(np.sum(predicted_neg & (test_labels == 0)))
fn = int(np.sum(predicted_neg & (test_labels != 0)))

# Each ratio falls back to 0.0 when its denominator is zero (e.g. no positive
# predictions or an empty test set) instead of raising ZeroDivisionError.
total = (ds1_test.shape[0])
precision = tp/(tp + fp) if (tp + fp) else 0.0
recall = tp/(tp + fn) if (tp + fn) else 0.0
acc = (tn + tp)/(total) if total else 0.0
f1 = (2*precision*recall)/(precision + recall) if (precision + recall) else 0.0

print("Precision: ", precision)
print("Recall: ", recall)
print("Accuracy: ", acc)
print("F1 Measure: ", f1)

Precision:  0.9443037974683545
Recall:  0.9325
Accuracy:  0.93875
F1 Measure:  0.938364779874214
