In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [2]:
# Load the dataset from CSV. The path is relative to the current working
# directory, so the notebook must be started from the expected folder.
# NOTE(review): the file name suggests rows with nulls were already dropped
# upstream — confirm, nothing in this notebook checks for missing values.
df = pd.read_csv('../ML Assignment/DATA/Dataset_NullDropped.csv')

In [4]:
# Store the `diagnosis` column with pandas' categorical dtype.
# NOTE(review): the split cell below calls `to_numpy()` on the whole frame; a
# categorical column mixed with numeric columns may yield an object-dtype
# array — confirm the dtype of the resulting feature matrices.
df['diagnosis']=df['diagnosis'].astype('category')

In [5]:
# Fixed seeds, one per train/test split; each is passed as `random_state` to
# `df.sample` in the split cell so the 10 partitions are reproducible.
randlist=[1223, 91, 760, 1096, 670, 715, 373, 69, 538, 455]

In [6]:
# Build one random 67/33 train/test partition per seed in `randlist`.
# The number of splits follows len(randlist) instead of a separately
# hard-coded 10, and the result lists are grown with append rather than being
# pre-filled with dummy placeholder values.
train_array, test_array = [], []
X_train, X_test = [], []
y_train, y_test = [], []
for seed in randlist:
    train_part = df.sample(frac=0.67, random_state=seed)
    # Every row that was not sampled for training goes to the test set.
    test_part = df.drop(train_part.index)
    # Sanity check: the two parts must partition the full frame.
    assert len(train_part) + len(test_part) == len(df)

    train_array.append(train_part)
    test_array.append(test_part)

    trainData_Split = train_part.to_numpy()
    testData_Split = test_part.to_numpy()

    # Column 0 is used as the label; all remaining columns are features.
    X_train.append(trainData_Split[:, 1:])
    y_train.append(trainData_Split[:, 0])
    X_test.append(testData_Split[:, 1:])
    y_test.append(testData_Split[:, 0])

In [7]:
# Spot-check the training feature matrix of the split at index 4.
print(X_train[4])

[[1.150e+01 1.845e+01 7.328e+01 ... 6.544e-02 2.740e-01 6.487e-02]
 [1.263e+01 2.076e+01 8.215e+01 ... 1.105e-01 2.226e-01 8.486e-02]
 [1.131e+01 1.904e+01 7.180e+01 ... 6.961e-02 2.400e-01 6.641e-02]
 ...
 [2.051e+01 2.781e+01 1.344e+02 ... 1.563e-01 2.437e-01 8.328e-02]
 [1.149e+01 1.459e+01 7.399e+01 ... 7.431e-02 2.941e-01 9.180e-02]
 [1.145e+01 2.097e+01 7.381e+01 ... 6.127e-02 2.762e-01 8.851e-02]]


In [10]:
class Perceptron:
    """Single-layer perceptron for binary classification.

    The model computes ``X @ weights + bias`` and thresholds it with the
    configured activation, which emits 0 or 1; training labels are therefore
    expected to be 0/1.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every weight/bias correction.
    num_iters : int
        Maximum number of passes (epochs) over the training data.
    """

    def __init__(self, learning_rate = 0.01, num_iters=1000):
        self.lr = learning_rate
        self.n_iters = num_iters
        self.activation = self._step_function
        # Both are set by fit(); None means the model is untrained.
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Train with the perceptron learning rule.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix. Coerced to float, so object-dtype arrays (as
            produced by ``DataFrame.to_numpy()`` on mixed-dtype frames) work.
        y : array-like of n_samples labels (0 or 1)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not have one label per row.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                "X has {} samples but y has {} labels".format(n_samples, y.shape[0])
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            changed = False
            for x_i, target in zip(X, y):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation(linear_output)

                update = self.lr * (target - y_predicted)
                # A zero update leaves the model untouched, so skip the work.
                if update != 0:
                    self.weights += update * x_i
                    self.bias += update
                    changed = True
            # An epoch without any correction means every later epoch would
            # be identical; stopping here yields the same final model.
            if not changed:
                break

    def predict(self, X):
        """Return the 0/1 prediction for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = self.activation(linear_output)
        return y_predicted

    def _step_function(self, x):
        """Heaviside step: 1 where ``x >= 0``, otherwise 0."""
        return np.where(x>=0, 1, 0)

    def _sigmoid_function(self, x):
        """Logistic sigmoid thresholded at 0.5, returning 0/1."""
        x = np.asarray(x, dtype=float)
        # exp() overflows to inf for very negative x; the sigmoid then
        # evaluates to 0, which is the intended result, so silence the warning.
        with np.errstate(over="ignore"):
            sig_x = 1/(1+np.exp(-x))
        return np.where(sig_x>=0.5, 1, 0)

In [11]:
def computeMetrics(y_actual, y_predicted, zero_division=float("nan")):
    """Compute accuracy, precision and recall for binary 0/1 labels.

    Parameters
    ----------
    y_actual : array-like
        Ground-truth labels; 1 is the positive class, 0 the negative class.
    y_predicted : array-like
        Predicted labels, same number of elements as ``y_actual``.
    zero_division : float, optional
        Value returned for precision (or recall) when its denominator is
        zero, i.e. nothing was predicted positive (or no actual positives).
        Defaults to NaN.

    Returns
    -------
    tuple of float
        ``(accuracy, precision, recall)``. Note the differing scales:
        accuracy is a percentage in [0, 100]; precision and recall are
        fractions in [0, 1].

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    # Coerce to arrays so plain lists are compared element-wise; with a list,
    # `y_actual == 1` would collapse to a single scalar False.
    y_actual = np.asarray(y_actual).ravel()
    y_predicted = np.asarray(y_predicted).ravel()
    if y_actual.shape != y_predicted.shape:
        raise ValueError(
            "y_actual and y_predicted must have the same length, got {} and {}".format(
                y_actual.size, y_predicted.size
            )
        )
    if y_actual.size == 0:
        raise ValueError("cannot compute metrics on empty inputs")

    actual_pos = y_actual == 1
    actual_neg = y_actual == 0
    pred_pos = y_predicted == 1
    pred_neg = y_predicted == 0

    tp = int(np.count_nonzero(actual_pos & pred_pos))
    fp = int(np.count_nonzero(actual_neg & pred_pos))
    fn = int(np.count_nonzero(actual_pos & pred_neg))

    accuracy = int(np.count_nonzero(y_actual == y_predicted)) / y_actual.size
    # Guard the denominators explicitly instead of relying on numpy's 0/0.
    precision = tp / (tp + fp) if (tp + fp) else zero_division
    recall = tp / (tp + fn) if (tp + fn) else zero_division
    return 100*accuracy, precision, recall

In [12]:
# One independent, untrained perceptron per train/test split.
p = [Perceptron(learning_rate=0.1, num_iters=30000) for _ in range(10)]

In [13]:
# Train each model on the training portion of its own split.
for model, features, labels in zip(p, X_train, y_train):
    model.fit(features, labels)

In [14]:
# Predict the held-out test portion of every split with its matching model.
predictions = [model.predict(features) for model, features in zip(p, X_test)]

In [15]:
# Score every split and collect the per-split metrics for the summary below.
acc, prec, rec = [], [], []
for i, (actual, predicted) in enumerate(zip(y_test, predictions)):
    split_acc, split_prec, split_rec = computeMetrics(actual, predicted)
    acc.append(split_acc)
    prec.append(split_prec)
    rec.append(split_rec)
    print("Metrics for test-train split {}".format(i))
    print(split_acc, split_prec, split_rec)
    print("\n")

Metrics for test-train split 0
91.93548387096774 0.8271604938271605 0.9852941176470589


Metrics for test-train split 1
93.01075268817203 0.9411764705882353 0.8767123287671232


Metrics for test-train split 2
91.93548387096774 0.9833333333333333 0.8082191780821918


Metrics for test-train split 3
90.86021505376344 0.9636363636363636 0.7794117647058824


Metrics for test-train split 4
92.47311827956989 0.8857142857142857 0.9117647058823529


Metrics for test-train split 5
93.01075268817203 0.8674698795180723 0.972972972972973


Metrics for test-train split 6
93.01075268817203 0.9787234042553191 0.7931034482758621


Metrics for test-train split 7
93.01075268817203 0.8666666666666667 0.9558823529411765


Metrics for test-train split 8
91.93548387096774 0.8904109589041096 0.9027777777777778


Metrics for test-train split 9
90.32258064516128 0.8111111111111111 0.9864864864864865




In [16]:
# Turn the per-split metric lists into arrays, then report the mean and the
# (population) variance of each metric across the splits.
acc, prec, rec = (np.array(values) for values in (acc, prec, rec))

acc_mean, acc_var = acc.mean(), acc.var()
prec_mean, prec_var = prec.mean(), prec.var()
rec_mean, rec_var = rec.mean(), rec.var()

print("Mean Accuracy = {} and Variance of Accuracy = {}".format(acc_mean, acc_var))
print("Mean Precision = {} and Variance of Precision = {}".format(prec_mean, prec_var))
print("Mean Recall = {} and Variance of Recall = {}".format(rec_mean, rec_var))

Mean Accuracy = 92.15053763440861 and Variance of Accuracy = 0.8209041507688714
Mean Precision = 0.9015402967554657 and Variance of Precision = 0.0034535361606881096
Mean Recall = 0.8972625133538885 and Variance of Recall = 0.0058208507192448345
