In [169]:
import pandas as pd
import numpy as np
import math

In [170]:
def DataLoader(path):
    """Read a CSV file and split it into a feature matrix and a target column.

    Rows that contain any missing value are dropped. The features are every
    column from the first one up to and including ``'glucose'``; the target
    is the ``'TenYearCHD'`` column. Shape information is printed on the way.

    Parameters
    ----------
    path : location of the CSV file, handed directly to ``pd.read_csv``.

    Returns
    -------
    x : 2-D NumPy array holding the feature columns.
    y : NumPy array holding the target, reshaped to ``(-1, 1)``.
    """
    raw = pd.read_csv(path)
    print('data shape:',raw.shape)

    # Discard incomplete rows before slicing out features and target.
    complete = raw.dropna()
    print('shape after cleaning:',complete.shape)

    # Label-based slice: the end label 'glucose' is included.
    x = complete.loc[:, :'glucose'].to_numpy()
    feature_count = x.shape[1]
    print('number of input features:',feature_count)
    print('training data shape:',x.shape)

    # Column vector so it lines up row-by-row with the model output.
    y = complete['TenYearCHD'].to_numpy().reshape(-1, 1)
    print('target shape:',y.shape)
    return x, y

In [171]:
def fun(z):
    """Logistic sigmoid of a scalar: ``1 / (1 + e**-z)``.

    The direct formula calls ``math.exp(-z)``, which raises ``OverflowError``
    once ``-z`` exceeds roughly 709. For negative ``z`` the algebraically
    equal form ``e**z / (1 + e**z)`` is used instead, so the exponent passed
    to ``math.exp`` is never positive and the result saturates at 0.0 / 1.0.

    Parameters
    ----------
    z : real scalar.

    Returns
    -------
    float in the closed interval [0, 1].
    """
    if z >= 0:
        return 1/(1+math.exp(-z))
    # z < 0: exp(z) lies in [0, 1), so this cannot overflow.
    ez = math.exp(z)
    return ez/(1+ez)
def classify(y_p):
    """Turn a probability into a class label.

    Returns 1 when ``y_p`` is strictly greater than 0.5, otherwise 0
    (a value of exactly 0.5 maps to 0).
    """
    return 1 if y_p > 0.5 else 0

# Element-wise wrappers around the scalar helpers.
# otypes is given explicitly because np.vectorize otherwise has to call the
# function on the first element to discover the output dtype, which makes it
# raise ValueError on size-0 input.
sig = np.vectorize(fun, otypes=[float])      # sigmoid applied to every element
pred = np.vectorize(classify, otypes=[int])  # 0/1 label for every probability

In [172]:
def val(p,a):
    """Encode the outcome of one prediction ``p`` against the actual label ``a``.

    Returns
    -------
    0  when the prediction equals the actual label (true prediction),
    1  when they differ and the prediction is 1 (false positive),
    -1 in every other case (false negative).
    """
    if p != a:
        # Mismatch: the predicted value decides which kind of error it is.
        return 1 if p == 1 else -1
    return 0
# Element-wise outcome codes (0 / 1 / -1) for arrays of predictions and labels.
# otypes is fixed so the wrapper also accepts size-0 arrays instead of raising.
confusion = np.vectorize(val, otypes=[int])

In [184]:
class Logistic_regression_model():
    """Binary logistic regression fitted with full-batch gradient descent.

    State
    -----
    w : weight row vector of shape ``(1, nVariables)``.
    b : bias of shape ``(1,)``.

    The class relies on three module-level helpers: ``sig`` (element-wise
    sigmoid), ``pred`` (element-wise 0.5 threshold) and ``confusion``
    (element-wise outcome code: 0 correct, 1 false positive, -1 false
    negative).
    """
    def __init__(self, nVariables, seed=None):
        """Initialise weights and bias with uniform samples from [0, 1).

        Parameters
        ----------
        nVariables : number of input features (columns of ``x``).
        seed : optional seed. When given, a private ``RandomState(seed)`` is
            used so the initial parameters are reproducible; when omitted,
            NumPy's global random state is used.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.w = rng.rand(1,nVariables)
        self.b = rng.rand(1)
    def linear(self,x):
        """Return the linear score ``x @ w.T + b`` for every row of ``x``."""
        return (x @ self.w.T) + self.b
    def predict_proba(self,x):
        """Return the sigmoid of the linear score for every row of ``x``."""
        return sig(self.linear(x))
    def predict(self,x):
        """Return the 0/1 label for every row of ``x`` (threshold 0.5)."""
        return pred(self.predict_proba(x))
    def metrics(self,y_pc,y):
        """Count correct predictions, false positives and false negatives.

        Both inputs are flattened first so that a column vector and a 1-D
        array are compared element by element instead of being broadcast
        against each other into an (m, m) grid.

        Returns
        -------
        (n_correct, n_false_positive, n_false_negative)
        """
        cm = confusion(np.ravel(y_pc), np.ravel(y))
        return (cm==0).sum(), (cm==1).sum(), (cm==-1).sum()
    def _report(self, label, y_p, y, m):
        """Print accuracy / false-positive / false-negative rates in percent."""
        t,fp,fn = self.metrics(pred(y_p),y)
        print(label,'Accuracy:',t*100/m,'%','False positive:',fp*100/m,'%','False negative:',fn*100/m,'%')
    def fit(self, x, y, lr=0.001, epochs= 10):
        """Run ``epochs`` gradient-descent steps on ``(x, y)``, updating ``w`` and ``b``.

        Before every step the current metrics are printed, prefixed with the
        epoch index; after the last step they are printed once more with the
        prefix ``Final``. Calling ``fit`` again continues from the current
        parameters.

        Parameters
        ----------
        x : 2-D array of features, one row per sample.
        y : 0/1 targets, either 1-D or a column vector; one entry per row of ``x``.
        lr : learning rate.
        epochs : number of update steps.

        Raises
        ------
        ValueError : if there are no samples or ``x`` and ``y`` disagree on
            the number of rows.
        """
        x = np.asarray(x)
        # Force a column vector: a 1-D y would broadcast against the (m, 1)
        # predictions and produce an (m, m) error matrix.
        y = np.asarray(y).reshape(-1,1)
        m = y.shape[0]
        if m == 0:
            raise ValueError('fit() needs at least one sample')
        if x.shape[0] != m:
            raise ValueError('x has %d rows but y has %d entries' % (x.shape[0], m))
        for epoch in range(epochs):
            y_p = self.predict_proba(x)
            self._report(epoch, y_p, y, m)
            e = (y_p-y)
            # Gradient of the mean log-loss, already scaled by the learning rate.
            dw = ((x*e).sum(0))*(lr*(1/m))
            db = (e.sum())*(lr*(1/m))
            self.w = self.w - dw
            self.b = self.b - db
        self._report('Final', self.predict_proba(x), y, m)
        

In [174]:
# Load the CSV once; x is the feature matrix and y the target column vector
# returned by DataLoader. Both are reused by every training cell below.
x,y = DataLoader('./w1p2_data.csv')

data shape: (4238, 16)
shape after cleaning: (3656, 16)
number of input features: 15
training data shape: (3656, 15)
target shape: (3656, 1)


In [None]:
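# Below: three models (model0, model1, model2) are trained. Each one is first fitted from a random initialisation and then trained further at a lower learning rate.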

In [203]:
# Model 0, first pass: random initialisation, 10 epochs at lr = 0.001.
# The feature count is read from the loaded data instead of being hard-coded,
# so the cell keeps working if the set of feature columns changes.
model0 = Logistic_regression_model(x.shape[1])
lr = 0.001
epochs = 10
model0.fit(x,y, lr=lr, epochs=epochs)

0 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
1 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
2 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
3 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
4 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
5 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
6 Accuracy: 83.86214442013129 % False positive: 1.5590809628008753 % False negative: 14.578774617067834 %
7 Accuracy: 77.68052516411379 % False positive: 9.710065645514224 % False negative: 12.609409190371991 %
8 Accuracy: 73.41356673960613 % False positive: 15.262582056892779 % False negative: 11.323851203501095 %
9 Accuracy: 75.68380743982495 % False positive: 12.335886214442013 % False negative: 11.980306345733041 %
Final Accuracy: 74.34354485776805 % F

In [204]:
# Model 0, second pass: keep training the same instance at a lower learning rate.
# Re-running this cell trains further; it does not start over.
lr, epochs = 0.0001, 10
model0.fit(x, y, lr=lr, epochs=epochs)

0 Accuracy: 74.34354485776805 % False positive: 14.086433260393873 % False negative: 11.570021881838075 %
1 Accuracy: 74.37089715536105 % False positive: 14.031728665207877 % False negative: 11.597374179431073 %
2 Accuracy: 74.48030634573304 % False positive: 13.894967177242888 % False negative: 11.62472647702407 %
3 Accuracy: 74.53501094091904 % False positive: 13.840262582056893 % False negative: 11.62472647702407 %
4 Accuracy: 74.56236323851203 % False positive: 13.812910284463895 % False negative: 11.62472647702407 %
5 Accuracy: 74.64442013129103 % False positive: 13.730853391684901 % False negative: 11.62472647702407 %
6 Accuracy: 74.67177242888403 % False positive: 13.703501094091903 % False negative: 11.62472647702407 %
7 Accuracy: 74.69912472647702 % False positive: 13.676148796498905 % False negative: 11.62472647702407 %
8 Accuracy: 74.72647702407002 % False positive: 13.648796498905908 % False negative: 11.62472647702407 %
9 Accuracy: 74.78118161925602 % False positive: 13.59

In [215]:
# Model 1, first pass: random initialisation, 10 epochs at lr = 0.0008.
# The feature count is read from the loaded data instead of being hard-coded.
model1 = Logistic_regression_model(x.shape[1])
lr = 0.0008
epochs = 10
model1.fit(x,y, lr=lr, epochs=epochs)

0 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
1 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
2 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
3 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
4 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
5 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
6 Accuracy: 14.9890590809628 % False positive: 84.57330415754923 % False negative: 0.437636761487965 %
7 Accuracy: 82.52188183807439 % False positive: 3.1728665207877462 % False negative: 14.305251641137856 %
8 Accuracy: 78.7199124726477 % False positive: 8.205689277899344 % False negative: 13.074398249452955 %
9 Accuracy: 75.60175054704595 % False positive: 12.335886214442013 % False negative: 12.062363238512035 %
Final Accuracy: 74.89059080962801 % False

In [216]:
# Model 1, second pass: keep training the same instance at a lower learning rate.
# Re-running this cell trains further; it does not start over.
lr, epochs = 0.0001, 10
model1.fit(x, y, lr=lr, epochs=epochs)

0 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
1 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
2 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
3 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
4 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
5 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
6 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
7 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
8 Accuracy: 74.89059080962801 % False positive: 13.402625820568927 % False negative: 11.706783369803064 %
9 Accuracy: 74.86323851203501 % False positive

In [220]:
# Model 2, first pass: random initialisation, 10 epochs at lr = 0.0005.
# The feature count is read from the loaded data instead of being hard-coded.
model2 = Logistic_regression_model(x.shape[1])
lr = 0.0005
epochs = 10
model2.fit(x,y, lr=lr, epochs=epochs)

0 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
1 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
2 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
3 Accuracy: 15.23522975929978 % False positive: 84.76477024070022 % False negative: 0.0 %
4 Accuracy: 15.153172866520787 % False positive: 84.68271334792122 % False negative: 0.16411378555798686 %
5 Accuracy: 67.39606126914661 % False positive: 20.97921225382932 % False negative: 11.62472647702407 %
6 Accuracy: 72.5109409190372 % False positive: 14.742888402625821 % False negative: 12.74617067833698 %
7 Accuracy: 72.62035010940919 % False positive: 14.551422319474836 % False negative: 12.828227571115974 %
8 Accuracy: 72.64770240700219 % False positive: 14.524070021881839 % False negative: 12.828227571115974 %
9 Accuracy: 72.64770240700219 % False positive: 14.524070021881839 % False negative: 12.828227571115974 %
Final A

In [221]:
# Model 2, second pass: keep training the same instance at a lower learning rate.
# Re-running this cell trains further; it does not start over.
lr, epochs = 0.0001, 10
model2.fit(x, y, lr=lr, epochs=epochs)

0 Accuracy: 72.64770240700219 % False positive: 14.524070021881839 % False negative: 12.828227571115974 %
1 Accuracy: 72.64770240700219 % False positive: 14.524070021881839 % False negative: 12.828227571115974 %
2 Accuracy: 72.64770240700219 % False positive: 14.524070021881839 % False negative: 12.828227571115974 %
3 Accuracy: 72.67505470459518 % False positive: 14.49671772428884 % False negative: 12.828227571115974 %
4 Accuracy: 72.67505470459518 % False positive: 14.49671772428884 % False negative: 12.828227571115974 %
5 Accuracy: 72.67505470459518 % False positive: 14.49671772428884 % False negative: 12.828227571115974 %
6 Accuracy: 72.67505470459518 % False positive: 14.49671772428884 % False negative: 12.828227571115974 %
7 Accuracy: 72.67505470459518 % False positive: 14.49671772428884 % False negative: 12.828227571115974 %
8 Accuracy: 72.70240700218818 % False positive: 14.49671772428884 % False negative: 12.800875273522976 %
9 Accuracy: 72.70240700218818 % False positive: 14.4