In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('framingham.csv')
data

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.10,85.0,85.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4233,1,50,1.0,1,1.0,0.0,0,1,0,313.0,179.0,92.0,25.97,66.0,86.0,1
4234,1,51,3.0,1,43.0,0.0,0,0,0,207.0,126.5,80.0,19.71,65.0,68.0,0
4235,0,48,2.0,1,20.0,,0,0,0,248.0,131.0,72.0,22.00,84.0,86.0,0
4236,0,44,1.0,1,15.0,0.0,0,0,0,210.0,126.5,87.0,19.16,86.0,,0


In [3]:
cleaned_data = data.dropna()
cleaned_data

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.10,85.0,85.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4231,1,58,3.0,0,0.0,0.0,0,1,0,187.0,141.0,81.0,24.96,80.0,81.0,0
4232,1,68,1.0,0,0.0,0.0,0,1,0,176.0,168.0,97.0,23.14,60.0,79.0,1
4233,1,50,1.0,1,1.0,0.0,0,1,0,313.0,179.0,92.0,25.97,66.0,86.0,1
4234,1,51,3.0,1,43.0,0.0,0,0,0,207.0,126.5,80.0,19.71,65.0,68.0,0


In [4]:
train_data = cleaned_data.sample(frac=0.9,random_state=42)
test_data  = cleaned_data.drop(train_data.index)
print(train_data.shape,test_data.shape)

(3290, 16) (366, 16)


In [5]:
X_train = train_data.iloc[:,0:15].to_numpy()
Y_train = train_data.iloc[:,15].to_numpy()
X_test = test_data.iloc[:,0:15].to_numpy()
Y_test = test_data.iloc[:,15].to_numpy()
print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)

(3290, 15) (3290,) (366, 15) (366,)


In [29]:
X_train_scaled = (X_train - np.mean(X_train))/np.std(X_train)
X_test_scaled = (X_test - np.mean(X_train))/np.std(X_train)

In [32]:
class logistic_regression:
    def __init__(self,X):
        self.w = np.random.rand(X.shape[1])
        self.b = np.random.rand()
    def sigmoid(self,X):
        return 1/(1+np.exp(-X))
    def predict(self,X):
        return self.sigmoid(np.dot(X,self.w)+self.b)
    
    def gradient(self,X,Y):
        dw = np.matmul(X.T,self.predict(X)-Y)/X.shape[0]
        db = np.mean(self.predict(X)-Y)
        return dw,db
    def update_weights(self,dw,db,lr):
        self.w = self.w - lr*dw
        self.b = self.b - lr*db

    def cost(self,X,Y):
        return -np.mean(Y*np.log(self.predict(X))+(1-Y)*np.log(1-self.predict(X)))
    def fit(self,X,Y,lr,epochs):
        
        for i in range(epochs):
            dw,db = self.gradient(X,Y)
            self.update_weights(dw,db,lr)
            if ((i+1)%100==0):print(f'Epoch {i+1}/{epochs} Cost: {self.cost(X,Y)} Weights: {self.w} Bias: {self.b}')
    def accuracy(self,X,Y):
        
        percentage_correctly_classified =  np.mean(np.round(self.predict(X))==Y)*100
        false_negatives = np.sum((Y == 1) & (np.round(self.predict(X)) == 0))
        false_positives = np.sum((Y == 0) & (np.round(self.predict(X)) == 1))
        percentage_false_negatives = false_negatives/np.sum(Y == 1)*100
        percentage_false_positives = false_positives/np.sum(Y == 0)*100
        
        print(f'Accuracy: {percentage_correctly_classified}% False Negatives: {percentage_false_negatives}% False Positives: {percentage_false_positives}%')
        
        

In [44]:
model = logistic_regression(X_train_scaled)
model.fit(X_train_scaled,Y_train,1,4000)
model.accuracy(X_test_scaled,Y_test)

Epoch 100/4000 Cost: 0.40292353232526956 Weights: [ 0.44380859  0.90819936  0.69836786  1.03823065  0.84147256  0.82532308
  0.6778066   0.36912057  0.96560032  0.16064967  1.13701542  0.2157675
  1.12822325 -0.04356037  0.56883221] Bias: 0.43335059131798354
Epoch 200/4000 Cost: 0.3951521345555742 Weights: [ 0.49698828  1.44435183  0.71002702  1.07604725  1.04757542  0.85877522
  0.71001156  0.41020717  0.99779617  0.17915673  1.29010624  0.12335506
  1.12434464 -0.17038321  0.59534634] Bias: 0.38875517410211263
Epoch 300/4000 Cost: 0.39104309528453074 Weights: [ 0.51145024  1.88052844  0.68671563  1.07500303  1.19362572  0.85295924
  0.70335339  0.4101879   0.99083021  0.16653831  1.33406148  0.02148951
  1.09770238 -0.24922762  0.59134217] Bias: 0.40004578757261494
Epoch 400/4000 Cost: 0.38860545583887396 Weights: [ 0.51793181  2.24787259  0.65741243  1.06584783  1.30780182  0.83936375
  0.68903497  0.40192036  0.97613086  0.14799729  1.34379353 -0.06448346
  1.0696099  -0.29953417  