<a href="https://colab.research.google.com/github/bahadurshubham/Darknet/blob/master/Logistic_regression_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d fedesoriano/heart-failure-prediction
!unzip heart-failure-prediction.zip

Downloading heart-failure-prediction.zip to /content
  0% 0.00/8.56k [00:00<?, ?B/s]
100% 8.56k/8.56k [00:00<00:00, 2.34MB/s]
Archive:  heart-failure-prediction.zip
  inflating: heart.csv               


In [2]:
import pandas as pd

# Load the extracted CSV into a DataFrame; the bare name on the last line
# renders it as the cell output.
heart = pd.read_csv('heart.csv')
heart

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [3]:
import plotly.express as px

# Scatter of MaxHR against the HeartDisease label, coloured by that same label.
px.scatter(heart, x='MaxHR', y='HeartDisease', color='HeartDisease')

In [4]:
from sklearn.linear_model import LogisticRegression

# Single-feature design matrix: MaxHR as an (n_samples, 1) column, which is the
# 2-D layout the estimator is fitted on below.
X_train = heart['MaxHR'].to_numpy().reshape(-1, 1)
# Target labels as a flat array.
Y_train = heart['HeartDisease'].to_numpy()

X_train.shape, Y_train.shape

((918, 1), (918,))

In [5]:
# Fit the reference scikit-learn model on the single MaxHR feature.
lr = LogisticRegression()
lr.fit(X_train, Y_train)

# One predicted label per training row.
lr.predict(X_train).shape

(918,)

In [6]:
# Shape of the probability output: one row per sample; the column at index 1 is
# the one used as the heart-disease probability later on.
lr.predict_proba(X_train).shape

(918, 2)

In [7]:
from copy import deepcopy

# Working copy of the two columns of interest, so that adding prediction
# columns never touches `heart` itself.
predictions_df = heart[['MaxHR', 'HeartDisease']].copy()

# Run the model once and derive the hard prediction from the stored
# probability (column 1 of predict_proba) instead of predicting a second time.
predictions_df['Sklearn Probability'] = lr.predict_proba(X_train)[:, 1]
predictions_df['Sklearn Prediction'] = predictions_df['Sklearn Probability'] > 0.5
predictions_df

Unnamed: 0,MaxHR,HeartDisease,Sklearn Probability,Sklearn Prediction
0,172,0,0.260939,False
1,156,1,0.389737,False
2,98,0,0.845538,True
3,108,1,0.790775,True
4,122,0,0.692325,True
...,...,...,...,...
913,132,1,0.608400,True
914,141,1,0.526777,True
915,115,1,0.744655,True
916,174,1,0.246907,False


In [8]:
import plotly.graph_objects as go

# Observed labels and the sklearn probabilities, both plotted against MaxHR.
fig = go.Figure(data=[
    go.Scatter(
        x=predictions_df['MaxHR'],
        y=predictions_df['HeartDisease'],
        mode='markers',
        name='Observed',
        marker=dict(color='blue'),
    ),
    go.Scatter(
        x=predictions_df['MaxHR'],
        y=predictions_df['Sklearn Probability'],
        mode='markers',
        name='Model Predictions',
        marker=dict(color='green'),
    ),
])

fig.update_layout(
    title='Heart Disease VS Max Heart Rate',
    xaxis_title='Max Heart Rate',
    yaxis_title='Heart Disease',
)

fig.show()

In [9]:
from sklearn.metrics import accuracy_score

# Training accuracy of the sklearn model. Arguments follow the documented
# (y_true, y_pred) order; the score itself is the same either way.
accuracy_score(Y_train, predictions_df['Sklearn Prediction'])

0.6710239651416122

In [10]:
# Fitted intercept and MaxHR coefficient of the sklearn model, for comparison
# with the hand-fitted parameters later in the notebook.
lr.intercept_ , lr.coef_

(array([5.33015391]), array([[-0.03704214]]))

In [11]:
# Quick look at the first rows of the raw data.
heart.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [12]:
import numpy as np

def sigmoid(x):
  """Numerically stable logistic function, 1 / (1 + exp(-x)).

  Args:
    x: A scalar or array-like that supports unary minus (for example a
      NumPy array).

  Returns:
    The element-wise sigmoid of ``x``, with values in [0, 1].
  """
  # logaddexp(0, -x) is log(1 + exp(-x)) evaluated without overflow, so large
  # negative inputs give 0.0 instead of an overflow warning from exp(-x).
  return np.exp(-np.logaddexp(0, -x))

In [13]:
# Evaluate the sigmoid on the integers -50..49 and display the curve.
test_range = np.arange(-50, 50)

fig = go.Figure(data=[go.Scatter(x=test_range, y=sigmoid(test_range))])
fig

In [14]:
def get_predictions(model, x):
  """Return the model's predicted probabilities for ``x``.

  Args:
    model: Mapping with the keys 'alpha_hat' (intercept) and 'beta_hat'
      (slope).
    x: Feature value(s) the linear term is evaluated on.

  Returns:
    ``sigmoid(alpha_hat + beta_hat * x)``.
  """
  linear_term = model['alpha_hat'] + model['beta_hat'] * x
  return sigmoid(linear_term)

In [15]:
# Hand-picked parameters, used to try the prediction pipeline before fitting.
test_model = {'alpha_hat': 2, 'beta_hat': -0.019}

predictions_df['Max Heart rate Probabilities'] = get_predictions(
    test_model,
    X_train.flatten(),
)
predictions_df

Unnamed: 0,MaxHR,HeartDisease,Sklearn Probability,Sklearn Prediction,Max Heart rate Probabilities
0,172,0,0.260939,False,0.219600
1,156,1,0.389737,False,0.276078
2,98,0,0.845538,True,0.534445
3,108,1,0.790775,True,0.487003
4,122,0,0.692325,True,0.421163
...,...,...,...,...,...
913,132,1,0.608400,True,0.375662
914,141,1,0.526777,True,0.336485
915,115,1,0.744655,True,0.453881
916,174,1,0.246907,False,0.213157


In [16]:
# Observed labels, sklearn probabilities and the hand-picked model's
# probabilities, all plotted against MaxHR.
fig = go.Figure(data=[
    go.Scatter(
        x=predictions_df['MaxHR'],
        y=predictions_df[column],
        mode='markers',
        name=label,
        marker=dict(color=colour),
    )
    for column, label, colour in (
        ('HeartDisease', 'Observed', 'orange'),
        ('Sklearn Probability', 'Sklearn Probabilities', 'blue'),
        ('Max Heart rate Probabilities', 'Max Heart rate Probabilities', 'red'),
    )
])

fig.update_layout(
    title='MaxHR VS Heart Disease',
    xaxis_title='MaxHR',
    yaxis_title='Heart Disease',
)

fig.show()

In [17]:
# Training accuracy of the hand-picked model at a 0.5 threshold. Arguments
# follow the documented (y_true, y_pred) order; the score is the same either way.
accuracy_score(Y_train, predictions_df['Max Heart rate Probabilities'] > 0.5)

0.5206971677559913

In [25]:
from sklearn.metrics import mean_squared_error

def function_to_minimize(params, *args):
  """Objective for the optimizer: MSE between predicted probabilities and labels.

  Args:
    params: Sequence whose first two entries are ``alpha_hat`` (intercept)
      and ``beta_hat`` (slope).
    *args: ``args[0]`` is the feature array ``x``; ``args[1]`` holds the
      observed labels ``y``.

  Returns:
    The mean squared error between ``y`` and the probabilities that
    ``get_predictions`` produces for ``x``.

  NOTE(review): the loss used here is squared error, not the log-loss
  normally associated with logistic regression - confirm this is intended.
  """
  alpha_hat, beta_hat = params[0], params[1]
  x, y = args[0], args[1]

  model = {'alpha_hat': alpha_hat, 'beta_hat': beta_hat}
  predictions_probs = get_predictions(model, x)

  # Documented order is (y_true, y_pred); MSE is symmetric, so the value is
  # the same as with the arguments reversed.
  return mean_squared_error(y, predictions_probs)

In [26]:
from scipy.optimize import minimize

def get_best_model(x,y):
  """Fit ``alpha_hat`` and ``beta_hat`` by minimizing ``function_to_minimize``.

  Args:
    x: Feature values passed through to the objective.
    y: Observed labels passed through to the objective.

  Returns:
    A dict with the keys 'alpha_hat' and 'beta_hat' holding the optimizer's
    final parameters.

  Warns:
    RuntimeWarning: if the optimizer reports that it did not converge. The
      final parameters are still returned in that case.
  """
  import warnings  # local import so the function carries its own dependency

  res = minimize(function_to_minimize, x0=[0, 0], args=(x, y))

  # Surface optimizer failures instead of silently returning whatever the
  # last iterate happened to be.
  if not res.success:
    warnings.warn(
        f"Optimizer did not report convergence: {res.message}",
        RuntimeWarning,
        stacklevel=2,
    )

  alpha_hat, beta_hat = res.x[0], res.x[1]
  return dict(alpha_hat=alpha_hat, beta_hat=beta_hat)

In [27]:
# Fit the hand-rolled model on the MaxHR feature and the HeartDisease labels,
# then display the fitted parameters.
best_model = get_best_model(X_train.flatten() , predictions_df['HeartDisease'])
best_model

{'alpha_hat': 5.516771166306751, 'beta_hat': -0.038253385518181665}

In [38]:
# The column plotted in red is not created by any cell present in the notebook
# (execution counts jump from 27 to 38), so a fresh Restart & Run All would
# raise KeyError here. Compute it from the fitted parameters before plotting;
# the assignment is idempotent, so re-running the cell is safe.
predictions_df['Our Model heart Disease Probabilities'] = get_predictions(
    best_model, X_train.flatten())

# Observed labels, sklearn probabilities and the fitted model's probabilities,
# all plotted against MaxHR.
fig = go.Figure()

fig.add_trace(go.Scatter(x=predictions_df['MaxHR'] , y=predictions_df['HeartDisease'],
                         mode='markers', name='Observed', marker=dict(color='orange')))

fig.add_trace(go.Scatter(x=predictions_df['MaxHR'] , y=predictions_df['Sklearn Probability'],
                         mode='markers' , name='Sklearn Probabilities', marker=dict(color='blue')))

fig.add_trace(go.Scatter(x=predictions_df['MaxHR'] , y=predictions_df['Our Model heart Disease Probabilities'],
                         mode='markers' , name='Our Model heart Disease Probabilities' , marker=dict(color='red')))

fig.update_layout(
    title = 'MaxHR VS Heart Disease',
    xaxis_title = 'MaxHR',
    yaxis_title = 'Heart Disease'
)

fig.show()