#### Import the models

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelBinarizer
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

#### Import the data

In [2]:
data = pd.read_csv('heart.csv')

In [3]:
data.head(2)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [5]:
#### Split the data into train and test and scale it
X = data.drop('target',axis=1) # Input
Y = data['target'] # Output

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2) # 20% data for testing

In [7]:
# Perform the Min Max scaler on the independent variable
scl = MinMaxScaler()
X_train = scl.fit_transform(X_train) # Fit and transform the train data
X_test = scl.transform(X_test) # Only transform the test data

#### Apply the Statsmodel GLM method

In [8]:
# Need to add Intercept manually, and then fit the data
X_train_sm = sm.add_constant(X_train)
logm1 = sm.GLM(Y_train,X_train_sm, family = sm.families.Binomial())
res = logm1.fit()
res.summary()

0,1,2,3
Dep. Variable:,target,No. Observations:,242.0
Model:,GLM,Df Residuals:,228.0
Model Family:,Binomial,Df Model:,13.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-86.812
Date:,"Fri, 02 Apr 2021",Deviance:,173.62
Time:,01:23:44,Pearson chi2:,214.0
No. Iterations:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,3.4226,1.611,2.124,0.034,0.264,6.581
x1,-0.8837,1.239,-0.713,0.476,-3.313,1.545
x2,-2.0392,0.541,-3.772,0.000,-3.099,-0.980
x3,2.6695,0.619,4.312,0.000,1.456,3.883
x4,-2.7975,1.157,-2.418,0.016,-5.065,-0.530
x5,-1.8659,1.814,-1.029,0.304,-5.421,1.690
x6,0.6758,0.649,1.041,0.298,-0.596,1.948
x7,0.8865,0.766,1.157,0.247,-0.615,2.388
x8,2.4582,1.586,1.549,0.121,-0.651,5.568


In [9]:
#Do the prediction on Statsmodel 
X_test_sm = sm.add_constant(X_test)
y_pred_sm = list(res.predict(X_test_sm))

In [10]:
# Statsmodel returns Probability value, convert it into classes.
y_pred_sm = list(map(lambda x: 1 if x > 0.5 else 0, y_pred_sm))

In [11]:
# Get the classification report
print(classification_report(Y_test, y_pred_sm))

              precision    recall  f1-score   support

           0       0.93      0.81      0.86        31
           1       0.82      0.93      0.87        30

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.88      0.87      0.87        61



#### Apply the sklearn Logistic Regression

In [12]:
# Logistic object
logistic_sk = LogisticRegression()

In [13]:
# Fit the model
logistic_sk.fit(X_train, Y_train)

LogisticRegression()

In [14]:
# Do prediction
y_pred_sk = logistic_sk.predict(X_test)

In [15]:
# Get the classification report
print(classification_report(Y_test, y_pred_sk))

              precision    recall  f1-score   support

           0       0.90      0.84      0.87        31
           1       0.84      0.90      0.87        30

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61

