**Import Dependencies**

In [34]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

**Data Collection & Preprocessing**

In [35]:
df= pd.read_csv('https://github.com/V-Vibee/Pro_Smart/raw/main/2.0%20Cross%20Validation/heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [36]:
df.shape

(303, 14)

In [37]:
df.isnull().sum()

Unnamed: 0,0
age,0
sex,0
cp,0
trestbps,0
chol,0
fbs,0
restecg,0
thalach,0
exang,0
oldpeak,0


In [38]:
df['target'].value_counts()

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
1,165
0,138


1----> Defective Heart

0----> Healthy Heart

**Splitting target & Features**

In [39]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [40]:
y= df['target']
X= df.drop(columns='target', axis=1)

**Train Test Split**

In [41]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2, random_state=3, stratify=y)

In [42]:
print(X.shape,X_train.shape, X_test.shape)

(303, 13) (242, 13) (61, 13)


**Logistic Regression**

In [43]:
import warnings
warnings.filterwarnings("ignore")

In [44]:
from sklearn.linear_model import LogisticRegression
lr= LogisticRegression()
lr.fit(X_train, y_train)

y_pred= lr.predict(X_test)

In [45]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.83      0.71      0.77        28
           1       0.78      0.88      0.83        33

    accuracy                           0.80        61
   macro avg       0.81      0.80      0.80        61
weighted avg       0.81      0.80      0.80        61



**Support Vector Classifier**

In [46]:
from sklearn.svm import SVC
model= SVC(kernel='linear')
model.fit(X_train, y_train)

y_pred= model.predict(X_test)

In [47]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.79      0.68      0.73        28
           1       0.76      0.85      0.80        33

    accuracy                           0.77        61
   macro avg       0.77      0.76      0.77        61
weighted avg       0.77      0.77      0.77        61



**K _Neighbour**

In [48]:
from sklearn.neighbors import KNeighborsClassifier
knc= KNeighborsClassifier()
knc.fit(X_train, y_train)

y_pred= knc.predict(X_test)

In [49]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.64      0.57      0.60        28
           1       0.67      0.73      0.70        33

    accuracy                           0.66        61
   macro avg       0.65      0.65      0.65        61
weighted avg       0.65      0.66      0.65        61



**Random Forest**

In [50]:
from sklearn.ensemble import RandomForestClassifier
rfc= RandomForestClassifier()
rfc.fit(X_train, y_train)

y_pred= rfc.predict(X_test)

In [51]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.77      0.71      0.74        28
           1       0.77      0.82      0.79        33

    accuracy                           0.77        61
   macro avg       0.77      0.77      0.77        61
weighted avg       0.77      0.77      0.77        61



**Comparing the Models**

In [57]:
# List of the Models

models= [LogisticRegression(max_iter=1000), SVC(kernel='linear'), KNeighborsClassifier(),RandomForestClassifier()]

In [58]:
def compare_models_train_test():

  for model in models:

    #Train the model
    model.fit(X_train, y_train)

    #Evaluate the model
    y_pred1 = model.predict(X_test)

    accuracy= accuracy_score(y_test, y_pred1)

    print('Accuracy of Models: ', model,' = ', accuracy)


In [59]:
compare_models_train_test()

Accuracy of Models:  LogisticRegression(max_iter=1000)  =  0.7704918032786885
Accuracy of Models:  SVC(kernel='linear')  =  0.7704918032786885
Accuracy of Models:  KNeighborsClassifier()  =  0.6557377049180327
Accuracy of Models:  RandomForestClassifier()  =  0.7540983606557377


**Cross Validation**

In [60]:
from sklearn.model_selection import cross_val_score

**Logistic Regression**

In [68]:
cv_score_lr= cross_val_score(LogisticRegression(max_iter=1000),X,y, cv=5)
print(cv_score_lr)

mean_accuracy_lr= sum(cv_score_lr)/len(cv_score_lr)
mean_accuracy_lr = mean_accuracy_lr *100
mean_accuracy_lr= round(mean_accuracy_lr,2)
mean_accuracy_lr

[0.80327869 0.86885246 0.85245902 0.86666667 0.75      ]


82.83

**Support Vector Classifier**

In [71]:
cv_score_svc= cross_val_score(SVC(kernel='linear'),X,y, cv=5)
print(cv_score_svc)

mean_accuracy_svc= sum(cv_score_svc)/len(cv_score_svc)
mean_accuracy_svc = mean_accuracy_svc *100
mean_accuracy_svc= round(mean_accuracy_svc,2)
mean_accuracy_svc

[0.81967213 0.8852459  0.80327869 0.86666667 0.76666667]


82.83

**KNN**

In [72]:
cv_score_knc= cross_val_score(KNeighborsClassifier(),X,y, cv=5)
print(cv_score_knc)

mean_accuracy_knc= sum(cv_score_knc)/len(cv_score_svc)
mean_accuracy_knc = mean_accuracy_knc *100
mean_accuracy_knc= round(mean_accuracy_knc,2)
mean_accuracy_knc

[0.60655738 0.6557377  0.57377049 0.73333333 0.65      ]


64.39

**Random Forest**

In [73]:
cv_score_rfc= cross_val_score(RandomForestClassifier(),X,y, cv=5)
print(cv_score_rfc)

mean_accuracy_rfc= sum(cv_score_rfc)/len(cv_score_rfc)
mean_accuracy_rfc = mean_accuracy_rfc *100
mean_accuracy_rfc= round(mean_accuracy_rfc,2)
mean_accuracy_rfc

[0.78688525 0.8852459  0.83606557 0.83333333 0.78333333]


82.5

**Function for Cross Validation**

In [80]:
# List of the Models

models= [LogisticRegression(max_iter=1000), SVC(kernel='linear'), KNeighborsClassifier(),RandomForestClassifier()]

In [92]:
def compare_models_cross_validation_score():

  for model in models:

    cv_score= cross_val_score(model,X, y,cv=5)

    mean_accuracy= sum(cv_score)/len(cv_score )

    mean_accuracy = mean_accuracy *100

    mean_accuracy= round(mean_accuracy,2)

    print('Cross validation score: ',model, '=', cv_score)
    print('Accuracy percentage: ',model, '=', mean_accuracy)
    print('--------------------------------------------------------------------------')



In [93]:
compare_models_cross_validation_score()

Cross validation score:  LogisticRegression(max_iter=1000) = [0.80327869 0.86885246 0.85245902 0.86666667 0.75      ]
Accuracy percentage:  LogisticRegression(max_iter=1000) = 82.83
--------------------------------------------------------------------------
Cross validation score:  SVC(kernel='linear') = [0.81967213 0.8852459  0.80327869 0.86666667 0.76666667]
Accuracy percentage:  SVC(kernel='linear') = 82.83
--------------------------------------------------------------------------
Cross validation score:  KNeighborsClassifier() = [0.60655738 0.6557377  0.57377049 0.73333333 0.65      ]
Accuracy percentage:  KNeighborsClassifier() = 64.39
--------------------------------------------------------------------------
Cross validation score:  RandomForestClassifier() = [0.81967213 0.8852459  0.81967213 0.81666667 0.78333333]
Accuracy percentage:  RandomForestClassifier() = 82.49
--------------------------------------------------------------------------
