In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold

%matplotlib inline

In [2]:
df = pd.read_csv("D:\corona\heart_Disease\heart.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [4]:
df['target'].value_counts()

1    165
0    138
Name: target, dtype: int64

In [5]:
model = []

svc_model = Pipeline([('m', SVC())])
model.append(('SVM_Classification',svc_model))

logistic_reg = Pipeline([('m', LogisticRegression())])
model.append(('Logistic_Regression',logistic_reg))

decision_tree = Pipeline([('m', DecisionTreeClassifier())])
model.append(('Decision_Tree',decision_tree))

random_forest = Pipeline([('m', RandomForestClassifier())])
model.append(('Random_Forest',decision_tree))


ensemble = VotingClassifier(estimators=model, voting='hard')

In [6]:
model

[('SVM_Classification', Pipeline(steps=[('m', SVC())])),
 ('Logistic_Regression', Pipeline(steps=[('m', LogisticRegression())])),
 ('Decision_Tree', Pipeline(steps=[('m', DecisionTreeClassifier())])),
 ('Random_Forest', Pipeline(steps=[('m', DecisionTreeClassifier())]))]

In [7]:
# Combination of all the models
ensemble

VotingClassifier(estimators=[('SVM_Classification',
                              Pipeline(steps=[('m', SVC())])),
                             ('Logistic_Regression',
                              Pipeline(steps=[('m', LogisticRegression())])),
                             ('Decision_Tree',
                              Pipeline(steps=[('m',
                                               DecisionTreeClassifier())])),
                             ('Random_Forest',
                              Pipeline(steps=[('m',
                                               DecisionTreeClassifier())]))])

In [8]:
# Using RepeatedStratifiedKFold For proper train test splits

X = df.drop('target',axis=1)
y = df['target']

cv = RepeatedStratifiedKFold(n_splits=101, n_repeats=3)
n_score = cross_val_score(ensemble, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

In [9]:
n_score

array([0.66666667, 1.        , 0.66666667, 0.66666667, 0.66666667,
       1.        , 1.        , 1.        , 0.66666667, 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.66666667,
       0.33333333, 0.66666667, 1.        , 0.66666667, 0.66666667,
       0.66666667, 0.66666667, 1.        , 1.        , 0.66666667,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 0.66666667, 0.66666667,
       1.        , 0.33333333, 1.        , 1.        , 0.66666667,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       0.66666667, 0.33333333, 1.        , 1.        , 0.66666667,
       0.66666667, 0.66666667, 1.        , 1.        , 0.66666667,
       1.        , 0.33333333, 0.66666667, 0.66666667, 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 0.33333333, 1.        , 0.66666667,
       1.        , 0.66666667, 0.66666667, 0.66666667, 1.     

In [10]:
n_score.mean()

0.8074807480748075