In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import sklearn
from tensorflow import keras
import matplotlib.pyplot as plt
import seaborn as sns
import PIL
import cv2
import os
%matplotlib inline

In [28]:
# Input CSV location. NOTE(review): "/content" looks like a Google Colab
# working directory - adjust DATA_PATH when running anywhere else.
DATA_PATH = "/content/heart.csv"

df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [30]:
def unique_val(df):
  """Print the distinct values of every text-valued column in ``df``.

  A column is reported when its dtype is ``object`` or a pandas string
  dtype. Comparing the dtype against the literal ``"object"`` alone would
  silently skip columns stored as ``dtype="string"`` (and the default
  string dtype of newer pandas releases).

  Args:
    df: DataFrame to inspect. It is not modified.

  Returns:
    None. One ``"<column> : <unique values>"`` line is printed per
    reported column, in column order.
  """
  for col in df.columns:
    column = df[col]
    # is_object_dtype is checked first so plain object columns behave
    # exactly as before; is_string_dtype adds the dedicated string dtypes.
    if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
      print(f'{col} : {column.unique()}')

# List the categories present in each text column before encoding them.
unique_val(df)

Sex : ['M' 'F']
ChestPainType : ['ATA' 'NAP' 'ASY' 'TA']
RestingECG : ['Normal' 'ST' 'LVH']
ExerciseAngina : ['N' 'Y']
ST_Slope : ['Up' 'Flat' 'Down']


In [31]:
from sklearn.preprocessing import LabelEncoder

# Replace every text column of df (in place) with integer category codes.
#
# A separate encoder is fitted per column and kept in `label_encoders`, so the
# integer codes can be mapped back later with
# `label_encoders[col].inverse_transform(...)` or inspected through
# `label_encoders[col].classes_`. Refitting one shared encoder would throw
# away every mapping except the one for the last column.
#
# `le` is still bound after the loop (to the encoder of the last encoded
# column), which is the same state the single shared encoder used to end in.
label_encoders = {}
for col in df.columns:
  # Covers classic object columns as well as pandas' dedicated string dtypes.
  if pd.api.types.is_object_dtype(df[col]) or pd.api.types.is_string_dtype(df[col]):
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


In [33]:
# Check the column dtypes after the encoding step.
df.dtypes

Unnamed: 0,0
Age,int64
Sex,int64
ChestPainType,int64
RestingBP,int64
Cholesterol,int64
FastingBS,int64
RestingECG,int64
MaxHR,int64
ExerciseAngina,int64
Oldpeak,float64


In [34]:
# Split the frame into predictors (every column except the target) and the
# target column itself.
TARGET = "HeartDisease"

X = df.drop(columns=[TARGET])
y = df[TARGET]

In [35]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each feature column to the [0, 1] range.
# The result is a plain NumPy array, so the column labels are gone after this cell.
# NOTE(review): the scaler is fitted on all rows, before the train/test split,
# so held-out rows contribute to the min/max. Consider fitting on the training
# rows only (for example inside a sklearn Pipeline).
s = MinMaxScaler().fit(X)
X = s.transform(X)
X

array([[0.24489796, 1.        , 0.33333333, ..., 0.        , 0.29545455,
        1.        ],
       [0.42857143, 0.        , 0.66666667, ..., 0.        , 0.40909091,
        0.5       ],
       [0.18367347, 1.        , 0.33333333, ..., 0.        , 0.29545455,
        1.        ],
       ...,
       [0.59183673, 1.        , 0.        , ..., 1.        , 0.43181818,
        0.5       ],
       [0.59183673, 0.        , 0.33333333, ..., 0.        , 0.29545455,
        0.5       ],
       [0.20408163, 1.        , 0.66666667, ..., 0.        , 0.29545455,
        1.        ]])

In [36]:
# Put the scaled values back into a labelled DataFrame.
# - Column names come from df minus the target, so this does not rely on
#   "HeartDisease" happening to be the last column of df.
# - df's index is reused so the rows of X stay aligned with y (which was taken
#   from df) even if df does not carry a default 0..n-1 index.
X = pd.DataFrame(X, columns=df.columns.drop("HeartDisease"), index=df.index)
X.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,0.244898,1.0,0.333333,0.7,0.47927,0.0,0.5,0.788732,0.0,0.295455,1.0
1,0.428571,0.0,0.666667,0.8,0.298507,0.0,0.5,0.676056,0.0,0.409091,0.5
2,0.183673,1.0,0.333333,0.65,0.46932,0.0,1.0,0.267606,0.0,0.295455,1.0
3,0.408163,0.0,0.0,0.69,0.354892,0.0,0.5,0.338028,1.0,0.465909,0.5
4,0.530612,1.0,0.666667,0.75,0.323383,0.0,0.5,0.43662,0.0,0.295455,1.0


In [37]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [38]:
##### SVM
# Baseline model: a support-vector classifier with scikit-learn's default
# settings, fitted on the training split only.
from sklearn.svm import SVC

svc = SVC()
svc.fit(X_train,y_train)


In [39]:
# Mean accuracy of the baseline SVC on the held-out test split.
svc.score(X_test,y_test)

0.8695652173913043

In [53]:
#### SVC with bagging
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import cross_val_score

# Ensemble of 100 RBF-kernel SVCs; each member is trained on a random draw
# sized at 80% of the training rows. With oob_score=True the out-of-bag
# accuracy is available afterwards as bag_svc.oob_score_.
bag_svc = BaggingClassifier(
    estimator=SVC(kernel="rbf", C=1),
    n_estimators=100,
    max_samples=0.8,
    oob_score=True,
    random_state=0,
).fit(X_train, y_train)

# 5-fold cross-validated accuracy over the full feature matrix; the cell
# displays the mean across folds.
# NOTE(review): the same evaluation is repeated in a later cell as
# scores_bag_svc.
scores = cross_val_score(bag_svc, X, y, cv=5)
scores.mean()

0.8245188880969352

In [41]:
# Mean accuracy of the bagged SVC ensemble on the held-out test split.
bag_svc.score(X_test,y_test)

0.875

In [42]:
##### Decision Tree Standalone
from sklearn.tree import DecisionTreeClassifier

# A single decision tree as the reference point for the bagged version.
# random_state is pinned because tree construction permutes the candidate
# features at every split; without a seed the fitted tree - and the score
# shown below - can change from one run to the next.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train,y_train)
tree.score(X_test,y_test)

0.7663043478260869

In [43]:
##### Decision tree with Bagging
# Ensemble of 20 default-parameter decision trees; each one is trained on a
# random draw sized at 85% of the training rows. The cell displays the
# out-of-bag accuracy estimate.

bag_treee = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=20,
    max_samples=0.85,
    oob_score=True,
    random_state=0,
).fit(X_train, y_train)

bag_treee.oob_score_

0.829700272479564

In [44]:
# Mean accuracy of the bagged decision trees on the held-out test split.
bag_treee.score(X_test,y_test)

0.8315217391304348

In [45]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of the bagged decision trees over the full
# feature matrix; the cell displays the mean across the folds.
scores_bag_tree = cross_val_score(bag_treee,X,y,cv=5)
scores_bag_tree.mean()

0.800576146353053

In [46]:
# 5-fold cross-validated accuracy of the bagged SVC over the full feature
# matrix; the cell displays the mean across the folds.
scores_bag_svc = cross_val_score(bag_svc,X,y,cv=5)
scores_bag_svc.mean()

0.8212639581848421

In [48]:
# 5-fold cross-validated accuracy of a plain default-parameter SVC, for
# comparison with the bagged models; the cell displays the mean across folds.
scores_svc = cross_val_score(SVC(),X,y,cv=5)
scores_svc.mean()

0.8354181515799478