In [1]:
import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

from sklearn import metrics

In [2]:
data = pd.read_csv('heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
y = data.output

X = data.drop('output', axis = 1)

In [4]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size = 0.2, random_state = 23)

# Before Scaling

## DecisionTree

In [5]:
model_dt = DecisionTreeClassifier(random_state = 23)
model_dt.fit(X_train, y_train)

preds_dt = model_dt.predict(X_valid)
acc_score_dt = metrics.accuracy_score(preds_dt, y_valid)

print('DecisionTree')
print(f'> Acc: {acc_score_dt}')

DecisionTree
> Acc: 0.8032786885245902


## RandomForest

In [6]:
model_rf = RandomForestClassifier(random_state = 23)
model_rf.fit(X_train, y_train)

preds_rf = model_rf.predict(X_valid)
acc_score_rf = metrics.accuracy_score(preds_rf, y_valid)

print('RandomForest')
print(f'> Acc: {acc_score_rf}')

RandomForest
> Acc: 0.8032786885245902


## KNeighbors

In [7]:
model_knn = KNeighborsClassifier()
model_knn.fit(X_train, y_train)

preds_knn = model_knn.predict(X_valid)
acc_score_knn = metrics.accuracy_score(preds_knn, y_valid)

print('KNeighbors')
print(f'> Acc: {acc_score_knn}')

KNeighbors
> Acc: 0.7049180327868853


# After Scaling

## DecisionTree

In [8]:
model_dt = DecisionTreeClassifier(random_state = 11)

clf_dt = Pipeline(steps =[('preprocessor', StandardScaler()),
                               ('model', model_dt)])
clf_dt.fit(X_train, y_train)

Pipeline(steps=[('preprocessor', StandardScaler()),
                ('model', DecisionTreeClassifier(random_state=11))])

In [9]:
preds_dt = clf_dt.predict(X_valid)
acc_score_dt = metrics.accuracy_score(preds_dt, y_valid)

print('DecisionTree')
print(f'> Acc: {acc_score_dt}')

DecisionTree
> Acc: 0.7704918032786885


## RandomForest

In [10]:
model_rf = RandomForestClassifier(random_state = 11)

clf_rf = Pipeline(steps =[('preprocessor', StandardScaler()),
                               ('model', model_rf)])
clf_rf.fit(X_train, y_train)

Pipeline(steps=[('preprocessor', StandardScaler()),
                ('model', RandomForestClassifier(random_state=11))])

In [11]:
preds_rf = clf_rf.predict(X_valid)
acc_score_rf = metrics.accuracy_score(preds_rf, y_valid)

print('RandomForest')
print(f'> Acc: {acc_score_rf}')

RandomForest
> Acc: 0.7868852459016393


## KNeighbors

In [12]:
model_knn = KNeighborsClassifier()

clf_knn = Pipeline(steps =[('preprocessor', StandardScaler()),
                               ('model', model_knn)])
clf_knn.fit(X_train, y_train)

Pipeline(steps=[('preprocessor', StandardScaler()),
                ('model', KNeighborsClassifier())])

In [13]:
preds_knn = clf_knn.predict(X_valid)
acc_score_knn = metrics.accuracy_score(preds_knn, y_valid)

print('KNeighbors')
print(f'> Acc: {acc_score_knn}')

RandomForest
> Acc: 0.8360655737704918


With scaled features, KNeighborsClassifier has outperformed other models used

In [1]:
# neighbors = [2, 3, 4, 5, 6, 7, 10, 13, 15, 18, 20]

# for k in neighbors:
#     model_knn = KNeighborsClassifier(%xdel)
#     clf_knn = Pipeline(steps =[('preprocessor', StandardScaler()),
#                                ('model', model_knn)])
    
#     clf_knn.fit(X_train, y_tra
    
#     preds_knn = clf_knn.predict(X_valid)
#     acc = metrics.accuracy_score(preds_knn, y_valid)
    
#     print(f'KNeighbors (K) > {k}')
#     print(f'> Acc: {acc}')