In [41]:
# import libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [42]:
# import data from url

url = 'https://raw.githubusercontent.com/akaakselabhijeet/Data-Science-Datasets-Ver.01/main/Health/FILE_heart.csv'
heart = pd.read_csv(url)

print(heart.tail(6))

     age  sex  cp  trestbps  chol  fbs  ...  exang  oldpeak  slope  ca  thal  target
297   59    1   0       164   176    1  ...      0      1.0      1   2     1       0
298   57    0   0       140   241    0  ...      1      0.2      1   0     3       0
299   45    1   3       110   264    0  ...      0      1.2      1   0     3       0
300   68    1   0       144   193    1  ...      0      3.4      1   2     3       0
301   57    1   0       130   131    0  ...      1      1.2      1   1     3       0
302   57    0   1       130   236    0  ...      0      0.0      1   1     2       0

[6 rows x 14 columns]


In [43]:
# seperate heart_features & heart_target

heart_feature = heart.iloc[:,0:13]
print(heart_feature.head(6))

   age  sex  cp  trestbps  chol  fbs  ...  thalach  exang  oldpeak  slope  ca  thal
0   63    1   3       145   233    1  ...      150      0      2.3      0   0     1
1   37    1   2       130   250    0  ...      187      0      3.5      0   0     2
2   41    0   1       130   204    0  ...      172      0      1.4      2   0     2
3   56    1   1       120   236    0  ...      178      0      0.8      2   0     2
4   57    0   0       120   354    0  ...      163      1      0.6      2   0     2
5   57    1   0       140   192    0  ...      148      0      0.4      1   0     1

[6 rows x 13 columns]


In [44]:
heart_target = heart.iloc[:,13]
print(heart_target.head(6))

0    1
1    1
2    1
3    1
4    1
5    1
Name: target, dtype: int64


In [48]:
# split dataset in 0.25 test and 0.75 train

feature_train, feature_test, target_train, target_test = train_test_split(heart_feature, heart_target, test_size=0.25, train_size=0.75)


In [49]:
# standard scaler method application

obj = StandardScaler()
train_standardization = obj.fit_transform(feature_train)
print(train_standardization[0:7])

[[ 1.32415907 -1.43790388 -0.95079097  2.77831573 -0.36432678  2.46855221
   0.9699242   0.68823338  1.45244244  0.01438117 -0.71965503  1.23960494
   1.15626574]
 [-0.03749035  0.69545678  1.02025515 -0.35086575  0.47861728 -0.40509575
  -0.99590432  0.10377965 -0.68849544 -0.41516162 -2.38661108  0.25915086
  -0.53710409]
 [ 0.64333436  0.69545678 -0.95079097 -0.05565995  0.10397548 -0.40509575
   0.9699242  -0.25588418  1.45244244  0.3580154   0.94730101  0.25915086
   1.15626574]
 [ 0.64333436 -1.43790388 -0.95079097  1.12516325  0.19763593 -0.40509575
  -0.99590432  0.32856955 -0.68849544  1.38891811 -0.71965503  1.23960494
   1.15626574]
 [-1.51261055  0.69545678  1.02025515 -1.11840083  0.04777921 -0.40509575
   0.9699242   1.31764509 -0.68849544 -0.84470442  0.94730101 -0.72130322
  -0.53710409]
 [ 0.18945122  0.69545678  2.00577821 -0.64607155 -1.01994993 -0.40509575
  -0.99590432  0.55335944 -0.68849544  0.7875582  -0.71965503 -0.72130322
   1.15626574]
 [-0.26443192  0.69545

In [50]:
test_standardization = obj.fit_transform(feature_test)
print(test_standardization[0:3])

[[ 0.8712322   0.63828474  1.88365993  0.59787863 -0.2098958   2.20139816
  -1.0459461   0.0197601  -0.72111026  0.96830821 -2.00625306 -0.69413557
  -1.94642189]
 [ 0.46250598  0.63828474  1.88365993  2.33741413  0.58878753 -0.45425676
  -1.0459461  -0.18318147 -0.72111026  2.65293117 -2.00625306 -0.69413557
   1.04202384]
 [-0.96803578 -1.5666989   0.02446312 -0.19281933 -0.18830976 -0.45425676
  -1.0459461   1.03446795 -0.72111026 -0.53898602 -0.4660992  -0.69413557
  -0.45219903]]


In [51]:
# use KNN classifier

classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier = classifier.fit(feature_train, target_train)
print(classifier.predict(feature_test))

[1 0 1 0 1 0 1 0 1 0 0 0 1 1 1 0 0 1 0 0 1 1 1 1 0 1 1 0 1 0 1 1 1 0 1 1 0
 0 0 1 1 1 0 1 1 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 0 1 0 0 1 1 0 1 1 1 0
 1 0]


In [52]:
# derive accuracy scores

predicted_heart_target = classifier.predict(heart_feature)
print(predicted_heart_target[0:10])

[1 1 1 1 0 0 1 1 0 1]


In [53]:
accuracy = metrics.accuracy_score(heart_target, predicted_heart_target)
print(accuracy)

0.7095709570957096


In [54]:
classifier = KNeighborsClassifier(n_neighbors=4, metric='minkowski', p=2)
classifier = classifier.fit(feature_train, target_train)
predicted_heart_target = classifier.predict(heart_feature)
accuracy = metrics.accuracy_score(heart_target, predicted_heart_target)
print(accuracy)

0.7095709570957096


In [55]:
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier = classifier.fit(feature_train, target_train)
predicted_heart_target = classifier.predict(heart_feature)
accuracy = metrics.accuracy_score(heart_target, predicted_heart_target)
print(accuracy)

0.7095709570957096


In [56]:
classifier = KNeighborsClassifier(n_neighbors=6, metric='minkowski', p=2)
classifier = classifier.fit(feature_train, target_train)
predicted_heart_target = classifier.predict(heart_feature)
accuracy = metrics.accuracy_score(heart_target, predicted_heart_target)
print(accuracy)

0.7029702970297029


In [57]:
classifier = KNeighborsClassifier(n_neighbors=7, metric='minkowski', p=2)
classifier = classifier.fit(feature_train, target_train)
predicted_heart_target = classifier.predict(heart_feature)
accuracy = metrics.accuracy_score(heart_target, predicted_heart_target)
print(accuracy)

0.6996699669966997


In [58]:
# THE END
# Abhijeet Das - DS Assignment 10