In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [22]:
df = pd.read_csv('heart.csv')

df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


### Q1

In [23]:
df.groupby('target').size()

target
0    499
1    526
dtype: int64

In [24]:
X = df.values[:, :13] # not target
Y = df.values[:, 13] # target

In [25]:
test_size = 0.2
random_seed = 8

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=random_seed)

display(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

(820, 13)

(820,)

(205, 13)

(205,)

In [26]:
accs = []
for i in range(1, 251):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_test)

    accs.append(accuracy_score(Y_test, predictions))

npaccs = np.array(accs)
npaccs.sort()
print("MAX ACCURACY: ", npaccs[-1])
print("MIN ACCURACY: ", npaccs[0])

MAX ACCURACY:  1.0
MIN ACCURACY:  0.5853658536585366


### Q2

In [27]:
accs = []
for random_seed in range(1, 11):
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=random_seed)

    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_test)

    accs.append(accuracy_score(Y_test, predictions))

npaccs = np.array(accs)
npaccs.sort()
display(accs)
print("MAX ACCURACY: ", npaccs[-1])
print("MIN ACCURACY: ", npaccs[0])

[0.9317073170731708,
 0.8780487804878049,
 0.9219512195121952,
 0.9121951219512195,
 0.9365853658536586,
 0.9121951219512195,
 0.9024390243902439,
 0.8975609756097561,
 0.9365853658536586,
 0.8585365853658536]

MAX ACCURACY:  0.9365853658536586
MIN ACCURACY:  0.8585365853658536


### Q3

In [None]:
df = pd.read_csv('derma.csv')
df = df.dropna()

X = df.values[:, :35]
y = df.values[:, 35]

display(X.shape, y.shape)

(358, 35)

(358,)

In [None]:
kf = KFold(10, shuffle=True, random_state=727)

for i, (train_index, test_index) in enumerate(kf.split(X)):
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X[train_index], y[train_index])
    predictions = knn.predict(X[test_index])
    print(f'Iteration {i+1}:\nConfusion Matrix')
    display(confusion_matrix(y[test_index], predictions))


Iteration 1:
Confusion Matrix


array([[11,  0,  0,  0,  0,  0],
       [ 4,  3,  1,  0,  0,  0],
       [ 0,  2,  2,  0,  0,  0],
       [ 2,  2,  0,  1,  0,  0],
       [ 2,  1,  0,  0,  2,  0],
       [ 1,  0,  0,  0,  0,  2]], dtype=int64)

Iteration 2:
Confusion Matrix


array([[10,  1,  0,  1,  0,  0],
       [ 3,  2,  1,  0,  0,  0],
       [ 2,  1,  2,  1,  0,  0],
       [ 1,  0,  2,  1,  0,  0],
       [ 2,  1,  2,  0,  2,  0],
       [ 1,  0,  0,  0,  0,  0]], dtype=int64)

Iteration 3:
Confusion Matrix


array([[9, 0, 0, 0, 2, 0],
       [1, 2, 0, 1, 0, 0],
       [3, 2, 5, 2, 0, 0],
       [1, 1, 0, 2, 0, 0],
       [2, 1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 1]], dtype=int64)

Iteration 4:
Confusion Matrix


array([[8, 0, 0, 0, 2, 0],
       [5, 4, 0, 0, 0, 0],
       [2, 2, 4, 0, 0, 0],
       [0, 4, 0, 0, 0, 0],
       [0, 3, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 1]], dtype=int64)

Iteration 5:
Confusion Matrix


array([[3, 3, 1, 0, 0, 0],
       [3, 3, 1, 0, 0, 0],
       [1, 2, 4, 1, 0, 0],
       [1, 2, 1, 2, 0, 0],
       [4, 2, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1]], dtype=int64)

Iteration 6:
Confusion Matrix


array([[4, 4, 1, 2, 0, 2],
       [3, 0, 0, 0, 0, 0],
       [2, 0, 3, 2, 0, 0],
       [1, 0, 0, 2, 0, 0],
       [2, 2, 0, 1, 1, 0],
       [2, 1, 0, 1, 0, 0]], dtype=int64)

Iteration 7:
Confusion Matrix


array([[11,  0,  0,  1,  2,  0],
       [ 2,  2,  1,  0,  1,  0],
       [ 4,  1,  1,  0,  1,  0],
       [ 1,  0,  0,  1,  1,  0],
       [ 3,  2,  0,  0,  0,  0],
       [ 0,  1,  0,  0,  0,  0]], dtype=int64)

Iteration 8:
Confusion Matrix


array([[4, 3, 0, 1, 0, 0],
       [4, 3, 0, 0, 0, 0],
       [2, 1, 2, 0, 0, 0],
       [5, 1, 0, 3, 1, 0],
       [2, 2, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0]], dtype=int64)

Iteration 9:
Confusion Matrix


array([[7, 4, 0, 2, 0],
       [4, 1, 0, 0, 1],
       [2, 2, 1, 0, 0],
       [5, 0, 0, 0, 1],
       [2, 1, 0, 0, 2]], dtype=int64)

Iteration 10:
Confusion Matrix


array([[9, 2, 0, 0, 1, 0],
       [1, 2, 0, 1, 0, 0],
       [2, 2, 5, 0, 0, 0],
       [2, 0, 1, 0, 0, 0],
       [1, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 4]], dtype=int64)