In [2]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import DistanceMetric
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
warnings.filterwarnings('ignore')

In [3]:
dataset = pd.read_csv('../data/heart_disease_dataset_UCI.csv')
dataset.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
standardScalar = StandardScaler()
target_columns = ['age','trestbps','chol','thalach','oldpeak']
dataset[target_columns] = standardScalar.fit_transform(dataset[target_columns])
dataset.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0.952197,1,3,0.763956,-0.256334,1,0,0.015443,0,1.087338,0,0,1,1
1,-1.915313,1,2,-0.092738,0.072199,0,1,1.633471,0,2.122573,0,0,2,1
2,-1.474158,0,1,-0.092738,-0.816773,0,0,0.977514,0,0.310912,2,0,2,1
3,0.180175,1,1,-0.663867,-0.198357,0,1,1.239897,0,-0.206705,2,0,2,1
4,0.290464,0,0,-0.663867,2.08205,0,1,0.583939,1,-0.379244,2,0,2,1


In [5]:
#Dividing into input and output
X= dataset.drop(['target'], axis=1)
Y= dataset['target']
X_train, X_test, Y_train, Y_test= train_test_split(X, Y, test_size=0.25, random_state=40)

In [6]:
n_neighbors = 1
nca = NeighborhoodComponentsAnalysis()
pca = PCA()
lda = LinearDiscriminantAnalysis()
dimreduc = [("PCA", pca), ("LDA", lda), ("NCA", nca)]
functions = ['manhattan','euclidean','chebyshev']

In [7]:
for dim, (name,model) in enumerate(dimreduc):    
    for distfunc in functions:
        for i in range(0,9):
            knn = KNeighborsClassifier(n_neighbors=n_neighbors + i, metric=distfunc)
            sfs = SequentialFeatureSelector(knn,scoring='accuracy')
            sfs.fit(X_train,Y_train)
            model.fit(sfs.transform(X_train),Y_train)
            knnmodel = knn.fit(model.transform(sfs.transform(X_train)), Y_train)
            knnpredict = knnmodel.predict(model.transform(sfs.transform(X_test)))
            cmknn = confusion_matrix(Y_test, knnpredict)
            print(name, "|", distfunc, "| K Neighbors =", str(i+1),'\n', cmknn)
            features = sfs.get_support()
            print("Selected Features: ")
            for j in range(len(features)):
                if (features[j] == True):
                    print(str(j+1), end = " ")
            print("\nAccuracy=", accuracy_score(Y_test, knnpredict),"\n")

PCA | manhattan | K Neighbors = 1 
 [[31  3]
 [ 8 34]]
Selected Features: 
2 6 7 9 10 12 
Accuracy= 0.8552631578947368 

PCA | manhattan | K Neighbors = 2 
 [[29  5]
 [19 23]]
Selected Features: 
2 3 4 6 12 13 
Accuracy= 0.6842105263157895 

PCA | manhattan | K Neighbors = 3 
 [[27  7]
 [ 7 35]]
Selected Features: 
2 3 9 10 12 13 
Accuracy= 0.8157894736842105 

PCA | manhattan | K Neighbors = 4 
 [[29  5]
 [ 9 33]]
Selected Features: 
2 6 9 10 12 13 
Accuracy= 0.8157894736842105 

PCA | manhattan | K Neighbors = 5 
 [[27  7]
 [ 5 37]]
Selected Features: 
3 6 7 10 12 13 
Accuracy= 0.8421052631578947 

PCA | manhattan | K Neighbors = 6 
 [[29  5]
 [ 6 36]]
Selected Features: 
3 9 10 11 12 13 
Accuracy= 0.8552631578947368 

PCA | manhattan | K Neighbors = 7 
 [[28  6]
 [ 4 38]]
Selected Features: 
2 3 6 10 12 13 
Accuracy= 0.868421052631579 

PCA | manhattan | K Neighbors = 8 
 [[28  6]
 [ 7 35]]
Selected Features: 
2 3 6 10 12 13 
Accuracy= 0.8289473684210527 

PCA | manhattan | K Neighb

ValueError: unknown solver 5 (valid solvers are 'svd', 'lsqr', and 'eigen').

In [8]:
for distfunc in functions:
    for i in range(0,9):
        knn = KNeighborsClassifier(n_neighbors=n_neighbors + i, metric=distfunc)
        sfs = SequentialFeatureSelector(knn,scoring='accuracy')
        sfs.fit(X_train,Y_train)
        knnmodel = knn.fit(sfs.transform(X_train), Y_train)
        knnpredict = knnmodel.predict(sfs.transform(X_test))
        cmknn = confusion_matrix(Y_test, knnpredict)
        print("No Dim Reduce |", distfunc, "| K Neighbors =", str(i+1),'\n', cmknn)
        features = sfs.get_support()
        print("Selected Features: ")
        for j in range(len(features)):
            if (features[j] == True):
                print(str(j+1), end = " ")
        print("\nAccuracy=", accuracy_score(Y_test, knnpredict),"\n")


No Dim Reduce | manhattan | K Neighbors = 1 
 [[27  7]
 [ 8 34]]
Selected Features: 
2 6 7 9 10 12 
Accuracy= 0.8026315789473685 

No Dim Reduce | manhattan | K Neighbors = 2 
 [[29  5]
 [14 28]]
Selected Features: 
2 3 4 6 12 13 
Accuracy= 0.75 

No Dim Reduce | manhattan | K Neighbors = 3 
 [[28  6]
 [ 7 35]]
Selected Features: 
2 3 9 10 12 13 
Accuracy= 0.8289473684210527 

No Dim Reduce | manhattan | K Neighbors = 4 
 [[29  5]
 [11 31]]
Selected Features: 
2 6 9 10 12 13 
Accuracy= 0.7894736842105263 

No Dim Reduce | manhattan | K Neighbors = 5 
 [[27  7]
 [ 4 38]]
Selected Features: 
3 6 7 10 12 13 
Accuracy= 0.8552631578947368 

No Dim Reduce | manhattan | K Neighbors = 6 
 [[27  7]
 [ 4 38]]
Selected Features: 
3 9 10 11 12 13 
Accuracy= 0.8552631578947368 

No Dim Reduce | manhattan | K Neighbors = 7 
 [[27  7]
 [ 3 39]]
Selected Features: 
2 3 6 10 12 13 
Accuracy= 0.868421052631579 

No Dim Reduce | manhattan | K Neighbors = 8 
 [[28  6]
 [ 5 37]]
Selected Features: 
2 3 6 1

In [12]:
knn = KNeighborsClassifier(n_neighbors=5, metric='manhattan')
knnmodel = knn.fit(X_train, Y_train)
knnpredict = knnmodel.predict(X_test)
cmknn = confusion_matrix(Y_test, knnpredict)
print("No Dim Reduce |", 'mahattan', "| K Neighbors = 5",'\n', cmknn)
print("\nAccuracy=", accuracy_score(Y_test, knnpredict),"\n")


No Dim Reduce | mahattan | K Neighbors = 5 
 [[29  5]
 [ 3 39]]

Accuracy= 0.8947368421052632 

