# Vector classification using K Nearest Neighbors

## Import libraries

In [1]:
import numpy as np
from sklearn import preprocessing, model_selection, neighbors
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd
import time
import seaborn as sns

  import pandas.util.testing as tm


## Define timer

In [2]:
def tic():
    """Start (or restart) the stopwatch that toc() reads.

    Homemade version of MATLAB's tic: stores the current reading of a
    monotonic clock in the module-level global ``startTime_for_tictoc``.
    """
    global startTime_for_tictoc
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # (or become negative) if the system wall clock is adjusted meanwhile.
    startTime_for_tictoc = time.perf_counter()


def toc():
    """Print and return the number of seconds elapsed since the last tic().

    Returns:
        float: the elapsed seconds, or None (after printing a notice) when
        tic() has not been called yet.
    """
    if 'startTime_for_tictoc' not in globals():
        print("Toc: start time not set")
        return None
    # Read the clock exactly once so the printed value and the returned value
    # are the same number.
    elapsed = time.perf_counter() - startTime_for_tictoc
    print("Elapsed time is " + str(elapsed) + " seconds.")
    return elapsed

## Load and prepare data

In [3]:
# Load the measurements; header=None tells pandas the first line is data,
# so the columns get integer labels.
df = pd.read_csv(
    "s_HR_13scales_normalized.csv",
    header=None,
)

In [4]:
# Preview the first five rows of the freshly loaded table.
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,1,1,1.0,0.83834,0.63194,0.44754,0.35949,0.30002,0.26471,0.22864,...,0.15009,0.14823,0.13442,0.13008,0.12222,0.12426,0.11809,0.12339,0.11559,0.12513
1,1,2,1.0,0.8112,0.60588,0.45352,0.35654,0.30176,0.26145,0.22649,...,0.14249,0.1374,0.12752,0.12083,0.1138,0.11548,0.11146,0.11546,0.10528,0.1105
2,1,3,1.0,0.78503,0.58087,0.43953,0.35023,0.28822,0.24835,0.21731,...,0.13782,0.13571,0.11863,0.11758,0.1088,0.10933,0.1084,0.11009,0.10141,0.10618
3,1,4,1.0,0.7561,0.56667,0.41741,0.33932,0.27868,0.24637,0.2137,...,0.13724,0.13285,0.11877,0.11588,0.10263,0.10542,0.10119,0.10536,0.09909,0.10216
4,1,5,1.0,0.7274,0.54285,0.39605,0.32586,0.26886,0.23673,0.20496,...,0.13073,0.12641,0.11716,0.10908,0.099378,0.098337,0.095447,0.10013,0.091677,0.096647


In [5]:
# Name the 28 columns: ID, SCALE, then R1 through R26.
df.columns = ["ID", "SCALE"] + [f"R{k}" for k in range(1, 27)]

In [6]:
# Preview the first five rows of the table.
df.head(n=5)

Unnamed: 0,ID,SCALE,R1,R2,R3,R4,R5,R6,R7,R8,...,R17,R18,R19,R20,R21,R22,R23,R24,R25,R26
0,1,1,1.0,0.83834,0.63194,0.44754,0.35949,0.30002,0.26471,0.22864,...,0.15009,0.14823,0.13442,0.13008,0.12222,0.12426,0.11809,0.12339,0.11559,0.12513
1,1,2,1.0,0.8112,0.60588,0.45352,0.35654,0.30176,0.26145,0.22649,...,0.14249,0.1374,0.12752,0.12083,0.1138,0.11548,0.11146,0.11546,0.10528,0.1105
2,1,3,1.0,0.78503,0.58087,0.43953,0.35023,0.28822,0.24835,0.21731,...,0.13782,0.13571,0.11863,0.11758,0.1088,0.10933,0.1084,0.11009,0.10141,0.10618
3,1,4,1.0,0.7561,0.56667,0.41741,0.33932,0.27868,0.24637,0.2137,...,0.13724,0.13285,0.11877,0.11588,0.10263,0.10542,0.10119,0.10536,0.09909,0.10216
4,1,5,1.0,0.7274,0.54285,0.39605,0.32586,0.26886,0.23673,0.20496,...,0.13073,0.12641,0.11716,0.10908,0.099378,0.098337,0.095447,0.10013,0.091677,0.096647


In [7]:
# Preview the last five rows of the table.
df.tail(n=5)

Unnamed: 0,ID,SCALE,R1,R2,R3,R4,R5,R6,R7,R8,...,R17,R18,R19,R20,R21,R22,R23,R24,R25,R26
28075,30,9,0.86701,1.0,0.78027,0.58748,0.47165,0.35351,0.26793,0.26124,...,0.15831,0.14129,0.13278,0.13247,0.12342,0.1128,0.10999,0.10849,0.10986,0.10476
28076,30,10,0.87999,1.0,0.7638,0.58889,0.46534,0.33954,0.27131,0.23735,...,0.14738,0.132,0.13399,0.1217,0.11796,0.1025,0.09132,0.097323,0.084752,0.083143
28077,30,11,0.88829,1.0,0.73748,0.58052,0.46266,0.31487,0.26318,0.23089,...,0.14788,0.12299,0.11561,0.10655,0.10281,0.098136,0.090488,0.090421,0.078053,0.072611
28078,30,12,0.93806,1.0,0.74012,0.60335,0.47222,0.33932,0.25705,0.25911,...,0.13215,0.11187,0.10611,0.099756,0.091331,0.086061,0.08292,0.08351,0.073841,0.070281
28079,30,13,0.97991,1.0,0.74809,0.60453,0.50307,0.34561,0.27131,0.24386,...,0.12876,0.11808,0.10087,0.092633,0.089685,0.082419,0.070821,0.072263,0.067838,0.066389


## Scale filtering

In [8]:
# Keep only the rows whose SCALE value is in the selected list
# (here every integer from 1 to 13), then show the first 20 remaining rows.
scale = list(range(1, 14))
df = df.loc[df["SCALE"].isin(scale)]
df.head(20)

Unnamed: 0,ID,SCALE,R1,R2,R3,R4,R5,R6,R7,R8,...,R17,R18,R19,R20,R21,R22,R23,R24,R25,R26
0,1,1,1.0,0.83834,0.63194,0.44754,0.35949,0.30002,0.26471,0.22864,...,0.15009,0.14823,0.13442,0.13008,0.12222,0.12426,0.11809,0.12339,0.11559,0.12513
1,1,2,1.0,0.8112,0.60588,0.45352,0.35654,0.30176,0.26145,0.22649,...,0.14249,0.1374,0.12752,0.12083,0.1138,0.11548,0.11146,0.11546,0.10528,0.1105
2,1,3,1.0,0.78503,0.58087,0.43953,0.35023,0.28822,0.24835,0.21731,...,0.13782,0.13571,0.11863,0.11758,0.1088,0.10933,0.1084,0.11009,0.10141,0.10618
3,1,4,1.0,0.7561,0.56667,0.41741,0.33932,0.27868,0.24637,0.2137,...,0.13724,0.13285,0.11877,0.11588,0.10263,0.10542,0.10119,0.10536,0.09909,0.10216
4,1,5,1.0,0.7274,0.54285,0.39605,0.32586,0.26886,0.23673,0.20496,...,0.13073,0.12641,0.11716,0.10908,0.099378,0.098337,0.095447,0.10013,0.091677,0.096647
5,1,6,1.0,0.70846,0.5186,0.38736,0.3166,0.26083,0.23003,0.20261,...,0.12587,0.12324,0.11068,0.10673,0.095962,0.095636,0.092344,0.09512,0.087222,0.090701
6,1,7,1.0,0.68534,0.50515,0.37235,0.31032,0.25364,0.22148,0.19402,...,0.12007,0.11758,0.10604,0.10064,0.092976,0.092697,0.089031,0.089484,0.082588,0.084054
7,1,8,1.0,0.66026,0.48921,0.35655,0.29938,0.25146,0.21378,0.18918,...,0.11542,0.11363,0.10222,0.096165,0.087812,0.08645,0.082171,0.083787,0.076369,0.080076
8,1,9,1.0,0.63601,0.46835,0.34657,0.29444,0.2409,0.21021,0.18362,...,0.11184,0.10745,0.097413,0.091695,0.084708,0.080096,0.077492,0.078325,0.071922,0.073533
9,1,10,1.0,0.60507,0.45341,0.34304,0.28301,0.23566,0.20265,0.18007,...,0.10688,0.10309,0.09321,0.088423,0.078812,0.074338,0.072468,0.069985,0.066502,0.065722


## Feature and target columns

In [9]:
# Target: the ID column. Features: the 26 columns R1 through R26.
feature_columns = [f"R{k}" for k in range(1, 27)]
Y = df["ID"]
X = df[feature_columns]

In [10]:
# Preview the first five rows of the feature table.
X.head(n=5)

Unnamed: 0,R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,...,R17,R18,R19,R20,R21,R22,R23,R24,R25,R26
0,1.0,0.83834,0.63194,0.44754,0.35949,0.30002,0.26471,0.22864,0.22625,0.22745,...,0.15009,0.14823,0.13442,0.13008,0.12222,0.12426,0.11809,0.12339,0.11559,0.12513
1,1.0,0.8112,0.60588,0.45352,0.35654,0.30176,0.26145,0.22649,0.21556,0.21654,...,0.14249,0.1374,0.12752,0.12083,0.1138,0.11548,0.11146,0.11546,0.10528,0.1105
2,1.0,0.78503,0.58087,0.43953,0.35023,0.28822,0.24835,0.21731,0.21154,0.20933,...,0.13782,0.13571,0.11863,0.11758,0.1088,0.10933,0.1084,0.11009,0.10141,0.10618
3,1.0,0.7561,0.56667,0.41741,0.33932,0.27868,0.24637,0.2137,0.20578,0.20492,...,0.13724,0.13285,0.11877,0.11588,0.10263,0.10542,0.10119,0.10536,0.09909,0.10216
4,1.0,0.7274,0.54285,0.39605,0.32586,0.26886,0.23673,0.20496,0.19843,0.1979,...,0.13073,0.12641,0.11716,0.10908,0.099378,0.098337,0.095447,0.10013,0.091677,0.096647


In [11]:
# Preview the first five target labels.
Y.head(n=5)

0    1
1    1
2    1
3    1
4    1
Name: ID, dtype: int64

## KNN classifier


In [12]:
# Hold out 30% of the rows for testing. The fixed random_state makes the
# partition, and the accuracy computed from it, reproducible across kernel
# restarts.
# NOTE(review): this is a random row-level split; if rows belonging to the
# same ID are very similar to each other, the test score may be optimistic.
# Confirm this is the intended evaluation protocol.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.3, random_state=42
)

In [13]:
# Build a k-nearest-neighbours classifier (5 neighbours, the library default)
# and fit it on the training split.
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, Y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [14]:
# Fraction of held-out rows whose predicted ID equals the true ID.
accuracy = accuracy_score(Y_test, clf.predict(X_test))
accuracy

1.0

## Test

In [15]:
# Pick one row of the feature table by position to use as a demo input.
# NOTE(review): X holds both the training and the test rows, so this row may
# have been seen while fitting.
index_test = 8500
sample_measure = X.iloc[index_test].to_numpy(copy=True)

In [16]:
# Turn the 1-D feature vector into a single-row 2-D array (one sample, all features).
sample_measure = np.reshape(sample_measure, (1, -1))

In [17]:
# Classify the single-row sample; the result is an array with one predicted ID.
predict = clf.predict(X=sample_measure)

In [18]:
# Report the ID predicted for the sample.
print(f"The input has the ID: {predict[0]}")

The input has the ID: 10


## Compute mean accuracy

In [19]:
# Repeat the split / fit / score cycle several times and collect the test
# accuracy of each cycle in `accuracy`.
N_CYCLES = 30
accuracy = []
for i in range(N_CYCLES):
    # Seed each split with its cycle number: every cycle still gets its own
    # partition, but re-running the cell reproduces exactly the same ones.
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
        X, Y, test_size=0.3, random_state=i
    )
    clf = neighbors.KNeighborsClassifier()  # create model
    clf.fit(X_train, Y_train)  # train model
    # Score once and reuse the value: each call predicts the whole test
    # split, so scoring twice doubles the cost of a cycle.
    cycle_accuracy = clf.score(X_test, Y_test)
    accuracy.append(cycle_accuracy)
    print('Cycle: ' + str(i) + ' | Accuracy: ' + str(cycle_accuracy))

Cycle: 0 | Accuracy: 1.0
Cycle: 1 | Accuracy: 1.0
Cycle: 2 | Accuracy: 1.0
Cycle: 3 | Accuracy: 1.0
Cycle: 4 | Accuracy: 1.0
Cycle: 5 | Accuracy: 1.0
Cycle: 6 | Accuracy: 1.0
Cycle: 7 | Accuracy: 1.0
Cycle: 8 | Accuracy: 1.0
Cycle: 9 | Accuracy: 1.0
Cycle: 10 | Accuracy: 1.0
Cycle: 11 | Accuracy: 1.0
Cycle: 12 | Accuracy: 1.0
Cycle: 13 | Accuracy: 1.0
Cycle: 14 | Accuracy: 1.0
Cycle: 15 | Accuracy: 1.0
Cycle: 16 | Accuracy: 1.0
Cycle: 17 | Accuracy: 1.0
Cycle: 18 | Accuracy: 1.0
Cycle: 19 | Accuracy: 1.0
Cycle: 20 | Accuracy: 1.0
Cycle: 21 | Accuracy: 1.0
Cycle: 22 | Accuracy: 1.0
Cycle: 23 | Accuracy: 1.0
Cycle: 24 | Accuracy: 1.0
Cycle: 25 | Accuracy: 1.0
Cycle: 26 | Accuracy: 1.0
Cycle: 27 | Accuracy: 1.0
Cycle: 28 | Accuracy: 1.0
Cycle: 29 | Accuracy: 1.0


In [20]:
# Average the per-cycle accuracies and report the result as a percentage
# rounded to two decimals.
mean_accuracy_pct = round(np.mean(accuracy) * 100, 2)
print(f"Mean accuracy is: {mean_accuracy_pct}%")

Mean accuracy is: 100.0%
