# Section 1: Imports & Functions

In [1]:
# Library Import Statements

import pandas as pd
import numpy as np
from sklearn.preprocessing import normalize


In [2]:
# Data Import Cell
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00471/Data_for_UCI_named.csv"
data = pd.read_csv(url, sep=",")
data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


# Section 2: 1st Analysis

In [3]:
my_data = data.head(5000)
my_data

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3.592940,8.130493,3.270724,8.075292,4.772430,-1.458529,-1.690274,-1.623627,0.210123,0.683625,0.159779,0.344948,0.011499,unstable
4996,7.656651,6.411748,5.655960,6.045712,3.193900,-0.918944,-1.377013,-0.897943,0.443617,0.490799,0.988511,0.788984,0.086436,unstable
4997,1.069438,6.581439,1.230892,9.937483,3.756828,-1.524720,-0.516322,-1.715786,0.501101,0.381437,0.773236,0.822149,-0.025390,stable
4998,4.629745,8.353612,8.119304,8.600847,2.960941,-1.424327,-0.890685,-0.645929,0.383265,0.241701,0.609101,0.756135,0.052382,unstable


In [4]:
# Convert Class names to numbers
# 0 = stable
# 1 = unstable

# assign values to the dependent variable
my_data.loc[(my_data['stabf'] == 'stable'), 'stabf'] = 0
my_data.loc[(my_data['stabf'] == 'unstable'), 'stabf'] = 1
my_data['stabf'] = pd.to_numeric(my_data['stabf'])
my_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  my_data['stabf'] = pd.to_numeric(my_data['stabf'])


Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,1
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,0
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,1
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,1
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3.592940,8.130493,3.270724,8.075292,4.772430,-1.458529,-1.690274,-1.623627,0.210123,0.683625,0.159779,0.344948,0.011499,1
4996,7.656651,6.411748,5.655960,6.045712,3.193900,-0.918944,-1.377013,-0.897943,0.443617,0.490799,0.988511,0.788984,0.086436,1
4997,1.069438,6.581439,1.230892,9.937483,3.756828,-1.524720,-0.516322,-1.715786,0.501101,0.381437,0.773236,0.822149,-0.025390,0
4998,4.629745,8.353612,8.119304,8.600847,2.960941,-1.424327,-0.890685,-0.645929,0.383265,0.241701,0.609101,0.756135,0.052382,1


In [5]:
# Split dataframe into testing and training sets
train_data = my_data.iloc[:3500]
test_data = my_data.iloc[3500:]
test_data

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
3500,7.592223,1.389509,7.285896,6.031028,4.901480,-1.809328,-1.536639,-1.555512,0.444016,0.469558,0.559769,0.170529,0.001593,1
3501,9.203739,3.031134,2.742088,7.318112,4.379546,-1.160296,-1.374819,-1.844431,0.322139,0.945597,0.786776,0.527852,0.015995,1
3502,4.016004,8.501127,5.345373,6.943443,2.952986,-0.535994,-1.375817,-1.041175,0.662382,0.300356,0.251775,0.559976,0.027832,1
3503,1.568860,2.338635,7.562933,0.931341,3.180046,-1.887828,-0.673774,-0.618444,0.321079,0.081876,0.298656,0.078052,-0.051548,0
3504,8.906023,4.356134,4.086695,1.396348,4.998403,-1.232689,-1.910843,-1.854870,0.143437,0.556369,0.929642,0.686855,0.007881,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3.592940,8.130493,3.270724,8.075292,4.772430,-1.458529,-1.690274,-1.623627,0.210123,0.683625,0.159779,0.344948,0.011499,1
4996,7.656651,6.411748,5.655960,6.045712,3.193900,-0.918944,-1.377013,-0.897943,0.443617,0.490799,0.988511,0.788984,0.086436,1
4997,1.069438,6.581439,1.230892,9.937483,3.756828,-1.524720,-0.516322,-1.715786,0.501101,0.381437,0.773236,0.822149,-0.025390,0
4998,4.629745,8.353612,8.119304,8.600847,2.960941,-1.424327,-0.890685,-0.645929,0.383265,0.241701,0.609101,0.756135,0.052382,1


In [6]:
# Save class values for train and test sets
train_labels = train_data['stabf'].to_numpy()
test_labels = test_data['stabf'].to_numpy()

# Drop class column from train and test sets
train = train_data.drop('stabf', axis=1)
test = test_data.drop('stabf', axis=1)
test

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab
3500,7.592223,1.389509,7.285896,6.031028,4.901480,-1.809328,-1.536639,-1.555512,0.444016,0.469558,0.559769,0.170529,0.001593
3501,9.203739,3.031134,2.742088,7.318112,4.379546,-1.160296,-1.374819,-1.844431,0.322139,0.945597,0.786776,0.527852,0.015995
3502,4.016004,8.501127,5.345373,6.943443,2.952986,-0.535994,-1.375817,-1.041175,0.662382,0.300356,0.251775,0.559976,0.027832
3503,1.568860,2.338635,7.562933,0.931341,3.180046,-1.887828,-0.673774,-0.618444,0.321079,0.081876,0.298656,0.078052,-0.051548
3504,8.906023,4.356134,4.086695,1.396348,4.998403,-1.232689,-1.910843,-1.854870,0.143437,0.556369,0.929642,0.686855,0.007881
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3.592940,8.130493,3.270724,8.075292,4.772430,-1.458529,-1.690274,-1.623627,0.210123,0.683625,0.159779,0.344948,0.011499
4996,7.656651,6.411748,5.655960,6.045712,3.193900,-0.918944,-1.377013,-0.897943,0.443617,0.490799,0.988511,0.788984,0.086436
4997,1.069438,6.581439,1.230892,9.937483,3.756828,-1.524720,-0.516322,-1.715786,0.501101,0.381437,0.773236,0.822149,-0.025390
4998,4.629745,8.353612,8.119304,8.600847,2.960941,-1.424327,-0.890685,-0.645929,0.383265,0.241701,0.609101,0.756135,0.052382


In [7]:
# Convert train and test to numpy arrays
train = train.to_numpy()
test = test.to_numpy()
test

array([[ 7.59222336e+00,  1.38950867e+00,  7.28589559e+00, ...,
         5.59768619e-01,  1.70528709e-01,  1.59300624e-03],
       [ 9.20373904e+00,  3.03113388e+00,  2.74208849e+00, ...,
         7.86776143e-01,  5.27851948e-01,  1.59949946e-02],
       [ 4.01600415e+00,  8.50112731e+00,  5.34537329e+00, ...,
         2.51774855e-01,  5.59975960e-01,  2.78322527e-02],
       ...,
       [ 1.06943762e+00,  6.58143888e+00,  1.23089213e+00, ...,
         7.73236128e-01,  8.22149435e-01, -2.53904805e-02],
       [ 4.62974535e+00,  8.35361207e+00,  8.11930402e+00, ...,
         6.09100855e-01,  7.56135124e-01,  5.23822770e-02],
       [ 7.54511861e-01,  9.72339201e+00,  9.26487092e+00, ...,
         2.73108030e-01,  2.51180820e-01, -3.69859336e-02]])

In [8]:
# normalize the datasets
train = normalize(train, axis=0)
test = normalize(test, axis=0)
test

array([[ 0.03294783,  0.0059878 ,  0.03189501, ...,  0.02445562,
         0.00752012,  0.00101395],
       [ 0.03994129,  0.01306203,  0.01200387, ...,  0.03437331,
         0.02327766,  0.01018084],
       [ 0.01742818,  0.03663382,  0.02340011, ...,  0.01099974,
         0.02469429,  0.01771527],
       ...,
       [ 0.00464102,  0.02836133,  0.0053884 , ...,  0.03378176,
         0.03625584, -0.01616108],
       [ 0.02009162,  0.03599814,  0.03554337, ...,  0.02661089,
         0.03334468,  0.03334139],
       [ 0.00327434,  0.04190091,  0.04055825, ...,  0.01193176,
         0.01107678, -0.0235416 ]])

In [9]:
# Calculate the Euclidean distances using no loops
num_test = test.shape[0]
num_train = train.shape[0]
dists = np.zeros((num_test, num_train)) 
sum_squares_train = np.sum(np.square(train), axis=1)
sum_squares_test = np.sum(np.square(test), axis=1)
sum_combo = np.dot(test, train.T) * -2
# Store the distances in the distance matrix
dists = np.sqrt(((sum_combo + sum_squares_train).T + sum_squares_test).T)
dists

array([[0.0529621 , 0.04462797, 0.05458674, ..., 0.06326677, 0.05746328,
        0.04569973],
       [0.04906884, 0.04705974, 0.05646954, ..., 0.06194467, 0.05296708,
        0.05821566],
       [0.03821409, 0.05069847, 0.05365513, ..., 0.0557102 , 0.0613269 ,
        0.053487  ],
       ...,
       [0.05835539, 0.05998833, 0.06828727, ..., 0.0537749 , 0.04646163,
        0.06121794],
       [0.04297523, 0.06438266, 0.06614503, ..., 0.06783632, 0.07152282,
        0.0638074 ],
       [0.07608597, 0.06118565, 0.06595964, ..., 0.06664703, 0.07698337,
        0.07384184]])

In [10]:
# Make predictions using k nearest neighbors

k = 13
num_test = dists.shape[0]
y_pred = np.zeros(num_test)
for i in range(num_test):
    nearest_neighbors_indices = np.argsort(dists[i])
    closest_y = np.zeros(k)
    for m in range(k):
        closest_y[m] = train_labels[nearest_neighbors_indices[m]]
    labels = []
    label_counts = []
    for label in closest_y:
        if label not in labels:
          labels.append(label)
          label_counts.append(1)
        else:
          label_counts[labels.index(label)] = label_counts[labels.index(label)] + 1
    mode_labels = np.argsort(label_counts)[::-1]
    y_pred[i] = labels[mode_labels[0]]
y_pred

array([1., 1., 1., ..., 0., 1., 0.])

In [11]:
# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_pred == test_labels)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

Got 1424 / 1500 correct => accuracy: 0.949333


# Section 3: 2nd Analysis

In [12]:
my_data = data.tail(5000)
my_data

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
5000,5.825547,6.153305,9.215683,2.644971,4.749492,-1.491278,-1.382048,-1.876166,0.528459,0.155706,0.156222,0.458131,-0.023089,stable
5001,2.750088,8.767781,4.517367,1.373404,2.700511,-0.587310,-1.190554,-0.922647,0.471845,0.428823,0.596576,0.249577,-0.021277,stable
5002,6.253534,6.625686,0.613047,7.550439,4.305593,-1.529390,-1.415248,-1.360955,0.376768,0.686419,0.598896,0.408225,0.009619,unstable
5003,1.800725,1.185765,1.515843,8.576087,4.255226,-1.270079,-1.970055,-1.015092,0.341150,0.623442,0.247956,0.653949,-0.038621,stable
5004,7.150430,4.837233,3.244408,2.089166,4.539624,-1.981831,-1.375972,-1.181821,0.319280,0.072775,0.842072,0.577839,-0.027978,stable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,unstable
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,stable
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,stable
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,unstable


In [13]:
# Convert Class names to numbers
# 0 = stable
# 1 = unstable

# assign values to the dependent variable
my_data.loc[(my_data['stabf'] == 'stable'), 'stabf'] = 0
my_data.loc[(my_data['stabf'] == 'unstable'), 'stabf'] = 1
my_data['stabf'] = pd.to_numeric(my_data['stabf'])
my_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  my_data['stabf'] = pd.to_numeric(my_data['stabf'])


Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
5000,5.825547,6.153305,9.215683,2.644971,4.749492,-1.491278,-1.382048,-1.876166,0.528459,0.155706,0.156222,0.458131,-0.023089,0
5001,2.750088,8.767781,4.517367,1.373404,2.700511,-0.587310,-1.190554,-0.922647,0.471845,0.428823,0.596576,0.249577,-0.021277,0
5002,6.253534,6.625686,0.613047,7.550439,4.305593,-1.529390,-1.415248,-1.360955,0.376768,0.686419,0.598896,0.408225,0.009619,1
5003,1.800725,1.185765,1.515843,8.576087,4.255226,-1.270079,-1.970055,-1.015092,0.341150,0.623442,0.247956,0.653949,-0.038621,0
5004,7.150430,4.837233,3.244408,2.089166,4.539624,-1.981831,-1.375972,-1.181821,0.319280,0.072775,0.842072,0.577839,-0.027978,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,1
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,0
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,0
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,1


In [14]:
# Split dataframe into testing and training sets
train_data = my_data.iloc[:3000]
test_data = my_data.iloc[3000:]
test_data

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
8000,5.306905,2.452792,5.414825,4.964973,2.853706,-0.952709,-1.391630,-0.509366,0.160440,0.958669,0.867021,0.183520,0.026496,1
8001,7.745244,4.311693,7.893386,0.917804,3.457962,-1.425012,-1.291345,-0.741605,0.576198,0.851929,0.172012,0.788637,-0.004614,0
8002,5.279096,5.458030,4.123039,9.860768,2.441075,-1.028213,-0.790346,-0.622516,0.462567,0.287156,0.252946,0.663104,0.021423,1
8003,1.596736,5.744044,5.888295,2.434392,3.762364,-1.019351,-0.869846,-1.873167,0.769447,0.733218,0.096916,0.159050,-0.033336,0
8004,7.214450,2.265906,3.889059,9.012663,3.675266,-0.722557,-1.249246,-1.703463,0.837425,0.599088,0.975245,0.772472,0.061052,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,1
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,0
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,0
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,1


In [15]:
# Save class values for train and test sets
train_labels = train_data['stabf'].to_numpy()
test_labels = test_data['stabf'].to_numpy()

# Drop class column from train and test sets
train = train_data.drop('stabf', axis=1)
test = test_data.drop('stabf', axis=1)
test

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab
8000,5.306905,2.452792,5.414825,4.964973,2.853706,-0.952709,-1.391630,-0.509366,0.160440,0.958669,0.867021,0.183520,0.026496
8001,7.745244,4.311693,7.893386,0.917804,3.457962,-1.425012,-1.291345,-0.741605,0.576198,0.851929,0.172012,0.788637,-0.004614
8002,5.279096,5.458030,4.123039,9.860768,2.441075,-1.028213,-0.790346,-0.622516,0.462567,0.287156,0.252946,0.663104,0.021423
8003,1.596736,5.744044,5.888295,2.434392,3.762364,-1.019351,-0.869846,-1.873167,0.769447,0.733218,0.096916,0.159050,-0.033336
8004,7.214450,2.265906,3.889059,9.012663,3.675266,-0.722557,-1.249246,-1.703463,0.837425,0.599088,0.975245,0.772472,0.061052
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789


In [16]:
# Convert train and test to numpy arrays
train = train.to_numpy()
test = test.to_numpy()
test

array([[ 5.30690458e+00,  2.45279249e+00,  5.41482459e+00, ...,
         8.67021410e-01,  1.83519882e-01,  2.64962906e-02],
       [ 7.74524391e+00,  4.31169345e+00,  7.89338602e+00, ...,
         1.72011877e-01,  7.88636816e-01, -4.61427241e-03],
       [ 5.27909591e+00,  5.45802967e+00,  4.12303948e+00, ...,
         2.52945554e-01,  6.63103928e-01,  2.14226453e-02],
       ...,
       [ 2.36403419e+00,  2.84203025e+00,  8.77639096e+00, ...,
         1.49286458e-01,  1.45984032e-01, -3.18098881e-02],
       [ 9.63151069e+00,  3.99439760e+00,  2.75707093e+00, ...,
         8.89118346e-01,  8.18391326e-01,  3.77888091e-02],
       [ 6.53052662e+00,  6.78178990e+00,  4.34969522e+00, ...,
         3.78760930e-01,  9.42630833e-01,  4.52633082e-02]])

In [17]:
# Calculate the Euclidean distances using 1 loop
num_test = test.shape[0]
num_train = train.shape[0]
dists = np.zeros((num_test, num_train))
for i in range(num_test):
    test_point = test[i]
    subtraction_matrix_1l = train - test_point
    squared_matrix_1l = np.square(subtraction_matrix_1l)
    sum_of_squares_vector = np.sum(squared_matrix_1l, axis=1)
    distances_vector = np.sqrt(sum_of_squares_vector)
    # Store the distances in the distance matrix
    dists[i] = distances_vector
dists

array([[ 6.39588195,  7.80684936,  7.1736136 , ...,  9.22110562,
         6.16111504,  3.50595046],
       [ 3.92085128,  7.64151855, 10.30036779, ..., 11.94195104,
         8.36223921,  5.99341579],
       [ 9.29110844,  9.5060078 ,  4.99855722, ...,  5.72491318,
         9.68261024,  8.15986122],
       ...,
       [ 5.19825475,  7.64295322, 11.82678916, ..., 11.25359651,
         8.97047589,  4.66049535],
       [ 9.7621717 , 10.75797324,  5.25276363, ..., 10.17725321,
         8.12872306,  8.60682185],
       [ 8.05260679,  8.58839637,  4.13559361, ...,  5.74372075,
         9.38195329,  8.1618491 ]])

In [18]:
# Make predictions using k nearest neighbors

k = 23
num_test = dists.shape[0]
y_pred = np.zeros(num_test)
for i in range(num_test):
    nearest_neighbors_indices = np.argsort(dists[i])
    closest_y = np.zeros(k)
    for m in range(k):
        closest_y[m] = train_labels[nearest_neighbors_indices[m]]
    labels = []
    label_counts = []
    for label in closest_y:
        if label not in labels:
          labels.append(label)
          label_counts.append(1)
        else:
          label_counts[labels.index(label)] = label_counts[labels.index(label)] + 1
    mode_labels = np.argsort(label_counts)[::-1]
    y_pred[i] = labels[mode_labels[0]]
y_pred

array([1., 1., 1., ..., 0., 1., 1.])

In [19]:
# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_pred == test_labels)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

Got 1566 / 2000 correct => accuracy: 0.783000
