# Usage example on notMNIST

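### First, download the dataset from http://yaroslavvb.com/upload/notMNIST/notMNIST_small.mat and save it as `notMNIST_small.mat` in the notebook's working directory (the loading cell below opens it by that relative path).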

In [1]:
import numpy as np, scipy as sp, time, scipy.io, sklearn
import aknn_alg

# Parse the .mat file a single time and reuse the result for both the images
# and the labels (previously the file was read and decoded twice).
notMNIST_mat = scipy.io.loadmat("notMNIST_small.mat")

# Flatten every image into a 784-long column; -1 lets numpy infer the number
# of samples (18724 for this file) instead of hard-coding it.
# NOTE(review): 784 presumably corresponds to 28x28 pixel images - confirm.
notMNIST_small = notMNIST_mat['images'].reshape(784, -1)

# One row per sample, shifted by 255/2 and divided by 255.
# Assuming raw pixel values lie in 0..255, this maps them to [-0.5, 0.5].
nmn = (notMNIST_small.T - 255.0/2)/255.0

# Integer class ids, kept under their own name so that `labels` only ever
# refers to the final array of letter symbols.
label_ids = notMNIST_mat['labels'].astype(int)
labels_to_symbols = { 0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J' }
labels = np.array([labels_to_symbols[x] for x in label_ids])

In [2]:
import importlib
importlib.reload(aknn_alg)

# Build, for every point, the list of its exact Euclidean nearest neighbors.
# Using more neighbors leads to less abstaining at a given parameter setting.
# NOTE(review): _calc_nbrs_exact is underscore-prefixed, so it is presumably
# an internal helper of aknn_alg - confirm it is meant to be called directly.
nbrs_start = time.time()
nbrs_list = aknn_alg._calc_nbrs_exact(nmn, k=1000, use_nndescent=False)
nbrs_elapsed = time.time() - nbrs_start
print('Neighbor indices computed. Time:\t {}'.format(nbrs_elapsed))

Neighbor indices computed. Time:	 9.087002992630005


In [9]:
import importlib
importlib.reload(aknn_alg)

# Data used for the end-to-end AKNN run: the reference set is the normalized
# data itself, and no separate query set is given.
ref_data, query_data = nmn, None

# Numeric settings; the names match the keyword arguments of
# aknn_alg.aknn_predict as it is called later in this notebook.
margin, max_k = 1.0, 1000

## Make AKNN predictions

In [7]:
# Time the AKNN prediction step that works from the precomputed neighbor lists.
predict_start = time.time()
aknn_predictions = aknn_alg.predict_nn_rule(nbrs_list, labels)
predict_elapsed = time.time() - predict_start
print('AKNN predictions made. Time:\t {}'.format(predict_elapsed))

AKNN predictions made. Time:	 3.1767020225524902


In [10]:
# End-to-end AKNN prediction (neighbor search + prediction in one call).
# The settings come from the configuration cell that defines ref_data, margin,
# query_data and max_k, so changing them there is reflected here instead of
# silently diverging from duplicated literals.
aknn_predictions_new = aknn_alg.aknn_predict(
    ref_data,
    labels,
    margin=margin,
    query_data=query_data,
    max_k=max_k,
    use_nndescent=False
)


Neighbor indices computed. Time:	 9.21240496635437
AKNN predictions made. Time:	 12.64634394645691
AKNN predictions made. Time:	 12.653649091720581


## Comparison with k-NN

In [11]:
# For each k: accuracy of plain k-NN versus accuracy of AKNN restricted to the
# points whose second-element value in aknn_predictions is at most k, together
# with the fraction of points satisfying that condition ("coverage").
kvals = [3,5,7,8,10,30,100]

# Loop-invariant pieces of the AKNN result.
aknn_labels, aknn_k = aknn_predictions[0], aknn_predictions[1]
aknn_hits = aknn_labels == labels

for k in kvals:
    knn_predictions = aknn_alg.knn_rule(nbrs_list, labels, k=k)
    knn_acc = np.mean(knn_predictions == labels)

    aknn_cov_ndces = aknn_k <= k
    aknn_cov = np.mean(aknn_cov_ndces)
    aknn_condacc = np.mean(aknn_hits[aknn_cov_ndces])

    print('{}-NN accuracy: \t\t{}'.format(k, knn_acc))
    print('AKNN accuracy (k <= {}): \t{} \t\t Coverage: \t{}\n'.format(
        k, aknn_condacc, aknn_cov))

print('Overall AKNN accuracy: {}'.format(np.mean(aknn_hits)))

3-NN accuracy: 		0.8750267036957915
AKNN accuracy (k <= 3): 	0.9701739850869926 		 Coverage: 	0.838015381328776

5-NN accuracy: 		0.8833048493911557
AKNN accuracy (k <= 5): 	0.9450811565536099 		 Coverage: 	0.9180196539201025

7-NN accuracy: 		0.8836787011322367
AKNN accuracy (k <= 7): 	0.9408167974157822 		 Coverage: 	0.9258705404828028

8-NN accuracy: 		0.8834650715659047
AKNN accuracy (k <= 8): 	0.9362406530053086 		 Coverage: 	0.935644093142491

10-NN accuracy: 		0.8822901089510788
AKNN accuracy (k <= 10): 	0.9322341209133662 		 Coverage: 	0.9425870540482802

30-NN accuracy: 		0.8767891476180303
AKNN accuracy (k <= 30): 	0.9158672400485169 		 Coverage: 	0.9687032685323649

100-NN accuracy: 		0.858577227088229
AKNN accuracy (k <= 100): 	0.9071918180829072 		 Coverage: 	0.9817346720786156

Overall AKNN accuracy: 0.8925977355265969


In [12]:
# Same k-NN versus AKNN comparison as before, but evaluated on the result of
# the end-to-end call stored in aknn_predictions_new.
kvals = [3,5,7,8,10,30,100]

# Loop-invariant pieces of the new AKNN result.
aknn_new_labels, aknn_new_k = aknn_predictions_new[0], aknn_predictions_new[1]
aknn_new_hits = aknn_new_labels == labels

for k in kvals:
    knn_predictions = aknn_alg.knn_rule(nbrs_list, labels, k=k)
    knn_acc = np.mean(knn_predictions == labels)

    aknn_cov_ndces = aknn_new_k <= k
    aknn_cov = np.mean(aknn_cov_ndces)
    aknn_condacc = np.mean(aknn_new_hits[aknn_cov_ndces])

    print('{}-NN accuracy: \t\t{}'.format(k, knn_acc))
    print('AKNN accuracy (k <= {}): \t{} \t\t Coverage: \t{}\n'.format(
        k, aknn_condacc, aknn_cov))

print('Overall AKNN accuracy: {}'.format(np.mean(aknn_new_hits)))

3-NN accuracy: 		0.8750267036957915
AKNN accuracy (k <= 3): 	0.9701739850869926 		 Coverage: 	0.838015381328776

5-NN accuracy: 		0.8833048493911557
AKNN accuracy (k <= 5): 	0.9450811565536099 		 Coverage: 	0.9180196539201025

7-NN accuracy: 		0.8836787011322367
AKNN accuracy (k <= 7): 	0.9408167974157822 		 Coverage: 	0.9258705404828028

8-NN accuracy: 		0.8834650715659047
AKNN accuracy (k <= 8): 	0.9362406530053086 		 Coverage: 	0.935644093142491

10-NN accuracy: 		0.8822901089510788
AKNN accuracy (k <= 10): 	0.9322341209133662 		 Coverage: 	0.9425870540482802

30-NN accuracy: 		0.8767891476180303
AKNN accuracy (k <= 30): 	0.9158672400485169 		 Coverage: 	0.9687032685323649

100-NN accuracy: 		0.858577227088229
AKNN accuracy (k <= 100): 	0.9071918180829072 		 Coverage: 	0.9817346720786156

Overall AKNN accuracy: 0.8925977355265969


In [77]:
# Compare the second elements of the two prediction results: show the entries
# of the new array at positions where the old value is not exactly one more
# than the new value.
old_second = aknn_predictions[1]
new_second = aknn_predictions_new[1]
not_off_by_one = old_second != new_second + 1
new_second[not_off_by_one]

array([0, 0, 0])