In [1]:
import numpy as np
import os
from scipy.io import loadmat
from sklearn.datasets import get_data_home
from sklearn.neighbors import LargeMarginNearestNeighbor as LMNN

In [9]:
# Remote location and local cache path of the MNIST .mat file used below.
# (The file name suggests a deskewed, PCA-reduced variant -- not verified here.)
MNIST_DESKEWED_URL = 'https://www.dropbox.com/s/mhsnormwt5i2ba6/mnist-deskewed-pca164.mat?dl=1'
MNIST_DESKEWED_PATH = os.path.join(get_data_home(), 'mnist-deskewed-pca164.mat')

# Download once; later runs reuse the cached copy.
if not os.path.exists(MNIST_DESKEWED_PATH):
    from urllib import request
    # flush=True so the progress message is shown before the download blocks.
    print('Downloading data from {} . . .'.format(MNIST_DESKEWED_URL), end='', flush=True)
    # Download to a temporary name and rename on success, so an interrupted
    # download never leaves a truncated file that passes the existence check.
    partial_path = MNIST_DESKEWED_PATH + '.part'
    request.urlretrieve(MNIST_DESKEWED_URL, partial_path)
    os.replace(partial_path, MNIST_DESKEWED_PATH)
    print('done.')

print('Loading data from {}.'.format(MNIST_DESKEWED_PATH))

mnist_mat = loadmat(MNIST_DESKEWED_PATH)

# Features as float64; labels as flat 1-D integer vectors.
# `np.int` was removed in NumPy 1.24; the builtin `int` is the type it aliased.
X_train = np.asarray(mnist_mat['X_train'], dtype=np.float64)
X_test = np.asarray(mnist_mat['X_test'], dtype=np.float64)
y_train = np.asarray(mnist_mat['y_train'], dtype=int).ravel()
y_test = np.asarray(mnist_mat['y_test'], dtype=int).ravel()

Downloading data from https://www.dropbox.com/s/mhsnormwt5i2ba6/mnist-deskewed-pca164.mat?dl=1 . . .done.
Loading data from /home/johny/scikit_learn_data/mnist-deskewed-pca164.mat.


In [3]:
# Register the memory_profiler IPython extension, which provides the %memit magic.
%load_ext memory_profiler

In [5]:
# Configure the Large Margin Nearest Neighbor estimator: 3 neighbors, a fixed
# seed for repeatable runs, progress output enabled, and n_jobs=-1
# (all available cores by scikit-learn convention).
lmnn = LMNN(
    n_neighbors=3,
    random_state=42,
    verbose=1,
    n_jobs=-1,
)

In [6]:
# Fit LMNN on the training split; %memit reports the memory used by the call.
%memit lmnn.fit(X_train, y_train)

Finding principal components... done in  0.61s
Finding the target neighbors... done in 40.40s
Computing static part of the gradient...

 Iteration   Function Call      Objective Value    Time(s)
----------------------------------------------------------
         1               1         1.044149e+07      17.42
         1               2         4.217784e+06      16.99
         2               3         3.781575e+06      21.10
         3               4         2.983367e+06      18.76
         4               5         2.504095e+06      22.74
         5               6         1.990753e+06      19.64
         6               7         1.393434e+06      18.94
         7               8         1.355413e+06      22.12
         7               9         1.128655e+06      18.90
         8              10         8.773327e+05      27.30
         9              11         7.368591e+05      23.11
        10              12         6.927031e+05      22.54
        11              13         6.7

In [7]:
from sklearn.neighbors import KNeighborsClassifier as KNN

In [8]:
# Evaluate the learned transformation: train a k-NN classifier on the
# LMNN-transformed training data (using the neighbor count stored on the
# fitted model), then score it on the transformed test split.
knn = KNN(n_neighbors=lmnn.n_neighbors_)

X_train_lmnn = lmnn.transform(X_train)
knn.fit(X_train_lmnn, y_train)

X_test_lmnn = lmnn.transform(X_test)
test_acc = knn.score(X_test_lmnn, y_test)

test_acc_pct = 100*test_acc
print('LMNN accuracy on MNIST test set is {:5.2f}%.'.format(test_acc_pct))

LMNN accuracy on MNIST test set is 98.58%.
