In [1]:
import numpy as np
import os
from scipy.io import loadmat
from sklearn.datasets import get_data_home
from sklearn.neighbors import LargeMarginNearestNeighbor as LMNN

In [2]:
# Remote location of the deskewed MNIST .mat file and the local cache path
# (inside scikit-learn's data home directory).
MNIST_DESKEWED_URL = 'https://www.dropbox.com/s/mhsnormwt5i2ba6/mnist-deskewed-pca164.mat?dl=1'
MNIST_DESKEWED_PATH = os.path.join(get_data_home(), 'mnist-deskewed-pca164.mat')

# Download only once; later runs reuse the cached file.
if not os.path.exists(MNIST_DESKEWED_PATH):
    from urllib import request
    print('Downloading deskewed MNIST from {} . . .'.format(MNIST_DESKEWED_URL), end='')
    # Download to a temporary name and move it into place only on success, so an
    # interrupted transfer never leaves a truncated file at the cache path (which
    # would pass the os.path.exists check above and then fail inside loadmat).
    partial_path = MNIST_DESKEWED_PATH + '.part'
    try:
        request.urlretrieve(MNIST_DESKEWED_URL, partial_path)
        os.replace(partial_path, MNIST_DESKEWED_PATH)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print('done.')

mnist_mat = loadmat(MNIST_DESKEWED_PATH)

# Features as float64; labels as flat (1-D) integer vectors.
# `np.int` was removed in NumPy 1.24 -- the builtin `int` is the exact
# equivalent of that former alias.
X_train = np.asarray(mnist_mat['X_train'], dtype=np.float64)
X_test = np.asarray(mnist_mat['X_test'], dtype=np.float64)
y_train = np.asarray(mnist_mat['y_train'], dtype=int).ravel()
y_test = np.asarray(mnist_mat['y_test'], dtype=int).ravel()

print('Loaded deskewed MNIST from {}.'.format(MNIST_DESKEWED_PATH))

Loaded deskewed MNIST from /work/chiotell/scikit_learn_data/mnist-deskewed-pca164.mat.


In [3]:
# Load the memory_profiler IPython extension, which provides the %memit magic.
%load_ext memory_profiler

In [4]:
# Configure the LMNN estimator (fitting happens in a separate cell).
# NOTE(review): store_opt_result=True is presumably what makes `opt_result_`
# available after fitting -- confirm against the LMNN documentation.
lmnn = LMNN(
    n_neighbors=3,
    max_iter=35,
    random_state=42,
    n_jobs=-1,
    verbose=1,
    store_opt_result=True,
)

In [5]:
# Fit LMNN on the training data; %memit reports the memory usage of this call.
%memit lmnn.fit(X_train, y_train)

Finding principal components... done in  0.56s.
Finding the target neighbors... done in 17.61s.
Computing static part of the gradient... done.

 Iteration      Objective Value     #Active Triplets    Time(s)
---------------------------------------------------------------
         1         1.044149e+07              159,694      10.70
         1         4.215748e+06              109,517      10.75
         2         3.780318e+06               97,592      10.84
         3         2.983284e+06               73,082      10.77
         4         2.503140e+06               59,776      10.87
         5         1.988418e+06               53,426      10.74
         6         1.388322e+06               82,836      10.82
         7         1.462584e+06              425,157      11.23
         7         1.033541e+06              164,612      11.02
         8         1.146788e+06              304,904      12.06
         8         8.746367e+05              182,982      11.99
         9         7.676

In [6]:
from sklearn.neighbors import KNeighborsClassifier as KNN

In [7]:
# Score a k-NN classifier on the data transformed by the fitted LMNN model,
# using the neighbor count stored on the fitted model.
knn = KNN(n_neighbors=lmnn.n_neighbors_, n_jobs=-1)

X_train_lmnn = lmnn.transform(X_train)
knn.fit(X_train_lmnn, y_train)

X_test_lmnn = lmnn.transform(X_test)
test_acc = knn.score(X_test_lmnn, y_test)

accuracy_message = 'LMNN accuracy on MNIST test set is {:5.2f}%.'
print(accuracy_message.format(100*test_acc))

LMNN accuracy on MNIST test set is 98.63%.


In [8]:
# Report the iteration and function-evaluation counts from the stored optimizer result.
opt_result = lmnn.opt_result_
summary_template = 'The optimizer finished after {} iterations and made {} function evaluations.'
print(summary_template.format(opt_result.nit, opt_result.nfev))

The optimizer finished after 36 iterations and made 40 function evaluations.
