In [None]:
import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.io  # `import scipy` alone is not guaranteed to expose the scipy.io submodule
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_validate, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

from KNN_wrappers import LMNNClassifier, Chi2Classifier, NLMNNClassifier
from NLMNN import NLMNN
%load_ext autoreload
%autoreload 2

In [None]:
# Select matplotlib's interactive "notebook" backend for every figure created below.
%matplotlib notebook

In [None]:

# Directory name of each object category -> integer class label.
# Only these ten categories are loaded; .mat files found under any other
# directory name are skipped.
classes = {
    'back_pack': 1,
    'bike': 2,
    'calculator': 3,
    'headphones': 4,
    'keyboard': 5,
    'laptop_computer': 6,
    'monitor': 7,
    'mouse': 8,
    'mug': 9,
    'projector': 10,
}

# glob yields matches in arbitrary, filesystem-dependent order. Sorting fixes
# the sample order so that unshuffled cross-validation splits are the same on
# every machine and every run.
mat_files = sorted(glob.glob('data/webcam/*/*/*.mat'))

X = []
targets = []
for file in mat_files:
    # The category is the directory that directly contains the .mat file.
    # Path handles both '/' and '\\' separators, unlike splitting on '/'.
    target = Path(file).parent.name
    if target in classes:
        targets.append(classes[target])
        X.append(scipy.io.loadmat(file)['histogram'])

if not X:
    # Fail with an explicit message instead of a cryptic unpacking error below.
    raise FileNotFoundError(
        "no histogram .mat files for the selected classes under 'data/webcam/*/*/'"
    )

# np.float was removed from NumPy (1.24); np.float64 is the dtype it aliased.
# squeeze() drops the singleton axes of the stacked per-file histograms.
X = np.asarray(X, dtype=np.float64).squeeze()
y = np.asarray(targets)

# n = number of samples, d = histogram length.
n, d = X.shape

# Rescale every histogram (row) by its own sum.
X = X / np.sum(X, axis=1, keepdims=True)


In [None]:

# Baselines evaluated with 5-fold cross-validation, no hyper-parameter search.
# scores_c1 is reported as "normal KNN" and scores_c2 as "untrained NLMNN" in
# the results print-out further down.
scores_c1 = cross_validate(
    LMNNClassifier(k=3, train=False, pca=10), X, y,
    cv=5, return_train_score=True, return_estimator=True,
)
scores_c2 = cross_validate(
    Chi2Classifier(k=3, r=10), X, y,
    cv=5, return_train_score=True, return_estimator=True,
)

# Search grid for `mu`: `steps` log-spaced values between 1e-2 and 1e2.
steps = 10
parameters = {'mu': np.logspace(-2, 2, num=steps)}
model = LMNNClassifier(k=3, pca=10)
# return_train_score=True is required: GridSearchCV leaves the *_train_score
# entries out of cv_results_ by default, and the results print-out reads
# mean_train_score / std_train_score of this search.
clf_lmnn = GridSearchCV(
    model, parameters, cv=5, verbose=3, n_jobs=10, return_train_score=True
)
clf_lmnn.fit(X, y)

In [None]:
# Grid-search `mu` for the NLMNN classifier (use_softmax=False) with 5-fold CV.
# The grid is built here rather than read from a shared notebook-level
# `parameters` name, so this cell always searches over mu no matter what other
# cells have assigned to that name or in which order they were run.
mu_grid = {'mu': np.logspace(-2, 2, num=steps)}
model = NLMNNClassifier(k=3, r=10, use_softmax=False)
# return_train_score=True is required: cv_results_ only contains the
# *_train_score entries (read by the results print-out) when it is set.
clf = GridSearchCV(
    model, mu_grid, cv=5, verbose=3, n_jobs=10, return_train_score=True
)
clf.fit(X, y)

In [None]:
# Call plot_debug() on the NLMNN model wrapped by the best estimator of the mu
# grid search (best_estimator_ is the model refit with the best parameters).
# NOTE(review): presumably draws training diagnostics -- confirm in NLMNN.plot_debug.
clf.best_estimator_.nlmnn.plot_debug()

In [None]:
# Heat-map of the transposed `L` attribute of the best NLMNN model found by
# the mu grid search, with a colour scale next to it.
fig, ax = plt.subplots()
heatmap = ax.imshow(clf.best_estimator_.nlmnn.L.T, aspect='auto')
fig.colorbar(heatmap, ax=ax)

In [None]:
# Results table: mean accuracy +- std/sqrt(5) over the 5 CV folds (the 5 must
# match cv=5 used for every evaluation).
# The first row now reads 'train_score' for its "train acc" columns; it used to
# repeat 'test_score' there.
# NOTE: the two GridSearchCV rows read mean_train_score / std_train_score, which
# cv_results_ only contains when the search was created with
# return_train_score=True.
print(f"normal           KNN train acc={np.mean(scores_c1['train_score']):.3f} +- {np.std(scores_c1['train_score'])/np.sqrt(5):.3f},  test acc={np.mean(scores_c1['test_score']):.3f} +- {np.std(scores_c1['test_score'])/np.sqrt(5):.3f}")
print(f"trained    LMNN  KNN train acc={clf_lmnn.cv_results_['mean_train_score'][clf_lmnn.best_index_]:.3f} +- {clf_lmnn.cv_results_['std_train_score'][clf_lmnn.best_index_]/np.sqrt(5):.3f},  test acc={clf_lmnn.cv_results_['mean_test_score'][clf_lmnn.best_index_]:.3f} +- {clf_lmnn.cv_results_['std_test_score'][clf_lmnn.best_index_]/np.sqrt(5):.3f}")
print(f"untrained NLMNN  KNN train acc={np.mean(scores_c2['train_score']):.3f} +- {np.std(scores_c2['train_score'])/np.sqrt(5):.3f},  test acc={np.mean(scores_c2['test_score']):.3f} +- {np.std(scores_c2['test_score'])/np.sqrt(5):.3f}")
print(f"trained   NLMNN  KNN train acc={clf.cv_results_['mean_train_score'][clf.best_index_]:.3f} +- {clf.cv_results_['std_train_score'][clf.best_index_]/np.sqrt(5):.3f},  test acc={clf.cv_results_['mean_test_score'][clf.best_index_]:.3f} +- {clf.cv_results_['std_test_score'][clf.best_index_]/np.sqrt(5):.3f}")


In [None]:
# Grid-search `l` (10 log-spaced values between 1e-2 and 1e1) for the NLMNN
# classifier, with `mu` fixed to the best value found by the mu search.
# The grid gets its own name so the notebook-level `parameters` (the mu grid)
# is left intact for cells that are re-run later.
l_grid = {'l': np.logspace(-2, 1, num=10)}
model = NLMNNClassifier(k=3, r=10, use_softmax=False, mu=clf.best_params_['mu'])
# return_train_score=True is required: the print-out of this search reads
# mean_train_score / std_train_score, which cv_results_ omits by default.
clf_l = GridSearchCV(
    model, l_grid, cv=5, verbose=3, n_jobs=10, return_train_score=True
)
clf_l.fit(X, y)

In [None]:
# Train / test accuracy of the best `l` setting, each as mean +- std/sqrt(5)
# over the 5 CV folds.
l_results = clf_l.cv_results_
l_best = clf_l.best_index_
print(f"trained   NLMNN  KNN train acc={l_results['mean_train_score'][l_best]:.3f} +- {l_results['std_train_score'][l_best]/np.sqrt(5):.3f},  test acc={l_results['mean_test_score'][l_best]:.3f} +- {l_results['std_test_score'][l_best]/np.sqrt(5):.3f}")


In [None]:
# Mean fit time per CV fold as a function of `l`, on a logarithmic x-axis.
# Axis labels added so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(clf_l.cv_results_['param_l'].data, clf_l.cv_results_['mean_fit_time'])
ax.set_xlabel(r'$\ell$')
ax.set_ylabel('Mean fit time (s)')  # GridSearchCV reports fit times in seconds
ax.set_xscale('log')


In [None]:

# Reference accuracies for r=20, copied by hand from the webcam_20 notebook.
# They are drawn against THIS notebook's grids, so the curves only line up if
# webcam_20 used the same 10-point mu and l grids -- TODO confirm.
acc_r20_mu = [0.8, 0.81694915, 0.85762712, 0.8779661, 0.86779661,
              0.85084746, 0.82711864, 0.82372881, 0.82372881, 0.81694915]
acc_r20_l = [0.8779661, 0.88135593, 0.86779661, 0.87118644, 0.85084746,
             0.85084746, 0.85423729, 0.85423729, 0.85423729, 0.85423729]

fig, (ax_mu, ax_l) = plt.subplots(2, 1)

# Top panel: mean CV test accuracy versus mu.
mu_values = clf.cv_results_['param_mu'].data
ax_mu.plot(mu_values, clf.cv_results_['mean_test_score'], label='r=10')
ax_mu.plot(mu_values, acc_r20_mu, label='r=20')
ax_mu.set_xscale('log')
ax_mu.set_ylabel('Accuracy')
# Raw string: '\m' is not a valid Python escape sequence and triggers a warning
# (a future error) in a normal string literal.
ax_mu.set_xlabel(r'$\mu$', fontsize=16)
ax_mu.legend()

# Bottom panel: mean CV test accuracy versus l.
l_values = clf_l.cv_results_['param_l'].data
ax_l.plot(l_values, clf_l.cv_results_['mean_test_score'], label='r=10')
ax_l.plot(l_values, acc_r20_l, label='r=20')
ax_l.set_xscale('log')
# Raw string for the same reason ('\e' is not a valid escape either).
ax_l.set_xlabel(r'$\ell$', fontsize=16)
ax_l.set_ylabel('Accuracy')
ax_l.legend()

In [None]:
# Fit an NLMNN model (k=3, r=10, lr=10000, use_softmax left at its default) on
# the full data set; its loss history is plotted later under a 'Softmax' label.
# NOTE(review): `n` held the number of samples after `n,d = X.shape`; this
# assignment rebinds it to the model.
n = NLMNN(k=3, r=10, lr=10000)
n.fit(X,y, verbose=True)

In [None]:
# Fit an NLMNN model with use_softmax=False and lr=0.1 on the full data set;
# its loss history is plotted later under a 'Projection' label.
n2 = NLMNN(k=3, r=10, lr=0.1, use_softmax=False)
n2.fit(X,y, verbose=True)

In [None]:
# Fit a third NLMNN model, identical to the first run except for the doubled
# learning rate (lr=20000), to compare the loss curves.
n3 = NLMNN(k=3, r=10, lr=20000)
n3.fit(X,y, verbose=True)

In [None]:
# Loss histories of the three NLMNN runs on one set of axes.
fig, ax = plt.subplots()
ax.plot(n.losses, label='Softmax (lr=10000)')
# n2 is constructed with lr=0.1; the legend previously claimed lr=0.01.
ax.plot(n2.losses, label='Projection (lr=0.1)')
ax.plot(n3.losses, label='Softmax (lr=20000)')
ax.set_ylabel('loss')
ax.set_xlabel('Iterations')
ax.legend()