In [1]:
import numpy as np
import pylab as plt
import torch
from tqdm.auto import tqdm
import scipy.linalg
import pandas as pd
import torchvision.datasets as dst

from common import gsave, gload
from nde.kernels import KernelPredictor, KernelType, load_cifar_all

In [2]:
# Load the image array X and label array Y, then build the kernel predictor
# over the whole set.
# NOTE(review): X/Y presumably hold the CIFAR-10 train and test splits
# concatenated (later cells treat indices 0..49999 as train and 50000..59999
# as test) — confirm against nde.kernels.load_cifar_all.
X, Y = load_cifar_all()
ker = KernelPredictor(X, Y)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Total number of samples in X.
N = len(X)

def sample_I(I_orig, labels=None, n_classes=10):
    """Sub-sample a pool of indices into a class-imbalanced subset.

    For every class ``i`` in ``0..n_classes-1`` a fraction
    ``(i + 1) / n_classes`` of the pool members carrying that label is kept
    (drawn without replacement), so class 0 keeps the fewest samples and the
    last class keeps all of them.

    Args:
        I_orig: Sequence of integer indices into ``labels`` (e.g. a ``range``)
            that forms the pool to sample from.
        labels: Array of integer class labels indexed by the values in
            ``I_orig``. Defaults to the notebook-level ``Y``.
        n_classes: Number of classes; labels are expected in
            ``0..n_classes-1``.

    Returns:
        1-D NumPy array of the selected indices (values taken from
        ``I_orig``) in random order.
    """
    if labels is None:
        labels = Y  # fall back to the notebook-level label array

    pool = np.asarray(I_orig)
    pool_labels = labels[pool]

    kept = []
    for i in range(n_classes):
        # Positions *within the pool* whose label is class i.
        members = np.flatnonzero(pool_labels == i)
        # Exact integer arithmetic: the float form int(p * n) can truncate one
        # short (e.g. int(0.7 * 90) == 62 instead of 63).
        n_keep = (i + 1) * len(members) // n_classes
        if n_keep == 0:
            continue  # nothing to draw for this class
        kept.extend(np.random.choice(members, size=n_keep, replace=False))

    # Map pool positions back to the original index values, then shuffle so
    # the result is not grouped by class.
    selected = pool[np.asarray(kept, dtype=int)]
    return np.random.permutation(selected)

# Fix the global NumPy seed so the random sub-sampling (and therefore every
# downstream accuracy figure) is reproducible under Restart & Run All.
np.random.seed(0)

# Indices 0..49999 form the training pool and 50000..59999 the test pool.
# NOTE(review): presumably this matches the CIFAR-10 train/test ordering
# produced by load_cifar_all — confirm.
I_tr = sample_I(range(50000))
I_te = sample_I(range(50000, 60000))

In [4]:
# Sanity check: number of sampled training examples per class (0..9).
[(Y[I_tr] == i).sum() for i in range(10)]

[500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]

In [5]:
# Sanity check: number of sampled test examples per class (0..9).
[(Y[I_te] == i).sum() for i in range(10)]

[100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]

In [6]:
# Single evaluation: predict labels for the test subset from the training
# subset with a Laplace kernel at sigma=0.10, then report plain accuracy.
# The Gaussian-kernel variant is kept below for quick switching.
# y_pred = ker.predict(I_tr, I_te, ktype = KernelType.GAUSSIAN, sigma=0.10)
y_pred = ker.predict(I_tr, I_te, ktype = KernelType.LAPLACE, sigma=0.10)
# Fraction of test samples whose predicted label equals the true label.
acc = (y_pred == Y[I_te]).mean()
print('acc: ', acc)

acc:  0.598909090909091


In [7]:
# Summarise the evaluation: overall accuracy, then how many test samples were
# predicted as each class versus how many truly belong to it.
class_ids = range(10)
pred_counts = [(y_pred == c).sum() for c in class_ids]
true_counts = [(Y[I_te] == c).sum() for c in class_ids]

print('test acc:', acc)
print('Num Predicted (class i):', pred_counts)
print('Num True (class i):', true_counts)

test acc: 0.598909090909091
Num Predicted (class i): [25, 25, 93, 153, 419, 598, 925, 913, 1042, 1307]
Num True (class i): [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]


In [11]:
## reuse setup
# Slice the predictor's precomputed matrix `ker.D` and labels `ker.y` once, so
# get_acc() can reuse them for every sigma instead of re-indexing per call.
# NOTE(review): ker.D is presumably a pairwise-distance matrix over all
# samples — confirm in nde.kernels.

# Rows: sampled test points; columns: sampled training points.
Dte = ker.D[I_te, :][:, I_tr]
# Rows and columns: sampled training points.
Dtr = ker.D[I_tr, :][:, I_tr]
y_tr = ker.y[I_tr]
y_te = ker.y[I_te]

In [22]:
def get_acc(sigma=0.1, ktype=None):
    """Test accuracy of kernel regression on the sampled train/test subsets.

    Converts the pre-sliced matrices ``Dtr`` / ``Dte`` into kernel matrices via
    ``ker.d_to_K``, solves ``Ktr @ model = onehot(y_tr)`` for the coefficient
    matrix, and classifies each test point by the arg-max of ``Kte @ model``.

    Args:
        sigma: Kernel bandwidth forwarded to ``ker.d_to_K``.
        ktype: Kernel type forwarded to ``ker.d_to_K``. Defaults to
            ``KernelType.GAUSSIAN``.

    Returns:
        Fraction of test samples whose predicted class equals ``y_te``.
    """
    if ktype is None:
        ktype = KernelType.GAUSSIAN

    Kte = ker.d_to_K(Dte, ktype=ktype, sigma=sigma)
    Ktr = ker.d_to_K(Dtr, ktype=ktype, sigma=sigma)

    # One-hot targets, one column per class (10 classes).
    onehot = np.eye(10)[y_tr]
    # assume_a='pos' is the supported spelling of the deprecated sym_pos=True
    # keyword, which newer SciPy releases no longer accept.
    model = scipy.linalg.solve(Ktr, onehot, assume_a='pos', check_finite=False)
    yhats = Kte.dot(model)
    preds = np.argmax(yhats, axis=1)
    return (preds == y_te).mean()

In [25]:
# Sweep the kernel bandwidth over 10 evenly spaced values in [0.01, 1] and
# record the test accuracy for each one.
ss = np.linspace(0.01, 1, 10)
accs = []
for s in tqdm(ss):
    # Use a dedicated name so the sweep does not overwrite `acc` from the
    # single-run evaluation, which the summary cell prints as 'test acc'.
    acc_s = get_acc(s)
    accs.append(acc_s)
    print(f's: {s}, acc: {acc_s}')

#plt.plot(ss, accs)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

s: 0.01, acc: 0.132
s: 0.12, acc: 0.596909090909091


  """


s: 0.23, acc: 0.5450909090909091


  """


s: 0.34, acc: 0.5118181818181818


  """


s: 0.45, acc: 0.48927272727272725


  """


s: 0.56, acc: 0.47763636363636364


KeyboardInterrupt: 

In [24]:
# Display the accuracies collected by the sigma sweep. The sweep cell defines
# `accs`, so it must have been executed before this one.
accs

NameError: name 'accs' is not defined