In [1]:
import os
import gzip
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import cm
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix

  from collections import Sequence


In [2]:
def load_mnist(path, kind='train'):
    """Load one split of an MNIST-format dataset from gzipped IDX files.

    Parameters
    ----------
    path : str
        Directory containing ``<kind>-labels-idx1-ubyte.gz`` and
        ``<kind>-images-idx3-ubyte.gz``.
    kind : str, default 'train'
        File-name prefix of the split to read (this notebook uses
        ``'train'`` and ``'t10k'``).

    Returns
    -------
    images : numpy.ndarray
        ``uint8`` array of shape ``(n_samples, 784)``, one flattened image
        per row.
    labels : numpy.ndarray
        ``uint8`` array of shape ``(n_samples,)``.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    # The first 8 bytes of the label file are header; skip them.
    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    # The first 16 bytes of the image file are header; the remaining bytes
    # are reshaped to one 784-value row per label.
    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels

In [3]:
def load_FashionMNIST(data_path):
    """Load the Fashion-MNIST train and test splits from `data_path`.

    Reads the ``train`` and ``t10k`` splits with `load_mnist` and casts the
    image arrays to 64-bit floats; the label arrays are returned unchanged.

    Returns
    -------
    tuple
        ``(Xtrn, Ytrn, Xtst, Ytst)``.
    """
    Xtrn, Ytrn = load_mnist(data_path, "train")
    Xtst, Ytst = load_mnist(data_path, "t10k")
    # `np.float` was only an alias of the builtin float and has been removed
    # from NumPy; `np.float64` is the same dtype and works on every version.
    return Xtrn.astype(np.float64), Ytrn, Xtst.astype(np.float64), Ytst

def load_CoVoST2(data_path):
    """Load the speech train/test arrays stored as ``.npz`` archives.

    Reads ``speech_trn.npz`` (keys ``Xtrn``, ``Ytrn``) and ``speech_tst.npz``
    (keys ``Xtst``, ``Ytst``) from the directory `data_path`.

    Returns
    -------
    tuple
        ``(Xtrn, Ytrn, Xtst, Ytst)``.
    """
    # NOTE(security): allow_pickle=True lets np.load unpickle object arrays,
    # which can execute arbitrary code. Only use this on trusted files.
    # The archives are opened as context managers so the underlying file
    # handles are closed once the arrays have been read.
    with np.load(os.path.join(data_path, 'speech_trn.npz'),
                 allow_pickle=True) as trn:
        Xtrn, Ytrn = trn['Xtrn'], trn['Ytrn']
    with np.load(os.path.join(data_path, 'speech_tst.npz'),
                 allow_pickle=True) as tst:
        Xtst, Ytst = tst['Xtst'], tst['Ytst']
    return Xtrn, Ytrn, Xtst, Ytst

In [4]:
# The data files are looked up in the notebook's current working directory.
datapath = os.getcwd()
datapath

'/Users/Hoffmann/Downloads/iaml-cw2'

In [5]:
# Read the Fashion-MNIST train/test images (as floats) and their labels.
(Xtrn, Ytrn, Xtst, Ytst) = load_FashionMNIST(data_path=datapath)

In [6]:
from copy import deepcopy

In [7]:
# Keep untouched copies of the raw images before any rescaling.
Xtrn_orig, Xtst_orig = deepcopy(Xtrn), deepcopy(Xtst)

In [8]:
# Rescale by 1/255. The scaled arrays are derived from the untouched
# backups rather than from themselves, so re-running this cell does not
# divide by 255 a second time.
Xtrn = Xtrn_orig / 255.0
Xtst = Xtst_orig / 255.0

In [9]:
# Sanity check: undoing the 1/255 scaling must reproduce the raw images.
np.array_equal(Xtrn_orig, 255.0 * Xtrn)

True

In [10]:
# Column-wise (per-feature) mean of the training images.
Xmean_trn = Xtrn.mean(axis=0)

In [11]:
# Column-wise (per-feature) mean of the test images.
Xmean_tst = Xtst.mean(axis=0)

In [12]:
# Centre the test set with the TRAINING mean. The classifiers are fitted on
# Xtrn - Xmean_trn, so test inputs must be shifted by the same vector;
# using the test set's own mean would feed the models differently-shifted
# inputs and leak test statistics into preprocessing.
Xtst_nm = Xtst - Xmean_trn

In [13]:
# Mean-centre the training images (the mean row is broadcast over all rows).
Xtrn_nm = np.subtract(Xtrn, Xmean_trn)

In [14]:
# Shape check: centring must leave the (n_samples, n_features) shape unchanged.
Xtrn_nm.shape

(60000, 784)

In [None]:
# Logistic regression with scikit-learn's default settings, fitted on the
# mean-centred training images.
model_lr = LogisticRegression().fit(X=Xtrn_nm, y=Ytrn)

In [None]:
# Predicted labels for the centred test images.
y_pred_lr = model_lr.predict(X=Xtst_nm)

In [None]:
# How many test samples the logistic-regression model assigns to class 9.
np.count_nonzero(y_pred_lr == 9)

In [None]:
# Test-set accuracy of the logistic-regression model.
accuracy_score(y_true=Ytst, y_pred=y_pred_lr)

In [None]:
# Confusion matrix for logistic regression (true labels passed first).
confusion_matrix(y_true=Ytst, y_pred=y_pred_lr)

In [None]:
# RBF-kernel SVM (C=1.0, gamma='auto') fitted on the same centred training set.
model_svc = SVC(C=1.0, kernel='rbf', gamma='auto').fit(X=Xtrn_nm, y=Ytrn)

In [None]:
# Predicted labels of the SVM for the centred test images.
y_pred_svc = model_svc.predict(X=Xtst_nm)

In [None]:
# Test-set accuracy of the SVM. Reuses the predictions already stored in
# y_pred_svc instead of calling model_svc.score(), which would run the
# prediction over the whole test set a second time. This also mirrors how
# the logistic-regression accuracy is computed.
accuracy_score(Ytst, y_pred_svc)

In [None]:
# Confusion matrix for the SVM (true labels passed first).
confusion_matrix(y_true=Ytst, y_pred=y_pred_svc)

In [None]:
# get the eigenvectors

In [None]:
# Full PCA (no component limit given) of the mean-centred training images.
pca = PCA().fit(X=Xtrn_nm)

In [None]:
# Principal axes from the fitted PCA. Later cells project with `V.T` and map
# back to the original feature basis with `V`.
V = pca.components_

In [None]:
# Coordinates of every training sample in the principal-component basis.
z_p = Xtrn_nm @ V.T
z_p.shape

In [None]:
# Keep only the first two principal-component coordinates of every sample.
# A single slice assignment replaces the per-row Python loop and no longer
# hard-codes the number of rows (60000) or of remaining columns (782).
z_p[:, 2:] = 0.0

In [None]:
# Shape check: zeroing the trailing coordinates must not change the array's shape.
z_p.shape

In [None]:
# Region of interest in the plane of the first two principal components:
# the rectangle with corners (-5*sigma1, 5*sigma2) and (5*sigma1, -5*sigma2)

In [None]:
# Standard deviations along the first two principal components
# (square roots of the two leading explained variances).
sigma1, sigma2 = np.sqrt(pca.explained_variance_[:2])

In [None]:
# 100 evenly spaced grid values along the first component, from -5 to +5 sigma1.
xs = np.linspace(start=-5*sigma1, stop=5*sigma1, num=100)

In [None]:
# 100 evenly spaced grid values along the second component, from -5 to +5 sigma2.
ys = np.linspace(start=-5*sigma2, stop=5*sigma2, num=100)

In [None]:
# 2-D coordinate matrices for the grid ('xy' is meshgrid's default indexing).
xx, yy = np.meshgrid(xs, ys, indexing='xy')

In [None]:
# Number of grid points once the two coordinate matrices are flattened and paired.
len(np.column_stack((xx.ravel(), yy.ravel())))

In [None]:
# One row per grid point: (first-component, second-component) coordinates.
z_xy = np.column_stack((xx.ravel(), yy.ravel()))

In [None]:
# Inspect the first grid point.
z_xy[0]

In [None]:
# One row per grid point and one column per principal component.
# `xx.size` is the true number of grid points; `len(xx) * len(xx)` is only
# correct when the grid is square. The column count is taken from V so it
# always matches the later `z_orig.dot(V)`.
z_orig = np.zeros((xx.size, V.shape[0]))

In [None]:
# Put each grid point in the first two component coordinates and zero all
# remaining coordinates. Two slice assignments replace the per-row loop,
# which assumed a square grid and hard-coded the 782 remaining columns.
z_orig[:, :2] = z_xy
z_orig[:, 2:] = 0.0

In [None]:
# transform to original basis

In [None]:
# Map the grid points from component coordinates back to the feature basis.
z_xy_orig = z_orig @ V

In [None]:
# Class predicted by the logistic-regression model at every grid point.
Z = model_lr.predict(X=z_xy_orig)

In [None]:
# How many grid points are assigned to class 9.
np.count_nonzero(Z == 9)

In [None]:
# Collect the projected training points that fall inside the +/-5 sigma window
# of the plane spanned by the first two principal components

In [None]:
# Empty container for the 2-D points that fall inside the window.
z_P = list()

In [None]:
# Select the samples whose first two component coordinates lie inside
# [-5*sigma1, 5*sigma1] x [-5*sigma2, 5*sigma2]. A boolean mask replaces the
# loop over a hard-coded 60000 rows. It also makes the cell safe to re-run:
# the loop appended to whatever z_P already held.
in_window = (
    (z_p[:, 0] >= -5 * sigma1) & (z_p[:, 0] <= 5 * sigma1)
    & (z_p[:, 1] >= -5 * sigma2) & (z_p[:, 1] <= 5 * sigma2)
)
# Result is an (n_selected, 2) array holding the same rows, in the same
# order, that the loop would have appended.
z_P = z_p[in_window, :2]

In [None]:
# Convert the collected points to a NumPy array (copy=True is np.array's default).
z_P = np.array(z_P, copy=True)

In [None]:
# Decision regions of the logistic-regression model on the grid spanned by
# the first two principal components.
fig, ax = plt.subplots(figsize=(12, 8))

# Z holds discrete class labels. With contourf's automatic levels the bands
# do not line up with the classes, so neighbouring classes get merged.
# Explicit half-integer edges give exactly one filled band per class.
# Assumes the labels are consecutive integers (0..9 for Fashion-MNIST) - TODO confirm.
class_ids = np.asarray(model_lr.classes_, dtype=int)
levels = np.arange(class_ids.min() - 0.5, class_ids.max() + 1.0)

cs = ax.contourf(xx, yy, Z.reshape(xx.shape), levels=levels, cmap='coolwarm')
cbar = fig.colorbar(cs, ticks=class_ids)
cbar.set_label('Predicted class')

ax.set_xlabel('1st principal component')
ax.set_ylabel('2nd principal component')
ax.set_title('Logistic regression decision regions in the PC1-PC2 plane')
ax.grid(True)

fig.savefig('lr_2d_plane.png')
plt.show()

In [None]:
# Inspect the first selected point.
z_P[0]