In [None]:
import numpy as np
import seaborn as sns

from matplotlib import pyplot
import matplotlib.pylab as plt 
%matplotlib inline

from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from sklearn import svm
import os

In [None]:
# Colab-only step: mount Google Drive at /content/gdrive so that the notebook
# can reach files stored on the Drive.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

In [None]:
# Switch the working directory to the activity folder on the mounted Drive, so
# that relative paths used later in the notebook resolve from there.
# You may need to modify this path to match your own Drive layout.
os.chdir('/content/gdrive/My Drive/Colab Notebooks/AM205_Activity')
# Echo the new working directory as the cell output to confirm the change.
os.getcwd()

# Load data

In [None]:
# Fetch the MNIST images and labels; Keras returns a (train, test) pair of
# (images, labels) tuples.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalise the image data by dividing every pixel value by 255.0
# (presumably the raw intensities span 0-255, so the result lies in [0, 1]).
X_train = X_train / 255.0
X_test = X_test / 255.0

In [None]:
# Check the data shape: (number of images, image height, image width).
X_train.shape

This data represents 60000 images of handwritten digits, each of size 28 $\times$ 28. We can convert it into a 2-D matrix (one row per image) by flattening the image height and width into a single axis.

In [None]:
# Unpack the image-stack dimensions: sample count, image height, image width.
n_samples, h, w = X_train.shape

# PCA works on a 2-D (samples x features) matrix, so collapse every h x w
# image into a single row of h*w pixel values.
X_train_flat = X_train.reshape((n_samples, h * w))
X_test_flat = X_test.reshape((len(X_test), -1))

# Fit PCA

In [None]:
def construct_pca(n_components, X_train_flat, random_state=0):
    """Fit a PCA model with ``n_components`` components on ``X_train_flat``.

    Parameters
    ----------
    n_components : int
        Number of principal components to keep.
    X_train_flat : array-like of shape (n_samples, n_features)
        Flattened training images, one row per sample.
    random_state : int or None, default=0
        Seed forwarded to ``PCA``. It only matters when scikit-learn selects a
        randomized SVD solver; fixing it keeps the fitted components
        reproducible across notebook runs.

    Returns
    -------
    sklearn.decomposition.PCA
        The fitted PCA estimator.
    """
    return PCA(n_components, random_state=random_state).fit(X_train_flat)

# 10-component model reused by the component and reconstruction plots below.
pca = construct_pca(10, X_train_flat)

In [None]:
# Fit a larger (50-component) PCA purely to inspect how the variance is
# distributed over the leading components.
pca_sample = construct_pca(50, X_train_flat)
variance_ratio = pca_sample.explained_variance_ratio_
# Components are counted from 1, so do not rely on matplotlib's default
# 0-based x index (it would shift every point one component to the left).
component_counts = np.arange(1, len(variance_ratio) + 1)

#plot
f, ax = plt.subplots(1,2, figsize=(20,6))
ax[0].plot(component_counts, variance_ratio)
ax[0].set_title("PCA explained variance ratio")
ax[0].set_xlabel("n components")
ax[0].set_ylabel("variance ratio")

ax[1].plot(component_counts, np.cumsum(variance_ratio))
ax[1].set_title("PCA explained variance ratio cumulative")
ax[1].set_xlabel("n components")
ax[1].set_ylabel("variance ratio cumulative");

## First 10 components

In [None]:
# 2 x 5 grid of tick-less panels, one per principal component of `pca`.
fig, ax = plt.subplots(
    2, 5,
    figsize=(20, 10),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
)
# ax.flat walks the grid row by row, which matches the (i // 5, i % 5) layout.
for idx, panel in enumerate(ax.flat):
    # Each component is a flattened image; fold it back to h x w for display.
    panel.imshow(pca.components_[idx].reshape((h, w)), cmap='gray')
    panel.set_title('component: ' + str(idx + 1))

## pca reconstruction

In [None]:
# Project every training image onto the fitted principal components of `pca` ...
components = pca.transform(X_train_flat)
# ... then map those low-dimensional coordinates back to pixel space to obtain
# the approximate reconstruction of each image.
projected = pca.inverse_transform(components)

In [None]:
# Top row: the first ten original training images.
# Bottom row: their reconstructions from the 10-component PCA.
fig, ax = plt.subplots(
    2, 10,
    figsize=(15, 3),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
)
for col in range(10):
    for row, image_stack in enumerate((X_train_flat, projected)):
        # Rows are flattened images; fold them back to h x w for display.
        ax[row, col].imshow(image_stack[col].reshape(h, w), cmap='gray')
ax[0, 0].set_ylabel('full-dim\ninput')
ax[1, 0].set_ylabel('10-dim\nreconstruction');

# Linear classifier: SVM

## Directly apply SVM: all 10 classes

In [None]:
def draw_vector(v0, v1, ax=None):
    """Draw an arrow from point ``v0`` to point ``v1``.

    Parameters
    ----------
    v0 : sequence of two floats
        Tail of the arrow, in data coordinates.
    v1 : sequence of two floats
        Head of the arrow, in data coordinates.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, the current pyplot axes is used.
    """
    # Honour a caller-supplied axes; fall back to the current one otherwise.
    if ax is None:
        ax = plt.gca()
    arrowprops = dict(arrowstyle='->',
                      linewidth=2,
                      shrinkA=0, shrinkB=0)
    # An empty-text annotation is used only for its arrow: xy=v1 is the head
    # and xytext=v0 is the tail.
    ax.annotate('', v1, v0, arrowprops=arrowprops)

def draw_PCA_eig(reduced_data):
    """Overlay the two principal axes of ``reduced_data`` on the current plot.

    A 2-component PCA is fitted to ``reduced_data``; for each component an
    arrow is drawn from the data mean along the component direction, with a
    length of twice the square root of that component's explained variance.
    """
    pca_test = PCA(n_components=2)
    pca_test.fit(reduced_data)
    centre = pca_test.mean_
    for variance, direction in zip(pca_test.explained_variance_, pca_test.components_):
        arrow = direction * 2 * np.sqrt(variance)
        draw_vector(centre, centre + arrow)

def draw_sv_pca(data, savefig = False):
    """Fit a linear SVM in 2-D PCA space and plot it against the test data.

    Parameters
    ----------
    data : tuple
        ``(X_train, X_test, y_train, y_test)`` with flattened samples (one row
        per sample) and their labels.
    savefig : bool, default=False
        When true, the figure is also written to ``'svm_pca.jpg'``.

    Notes
    -----
    The PCA projection is fitted on the training set only and then applied to
    both sets, so the test points and the decision regions share the
    coordinate system the SVM was trained in.
    """
    #get original data
    X_train, X_test, y_train, y_test = data
    pca_2 = PCA(n_components=2).fit(X_train)
    X_r = pca_2.transform(X_train)
    # Reuse the train-fitted projection. Re-fitting on the test set would put
    # the test points on different axes from the classifier.
    X_t = pca_2.transform(X_test)
    sv_pca = svm.SVC(kernel='linear', C=1).fit(X_r, y_train)
    #plot
    plt.figure(figsize=(15, 10))
    #plot original test data
    plt.scatter(X_t[:, 0], X_t[:, 1], c=y_test, s=30, cmap=plt.cm.coolwarm)
    #prepare grid for svm
    x_min, x_max = X_t[:, 0].min() - 1, X_t[:, 0].max() + 1
    y_min, y_max = X_t[:, 1].min() - 1, X_t[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))#x, y coord
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    #plot svm
    n_classes = len(sv_pca.classes_)
    if n_classes == 2:
        # Binary case: contour the signed decision function, so that level 0
        # is the separating line and levels -1/+1 are the margins.
        Z = sv_pca.decision_function(grid_points).reshape(xx.shape)
        plt.contour(xx, yy, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,linestyles=['--', '-', '--'])
    else:
        # Multi-class case: map the predicted labels to 0..n_classes-1
        # (classes_ is sorted) and contour at the half-integers, which traces
        # the borders between neighbouring predicted regions.
        Z = np.searchsorted(sv_pca.classes_, sv_pca.predict(grid_points)).reshape(xx.shape)
        plt.contour(xx, yy, Z, colors='k', levels=np.arange(n_classes - 1) + 0.5,
                    alpha=0.5, linestyles='-')
    #plot pca
    draw_PCA_eig(X_t)
    if savefig:
        plt.savefig('svm_pca.jpg')
    plt.show()

In [None]:
# Run the PCA + linear-SVM visualisation on the full dataset (all digit labels);
# the second argument (savefig=False) skips writing the figure to disk.
draw_sv_pca((X_train_flat, X_test_flat, y_train, y_test), False)

## Observation by pca and binary classification

In [None]:
import matplotlib.colors as mcolors
# One named Tableau colour per digit, in palette order.
color_choice = list(mcolors.TABLEAU_COLORS)
# 2-D PCA coordinates of every training image.
pca_latent_train = PCA(n_components=2).fit(X_train_flat).transform(X_train_flat)
plt.figure(figsize=(10,10))
for digit in range(10):
    # Boolean mask selecting the training images labelled `digit`.
    is_digit = y_train == digit
    plt.scatter(x=pca_latent_train[is_digit, 0], y=pca_latent_train[is_digit, 1],
                c=color_choice[digit], label=digit)
plt.legend()
plt.title('MNIST, First 2 principal components')

It can be observed from the plot above that the digit 0 (blue) lies far away from the digit 1 (orange). So, in principle, it should be easy to separate and classify the two digits 0 and 1. Let's see how the SVM performs!

In [None]:
# Same 2-D PCA scatter as before, restricted to the digits 0 and 1.
plt.figure(figsize=(10,10))
for digit in range(2):
    # Boolean mask selecting the training images labelled `digit`.
    is_digit = y_train == digit
    plt.scatter(x=pca_latent_train[is_digit, 0], y=pca_latent_train[is_digit, 1],
                c=color_choice[digit], label=digit)
plt.legend()
plt.title('MNIST 0 and 1, First 2 principal components')

In [None]:
# Easily distinguishable pair: keep only the samples labelled 0 or 1.
train_mask_01 = y_train <= 1
test_mask_01 = y_test <= 1

X_train_flat_01 = X_train_flat[train_mask_01]
y_train_01 = y_train[train_mask_01]
X_test_flat_01 = X_test_flat[test_mask_01]
y_test_01 = y_test[test_mask_01]

draw_sv_pca((X_train_flat_01, X_test_flat_01, y_train_01, y_test_01))

Similarly, we find that the digit 9 overlaps substantially with the digit 7, so it would not be surprising if the classifier struggles to separate these two digits.

In [None]:
# Barely distinguishable pair: labels that are at least 7 but not 8.
train_mask_79 = (y_train >= 7) & (y_train != 8)
test_mask_79 = (y_test >= 7) & (y_test != 8)

X_train_flat_79 = X_train_flat[train_mask_79]
y_train_79 = y_train[train_mask_79]
X_test_flat_79 = X_test_flat[test_mask_79]
y_test_79 = y_test[test_mask_79]

In [None]:
# Project the 7-vs-9 subset to 2-D, fit the linear SVM and plot the result.
draw_sv_pca((X_train_flat_79, X_test_flat_79, y_train_79, y_test_79))

Question: What would you say about the digits 6 and 8? Are there any other pairs that would be easier or harder to classify, based on the PCA plot?

# Nonlinear classifier: nonlinear Autoencoder (provided as a model)

In [None]:
# Load the saved Keras model from the relative path 'encoder_model'
# (resolved against the current working directory).
Encoder = tf.keras.models.load_model('encoder_model')
# Print the layer-by-layer summary of the loaded model.
Encoder.summary()

Here we provide a trained encoder model. It defines a latent space, and we can reconstruct the ten digits in this space.

In [None]:
# Encode every training image, then split out the first two latent
# coordinates for plotting.
ae_latent_train = Encoder.predict(X_train_flat)
ae_latent_train_x = ae_latent_train[:,0]
ae_latent_train_y = ae_latent_train[:,1]

import matplotlib.colors as mcolors
# One named Tableau colour per digit, in palette order.
color_choice = list(mcolors.TABLEAU_COLORS)
plt.figure(figsize=(10,10))
for digit in range(10):
    # Boolean mask selecting the training images labelled `digit`.
    is_digit = y_train == digit
    plt.scatter(x=ae_latent_train_x[is_digit], y=ae_latent_train_y[is_digit],
                c=color_choice[digit], label=digit)
plt.legend()
# Label the figure so that it can be read on its own, like the PCA scatter.
plt.xlabel('latent dimension 1')
plt.ylabel('latent dimension 2')
plt.title('MNIST, First 2 autoencoder latent dimensions');