In [None]:
%matplotlib inline
import numpy as np
import sklearn.datasets as data
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sb; sb.set_style( 'darkgrid' ) # use whitegrid if prefer a white background
import pandas as pd

# Generator built from a SeedSequence created without a fixed seed.
# NOTE(review): no fixed seed is supplied, so draws from `rng` are presumably
# different on every kernel start; `rng` is not referenced by any cell shown below.
from numpy.random import SeedSequence, default_rng
rng = default_rng( SeedSequence().entropy )

# Suppress all warnings for the rest of the session.
# NOTE(review): this also hides deprecation/convergence warnings raised by the
# libraries used below — consider narrowing the filter.
import warnings
warnings.filterwarnings("ignore")

#matplotlib.rcParams.update( { 'font.size': 18 } ) # Use this to setup your preferred font size for plotting

#### 1- Use a Self-Organizing Map (SOM) to see how a random set of colors self-organizes with different learning rates, numbers of epochs, grid sizes and training set sizes.
+ (a) Define learning rate as exp( $-t\gamma$ ), where $\gamma$ is the decay rate, and $t$ is a range of epochs.
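+ (b) Define the neighborhood function between neurons on a grid as $\exp\left(-d^2 / (2\sigma^2)\right)$, where $d$ is the distance between the two neurons on the grid and $\sigma^2$ controls the spread.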

In [None]:
# Plot how the learning rate from part (a) behaves over the epoch range, one
# panel per decay rate. Each `?` is a placeholder that must be filled in before
# this cell can run.
epochs = np.arange(0, ? )
lr_decay = [ ?, ?, ?, ? ]
fig,ax = plt.subplots(nrows=1, ncols = 4, figsize=(15,4))
# 141, 142, 143, 144: subplot codes for 1 row x 4 columns, panel 1..4
plt_ind = np.arange(4) + 141
for decay, ind in zip(lr_decay, plt_ind):
    plt.subplot(ind)
    learn_rate =  # (a) fill in the learning-rate expression using `epochs` and `decay`
    plt.plot(epochs, learn_rate, c='red')
    plt.title('decay rate: ' + str(decay))
    plt.xlabel('epochs $t$')
    # NOTE(review): '\e' is not a recognised escape in a non-raw string, and
    # '^(t)' probably needs braces ('^{(t)}') to superscript the whole group —
    # confirm the rendered label.
    plt.ylabel('$\eta^(t)$')
fig.subplots_adjust(hspace=0.5, wspace=0.3)
plt.show()

In [None]:
# Plot the neighborhood function from part (b) against grid distance, one panel
# per spread value. Each `?` is a placeholder that must be filled in before this
# cell can run.
distance = np.arange(0, 10)
sigma_sq = [ ?, ?, ?, ? ] # Try out different spread (variance) ranges
fig,ax = plt.subplots(nrows=1, ncols=4, figsize=(15,4))
# 141, 142, 143, 144: subplot codes for 1 row x 4 columns, panel 1..4
plt_ind = np.arange(4) + 141
for s, ind in zip(sigma_sq, plt_ind):
    plt.subplot(ind)
    f =  # (b) fill in the neighborhood expression using `distance` and `s`
    plt.plot(distance, f, c='red')
    # NOTE(review): '\s' is not a recognised escape in a non-raw string; a raw
    # string (r'...') would state the intent explicitly.
    plt.title('$\sigma^2$ = ' + str(s))
    plt.xlabel('Distance')
    plt.ylabel('Neighborhood function $f$')
fig.subplots_adjust(hspace=0.5, wspace=0.3)
plt.show()


In [None]:
def find_BMU(SOM,x):
    """Locate the best matching unit (BMU) for a single sample.

    The squared Euclidean distance between `x` and every cell's weight vector
    is computed by summing squared differences over the last axis of `SOM`;
    the grid coordinates of the smallest distance are returned. Ties resolve
    to the first cell in row-major order, as with `argmin`.

    Parameters
    ----------
    SOM : array indexed as [row, column, feature]
    x   : sample that broadcasts against the feature axis of `SOM`

    Returns
    -------
    tuple
        (g, h) grid index of the BMU.
    """
    sq_diff = (SOM - x) ** 2
    dist_sq_grid = np.sum(sq_diff, axis=2)
    flat_winner = dist_sq_grid.argmin()
    return np.unravel_index(flat_winner, dist_sq_grid.shape)
    
def update_weights(SOM, train_ex, learn_rate, radius_sq, 
                   BMU_coord, step=3):
    """Pull the SOM cells around the BMU towards one training example.

    Every cell within `step` rows and `step` columns of the BMU (clipped to
    the grid) moves towards `train_ex` by
    ``learn_rate * exp(-d^2 / (2 * radius_sq))`` of the remaining difference,
    where ``d^2`` is the squared grid distance to the BMU.

    Parameters
    ----------
    SOM        : array indexed as [row, column, feature]; modified in place
    train_ex   : one training sample
    learn_rate : step size for this update
    radius_sq  : squared neighborhood radius (sigma^2 of the Gaussian)
    BMU_coord  : (g, h) grid index of the best matching unit
    step       : half-width of the square window around the BMU that is updated

    Returns
    -------
    The same `SOM` array that was passed in, after the in-place update.
    """
    g, h = BMU_coord
    # With a (near-)zero radius the Gaussian collapses to the BMU itself, so
    # skip the window and avoid dividing by a vanishing radius.
    if radius_sq < 1e-3:
        SOM[g,h,:] += learn_rate * (train_ex - SOM[g,h,:])
        return SOM
    # range() excludes its stop value, so the upper bounds need +1 for the
    # window to extend `step` cells on every side of the BMU.
    row_lo, row_hi = max(0, g - step), min(SOM.shape[0], g + step + 1)
    col_lo, col_hi = max(0, h - step), min(SOM.shape[1], h + step + 1)
    for i in range(row_lo, row_hi):
        for j in range(col_lo, col_hi):
            dist_sq = np.square(i - g) + np.square(j - h)
            dist_func = np.exp(-dist_sq / (2 * radius_sq))
            SOM[i,j,:] += learn_rate * dist_func * (train_ex - SOM[i,j,:])
    return SOM

def train_SOM(SOM, train_data, learn_rate = .1, radius_sq = 1, 
             lr_decay = .1, radius_decay = .1, epochs = 10,
             random_state = None):
    """Train a SOM grid on `train_data` for a number of epochs.

    In epoch ``t`` (counting from 0) every sample is presented once, in a
    freshly shuffled order, using
    ``learn_rate * exp(-t * lr_decay)`` as the learning rate and
    ``radius_sq * exp(-t * radius_decay)`` as the squared radius.

    Parameters
    ----------
    SOM          : initialized or partially trained grid, indexed as
                   [row, column, feature]; updated in place by update_weights
    train_data   : training samples, one per row; shuffled IN PLACE every epoch
    learn_rate   : initial learning rate
    radius_sq    : initial squared neighborhood radius
    lr_decay     : exponential decay rate of the learning rate
    radius_decay : exponential decay rate of the squared radius
    epochs       : number of passes over `train_data`
    random_state : object with a ``shuffle(array)`` method used to reorder the
                   samples; when None the notebook-level `rand` generator is
                   used, which must exist before this function is called

    Returns
    -------
    The trained `SOM`.
    """
    # Resolve the shuffler once; the global `rand` is only looked up when the
    # caller did not supply a generator.
    shuffler = rand if random_state is None else random_state
    learn_rate_0 = learn_rate
    radius_0 = radius_sq
    for epoch in np.arange(0, epochs):
        # Decay at the START of the epoch so that epoch t really trains with
        # exp(-t * decay); epoch 0 therefore uses the initial values and the
        # first decay step takes effect in epoch 1.
        learn_rate = learn_rate_0 * np.exp(-epoch * lr_decay)
        radius_sq = radius_0 * np.exp(-epoch * radius_decay)
        shuffler.shuffle(train_data)
        for train_ex in train_data:
            g, h = find_BMU(SOM, train_ex)
            SOM = update_weights(SOM, train_ex,
                                 learn_rate, radius_sq, (g,h))
    return SOM

In [None]:
# Dimensions of the SOM grid
m = 5 # Try out different grid sizes
n = 5 
# Number of training examples
n_x = 3000 # Try out different number of training samples
# Notebook-level generator; train_SOM() reads this global to shuffle the data
rand = np.random.RandomState(0)
# Initialize the training data: n_x random RGB colors
# (randint's upper bound is exclusive, so channel values span 0..254)
train_data = rand.randint(0, 255, (n_x, 3))
# Initialize the SOM randomly; float dtype so in-place weight updates can be fractional
SOM = rand.randint(0, 255, (m, n, 3)).astype(float)
# Lay the n_x colors out as the most nearly square image that holds all of
# them: take the largest divisor of n_x not exceeding sqrt(n_x) as the row
# count (50 x 60 for n_x = 3000), so the display keeps working for any n_x.
n_rows = next(r for r in range(int(np.sqrt(n_x)), 0, -1) if n_x % r == 0)
n_cols = n_x // n_rows
# Display both the training matrix and the SOM grid
fig, ax = plt.subplots(
    nrows=1, ncols=2, figsize=(12, 3.5),
    subplot_kw=dict(xticks=[], yticks=[]))
ax[0].imshow(train_data.reshape(n_rows, n_cols, 3))
ax[0].title.set_text('Training Data')
ax[1].imshow(SOM.astype(int))
ax[1].title.set_text('Randomly Initialized SOM Grid')

In [None]:
# Train the same SOM in successive stages and show the grid after each stage.
# Each `?` is a placeholder for the number of epochs to run in that stage.
fig, ax = plt.subplots(
    nrows=1, ncols=5, figsize=(15, 3.5), 
    subplot_kw=dict(xticks=[], yticks=[]))
total_epochs = 0
# NOTE(review): the list has four entries but there are five axes and
# range(0,5); zip() stops at the shorter input, so the fifth panel stays empty
# unless a fifth value is added.
for epochs, i in zip([ ?, ?, ?, ? ], range(0,5)):
    total_epochs += epochs
    # SOM is fed back in, so each stage continues from the previous weights;
    # learn_rate and radius_sq are not passed, so every call starts again from
    # train_SOM's defaults.
    SOM = train_SOM(SOM, train_data, epochs=epochs)
    ax[i].imshow(SOM.astype(int))
    ax[i].title.set_text('Epochs = ' + str(total_epochs))


In [None]:
# Compare the trained grid over a 3x3 sweep: one row per initial learning
# rate, one column per initial squared radius.
fig, ax = plt.subplots(
    nrows=3, ncols=3, figsize=(15, 15),
    subplot_kw=dict(xticks=[], yticks=[]))

for i, learn_rate in enumerate([0.001, 0.5, 0.99]):
    for j, radius_sq in enumerate([0.01, 1, 10]):
        # Re-seed the notebook-level generator (train_SOM shuffles with it)
        # and rebuild the SOM so every configuration starts from the same
        # random grid.
        rand = np.random.RandomState(0)
        SOM = rand.randint(0, 255, (m, n, 3)).astype(float)
        # train_SOM shuffles its data in place; handing it a copy keeps
        # train_data untouched so every configuration sees the same sample
        # order and the panels differ only by the hyper-parameters.
        SOM = train_SOM(SOM, train_data.copy(), epochs = 5,
                        learn_rate = learn_rate,
                        radius_sq = radius_sq)
        ax[i][j].imshow(SOM.astype(int))
        # Raw strings: '\e' and '\s' are not valid escapes in ordinary literals
        ax[i][j].title.set_text(r'$\eta$ = ' + str(learn_rate) +
                                r', $\sigma^2$ = ' + str(radius_sq))

#### 2- Compare Principal Component Analysis (PCA), Kernel PCA, Singular Value Decomposition and Linear Discriminant Analysis (LDA) in dimensionality reduction.
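+ PCA: finds the orthogonal directions of maximum variance in the data (spread about the mean).
+ LDA: finds the directions of maximum class separation, measured as the ratio of between-class to within-class variance.
+ SVD: factorizes the data matrix into singular values and singular vectors; these correspond to the eigenvalues and eigenvectors of $X^T X$.
+ Kernel PCA: kernelized version of PCA, i.e. PCA carried out in the feature space induced by a kernel function.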
+ Try adding Gaussian noise to the observations and observe how sensitive the projections are.

In [None]:
from sklearn.decomposition import PCA, TruncatedSVD, KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Load the iris data. sklearn.datasets is imported under the alias `data` in
# the first cell; no name `datasets` exists in this notebook.
iris = data.load_iris()

# Feature matrix, integer class labels, and the class names used in plot legends
X = iris.data
y = iris.target
target_names = iris.target_names

# Fit each reducer on the iris features and keep the projected coordinates.
# Each `?` is a placeholder for the number of output dimensions; the plots
# below index columns 0 and 1 of every projection, so at least two are needed.
pca = PCA(n_components = 2)
X_r = pca.fit(X).transform(X)

# LDA's fit() receives the class labels y.
# NOTE(review): scikit-learn caps LDA at n_classes - 1 components — confirm
# against the LinearDiscriminantAnalysis documentation.
lda = LinearDiscriminantAnalysis(n_components = ? )
X_r2 = lda.fit(X, y).transform(X)

# NOTE(review): y is also passed to fit() for SVD and Kernel PCA; presumably
# these estimators ignore it — verify.
svd = TruncatedSVD( n_components = ? )
Xsvd = svd.fit( X, y ).transform( X )

kpca = KernelPCA( n_components = ? )
Xkpca = kpca.fit( X, y ).transform( X )

# Report the fraction of variance explained by each retained component,
# first for PCA and then for truncated SVD
for message, fitted_model in (
    ("explained PCA variance ratio (first two components): %s", pca),
    ("explained SVD variance ratio (first two components): %s", svd),
):
    print(message % str(fitted_model.explained_variance_ratio_))



colors = ["navy", "turquoise", "darkorange"]
lw = 2


def _scatter_by_class(projection, labels, class_names, class_colors, title,
                      **scatter_kwargs):
    """Scatter the first two columns of `projection` in a new figure.

    One color and legend entry is used per class; class ``k`` is the set of
    rows where ``labels == k``, paired with the k-th entry of `class_colors`
    and `class_names`. Extra keyword arguments are forwarded to plt.scatter.
    """
    plt.figure()
    for class_id, (color, class_name) in enumerate(zip(class_colors, class_names)):
        members = labels == class_id
        plt.scatter(
            projection[members, 0], projection[members, 1],
            color=color, alpha=0.8, label=class_name, **scatter_kwargs
        )
    plt.legend(loc="best", shadow=False, scatterpoints=1)
    plt.title(title)


_scatter_by_class(X_r, y, target_names, colors, "PCA of IRIS dataset", lw=lw)
_scatter_by_class(X_r2, y, target_names, colors, "LDA of IRIS dataset")
_scatter_by_class(Xsvd, y, target_names, colors, "SVD of IRIS dataset")
# Both axes must come from the Kernel PCA projection; mixing in a column of
# Xsvd would plot two unrelated projections against each other.
_scatter_by_class(Xkpca, y, target_names, colors, "Kernel PCA of IRIS dataset")

plt.show()

#### 3- Use PCA, Kernel PCA, t-SNE and Truncated SVD to create an embedding for the handwritten digits dataset (scikit-learn's 8×8 `load_digits` data, a small MNIST-like dataset). Adjust the model parameters and see how the embedding changes.

In [None]:
# Load scikit-learn's digits data twice: once as the full dataset object
# (passed to plot_embedding in a later cell) and once as the (features, labels)
# pair. X and y replace the iris arrays of the same names from section 2.
digits = data.load_digits()
X, y   = data.load_digits( return_X_y = True )

n_samples, n_features = X.shape
# Placeholder to fill in.
# NOTE(review): n_neighbors is not referenced by any cell shown in this notebook.
n_neighbors           = ?

# Show the first 100 samples as 8x8 images on a 10x10 grid of axes
fig, axs = plt.subplots(nrows=10, ncols=10, figsize=(6, 6))
for position, panel in enumerate(axs.flat):
    digit_image = X[position].reshape(8, 8)
    panel.imshow(digit_image, cmap=plt.cm.binary)
    panel.axis("off")
_ = fig.suptitle("A selection from the 64-dimensional digits dataset", fontsize=16)

In [None]:
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.manifold import MDS, TSNE
from fml5_helper_funcs import plot_embedding

In [None]:
# Dimensionality reducers to compare on the digits data, keyed by the label
# used in the progress messages and plot titles of the following cells.
# Each `?` is a placeholder to fill in before this cell can run.
# NOTE(review): KernelPCA is imported in the iris cell of section 2, not in the
# import cell just above, so that earlier cell must have been run first.
# NOTE(review): TSNE is imported above but has no entry here, although the
# section heading lists t-SNE.
embeddings = { "Truncated SVD embedding": TruncatedSVD( n_components = ? ),
               "MDS embedding"          : MDS( n_components = ?, n_init=?, max_iter=?, n_jobs=2),
               "PCA"                    : PCA( n_components = ? ),
               "KPCA"                   : KernelPCA( n_components = ? )
            ,
            }

In [None]:
# Fit every reducer on the digits data and store its output under the same label
projections = dict()
for name in embeddings:
    print(f"Computing {name}...")
    reducer = embeddings[name]
    projections[name] = reducer.fit_transform(X, y)

In [None]:
# Draw one figure per stored projection, titled with its label
for name, embedded in projections.items():
    plot_embedding(embedded, y, digits, f"{name}")

plt.show()