# 3. Learning++

## 3.5 Applying t-SNE on SVHN

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import loadmat
from skimage import color
from skimage import io
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from scipy.spatial.distance import pdist
from sklearn.manifold._t_sne import _joint_probabilities
from scipy import linalg
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import squareform
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd

In [None]:
def load_data(path):
    """Read the MAT-file at `path` and return its 'X' and 'y' entries.

    Returns:
        A 2-tuple ``(X, y)`` holding the arrays stored under the keys
        'X' and 'y' of the loaded file.
    """
    contents = loadmat(path)
    images = contents['X']
    labels = contents['y']
    return images, labels

# Read the train and test splits from their MAT-files.
X_train, y_train = load_data('SVHN/train_32x32.mat')
X_test, y_test = load_data('SVHN/test_32x32.mat')

# Move axis 3 of each image array to the front; axes 0-2 keep their order.
X_train = np.transpose(X_train, (3, 0, 1, 2))
X_test = np.transpose(X_test, (3, 0, 1, 2))

# Keep only the first column of each label array (a 1-D view of the labels).
y_train = y_train[:, 0]
y_test = y_test[:, 0]

# Total number of images across both splits.
num_images = len(X_train) + len(X_test)

# Remap label 10 to 0, in place.
# (presumably SVHN encodes the digit zero as 10 -- confirm against the dataset docs)
np.putmask(y_train, y_train == 10, 0)
np.putmask(y_test, y_test == 10, 0)

In [None]:
# Convert to grayscale
def to_grayscale(images):
    """Collapse the last (channel) axis of `images` into one gray channel.

    The last axis is combined as a weighted sum with weights
    0.2990, 0.5870 and 0.1140, and a new axis of length 1 is inserted at
    position 3 of the result.
    """
    channel_weights = [0.2990, 0.5870, 0.1140]
    weighted_sum = np.dot(images, channel_weights)
    return np.expand_dims(weighted_sum, axis=3)


# Grayscale versions of both splits, stored as float32.
train_gray = to_grayscale(X_train)
train_gray = train_gray.astype(np.float32)
test_gray = to_grayscale(X_test)
test_gray = test_gray.astype(np.float32)

# Flatten every training image into a single row vector.
# Only train_gray is flattened here; test_gray keeps its 4-D shape.
n1, n2, n3, _ = train_gray.shape
train_gray = train_gray.reshape((n1, n2 * n3))

# Seaborn figure size and a 10-colour palette for the plots.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
palette = sns.color_palette("bright", 10)

In [None]:
# First attempt: t-SNE with scikit-learn's default settings on the flattened
# grayscale training images.
tsne = TSNE()
X_embedded = tsne.fit_transform(train_gray)

# x and y are passed by keyword: recent seaborn releases make every argument
# of scatterplot other than `data` keyword-only, so positional x/y raises a
# TypeError there.
sns.scatterplot(
    x=X_embedded[:, 0],
    y=X_embedded[:, 1],
    hue=y_train,
    legend='full',
    palette=palette,
)

In [None]:
# Second attempt: many more optimisation iterations and an explicit learning rate.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter`;
# switch the keyword if the installed version rejects `n_iter`.
tsne2 = TSNE(n_iter=10000, learning_rate=150)
X_embedded_take2 = tsne2.fit_transform(train_gray)

# x and y are passed by keyword: recent seaborn releases make every argument
# of scatterplot other than `data` keyword-only, so positional x/y raises a
# TypeError there.
sns.scatterplot(
    x=X_embedded_take2[:, 0],
    y=X_embedded_take2[:, 1],
    hue=y_train,
    legend='full',
    palette=palette,
)

In [None]:
# Third attempt: t-SNE with perplexity raised to 50, other settings left at defaults.
tsne3 = TSNE(perplexity=50)
# NOTE: this stores the result under the name X_embedded_take2, replacing
# whatever embedding that name held before this cell ran.
X_embedded_take2 = tsne3.fit_transform(train_gray)

# x and y are passed by keyword: recent seaborn releases make every argument
# of scatterplot other than `data` keyword-only, so positional x/y raises a
# TypeError there.
sns.scatterplot(
    x=X_embedded_take2[:, 0],
    y=X_embedded_take2[:, 1],
    hue=y_train,
    legend='full',
    palette=palette,
)

## 3.5 PCA on SVHN

In [None]:
# Scale the flattened grayscale training images with a default StandardScaler.
sc = StandardScaler()
X_train_i = sc.fit_transform(train_gray)

# Reduce the scaled data to two principal components.
# NOTE: X_train is rebound here to the 2-column PCA output; it no longer
# refers to the image array after this cell runs.
pca = PCA(n_components=2)
X_train = pca.fit_transform(X_train_i)

# One column per principal component.
principalDf = pd.DataFrame(
    X_train,
    columns=['principal component 1', 'principal component 2'],
)
# Single-column frame holding the training labels.
y = pd.DataFrame(y_train, columns=['target'])

# Components and labels side by side, one row per training sample.
finalDf = pd.concat((principalDf, y), axis='columns')
finalDf

In [None]:
# Scatter plot of the two principal components in finalDf, one colour per label.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Principal Component 1', fontsize=15)
ax.set_ylabel('Principal Component 2', fontsize=15)
ax.set_title('2 component PCA', fontsize=20)

targets = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
colors = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']

# The loop variable is called `target_color` rather than `color` so that it
# does not rebind the `color` module imported from skimage at the top of the file.
for target, target_color in zip(targets, colors):
    # Boolean mask selecting the rows whose label equals this target.
    indicesToKeep = finalDf['target'] == target
    # Draw on `ax` explicitly and attach the label to the artist, so the legend
    # does not depend on the order in which the scatter calls were made.
    ax.scatter(
        finalDf.loc[indicesToKeep, 'principal component 1'],
        finalDf.loc[indicesToKeep, 'principal component 2'],
        c=target_color,
        s=50,
        label=str(target),
    )

ax.legend()
ax.grid()