In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA

In [None]:
def plot_handwritten_digits(images, y, max_n=10):
    """Draw the first ``max_n`` images in a grid, each annotated with its label.

    Parameters
    ----------
    images : sequence
        Indexable, sized collection of images; each item is handed to
        ``imshow`` as-is.
    y : sequence
        Labels aligned with ``images``; ``str(y[i])`` is written above
        image ``i``.
    max_n : int, default 10
        Upper bound on the number of images drawn.

    Returns
    -------
    None
        The figure is created through pyplot and left as the current figure.
    """
    n_col = 5
    n_shown = max(0, min(max_n, len(images)))
    # Never fewer than two rows, so up to ten images keep the original 2x5
    # layout and figure size; beyond that the grid grows instead of
    # add_subplot rejecting an index past n_row * n_col.
    n_row = max(2, -(-n_shown // n_col))
    fig = plt.figure(figsize=(2. * n_col, 2.26 * n_row))
    for i in range(n_shown):
        ax = fig.add_subplot(n_row, n_col, i + 1, xticks=[], yticks=[])
        ax.imshow(images[i], cmap=plt.cm.bone, interpolation='nearest')
        # Label goes just above the top-left corner of the image.
        ax.text(0, -1, str(y[i]))

In [None]:
def get_pca_embedding(data, n_components=2):
    """Fit a PCA on ``data`` and return the transformed data.

    Parameters
    ----------
    data : array-like
        Passed unchanged to ``PCA.fit_transform``.
    n_components : default 2
        Forwarded to ``PCA(n_components=...)``. The default keeps the
        two-component embedding this function always produced.

    Returns
    -------
    The result of ``PCA(n_components=n_components).fit_transform(data)``.
    """
    pca = PCA(n_components=n_components)
    return pca.fit_transform(data)

In [None]:
def scale_data(data, scaler_type='std'):
    """Fit a scikit-learn scaler on ``data`` and return the transformed data.

    Parameters
    ----------
    data : array-like
        Passed unchanged to the scaler's ``fit_transform``.
    scaler_type : {'std', 'min_max'}, default 'std'
        ``'std'`` selects ``StandardScaler``; ``'min_max'`` selects
        ``MinMaxScaler``.

    Returns
    -------
    The output of the chosen scaler's ``fit_transform(data)``.

    Raises
    ------
    ValueError
        If ``scaler_type`` is not one of the supported names.
    """
    if scaler_type == 'std':
        scaler = StandardScaler()
    elif scaler_type == 'min_max':
        scaler = MinMaxScaler()
    else:
        # Without this branch an unknown name fell through and failed later
        # with an UnboundLocalError on `scaler`.
        raise ValueError(
            f"unknown scaler_type {scaler_type!r}; expected 'std' or 'min_max'"
        )
    return scaler.fit_transform(data)

In [None]:
def visualize_embedding(embedding, labels=None, n_classes=10):
    """Scatter-plot the first two columns of ``embedding``, coloured by label.

    Parameters
    ----------
    embedding : 2-D array
        Indexed as ``embedding[:, 0]`` (x axis) and ``embedding[:, 1]``
        (y axis).
    labels : sequence, optional
        One colour value per point, passed to ``scatter`` as ``c``. When
        omitted, the notebook-level ``digits.target`` is used, which is what
        this function always read before the parameter existed.
    n_classes : int, default 10
        Number of discrete levels requested from the 'rainbow' colormap.

    Returns
    -------
    None
        Draws on a new pyplot figure.
    """
    if labels is None:
        # Backward-compatible fallback to the global dataset loaded by the
        # notebook; pass `labels` explicitly to avoid the hidden dependency.
        labels = digits.target
    plt.figure(figsize=(15, 10))
    plt.scatter(
        embedding[:, 0], embedding[:, 1],
        c=labels, edgecolor='none', alpha=0.5,
        # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was
        # removed in Matplotlib 3.9.
        cmap=plt.get_cmap('rainbow', n_classes),
    )
    plt.xlabel('principal_component_1')
    plt.ylabel('principal_component_2')
    plt.colorbar()

# Handwritten digits, digitized as 8×8 pixel matrices

In [None]:
# Load the digits dataset and report the shape of its `data` array.
# `digits` is a notebook-level name: visualize_embedding looks it up for the
# point colours, so this cell has to run before any embedding is plotted.
digits = load_digits()
print(f'data shape: {digits.data.shape}')

In [None]:
# Preview the first ten images, each annotated with its target label.
plot_handwritten_digits(digits.images, digits.target, max_n=10)

# Create PCA embedding

In [None]:
# Baseline: PCA embedding of the unscaled `digits.data`, then plot it.
embedding = get_pca_embedding(digits.data)
visualize_embedding(embedding)

# Repeat with scaled data

In [None]:
# Same pipeline, but the data goes through StandardScaler ('std') before PCA.
# Note: `embedding` is rebound here, replacing the previous cell's result.
embedding = get_pca_embedding(scale_data(digits.data, scaler_type='std'))
visualize_embedding(embedding)

In [None]:
# Same pipeline, but the data goes through MinMaxScaler ('min_max') before PCA.
# Note: `embedding` is rebound here, replacing the previous cell's result.
embedding = get_pca_embedding(scale_data(digits.data, scaler_type='min_max'))
visualize_embedding(embedding)