# Manifold Learning - Introduction

### Loading Libraries

In [13]:
# Numerical Computing
import numpy as np
from numpy.random import choice, uniform, randn

# Data Manipulation
import pandas as pd

# Path
from pathlib import Path

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# IPywidgets
# import ipyvolume as ipv
from ipywidgets import HBox

# Scikit-Learn
from sklearn.datasets import make_swiss_roll, make_s_curve

In [5]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [6]:
# Data directory, located two levels above the working directory.
DATA_PATH = Path('..') / '..' / 'data'

# Use seaborn's 'white' style for every figure in the notebook.
sns.set_style('white')

### Manifold Examples

In [9]:
n_points, noise = 1000, 0.1

# Seed NumPy's global generator so this cell (and the later cells that draw
# from the same global generator) produce the same numbers on every
# Restart & Run All. 42 matches the random_state used for the toy manifolds.
np.random.seed(42)

# Noisy ellipse in the x-y plane: semi-axis 2 along x, semi-axis 1 along y.
angles = uniform(low=-np.pi, high=np.pi, size=n_points)
x = 2 * np.cos(angles) + noise * randn(n_points)
y = np.sin(angles) + noise * randn(n_points)

theta = np.pi / 4  # 45 degrees
rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)],
                            [np.sin(theta), np.cos(theta)]])

# The points are stored as rows, so right-multiplying by `rotation_matrix`
# applies its transpose to each point, i.e. turns the ellipse clockwise by theta.
rotated = np.column_stack((x, y)).dot(rotation_matrix)
x, y = rotated[:, 0], rotated[:, 1]

# Lift into 3-D: z is a noisy linear combination of the rotated x and y,
# so the points scatter around a tilted plane.
z = .2 * x + .2 * y + noise * randn(n_points)
data = np.vstack((x, y, z)).T  # shape (n_points, 3): columns x, y, z

#### Plot 3-D Ellipse

In [11]:
# Static 3-D scatter of the noisy ellipse; the columns of `data` are x, y, z.
# Matplotlib is used because `ipv` (ipyvolume) is never imported in this
# notebook (its import line is commented out), so the previous
# `ipv.quickscatter` call raised NameError on a fresh kernel.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(projection='3d')
ax.scatter(*data.T, s=1, c='blue')
ax.set_title('Noisy 3-D Ellipse', fontsize=14)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()

#### Non-Linear Manifold

In [14]:
# Number of points sampled for each toy manifold below.
n_samples = 10_000

# One viridis colour per sampled point.
palette = sns.color_palette(palette='viridis', n_colors=n_samples)

In [16]:
# Sample a noisy swiss roll; `swiss_val` is the second array returned by
# make_swiss_roll (one value per point).
swiss_X, swiss_val = make_swiss_roll(n_samples=n_samples,
                                     noise=.1,
                                     random_state=42)

# Reorder the points by descending `swiss_val`.
# Note: `swiss_val` itself keeps its original (unsorted) order.
swiss_order = np.argsort(swiss_val)[::-1]
swiss_X = swiss_X[swiss_order]

In [17]:
# Sample a noisy S-curve; `scurve_val` is the second array returned by
# make_s_curve (one value per point).
scurve_X, scurve_val = make_s_curve(n_samples=n_samples,
                                    noise=.1,
                                    random_state=42)

# Reorder the points by descending `scurve_val`.
# Note: `scurve_val` itself keeps its original (unsorted) order.
scurve_order = np.argsort(scurve_val)[::-1]
scurve_X = scurve_X[scurve_order]

#### Plotting Toy Examples

In [19]:
# Static side-by-side 3-D scatter plots of both toy manifolds.
# Matplotlib is used because `ipv` (ipyvolume) is never imported in this
# notebook (its import line is commented out), so the previous
# `ipv.quickscatter` calls raised NameError on a fresh kernel.
fig = plt.figure(figsize=(16, 8))
toy_manifolds = (('Swiss Roll', swiss_X), ('S-Curve', scurve_X))

for position, (title, points) in enumerate(toy_manifolds, start=1):
    ax = fig.add_subplot(1, 2, position, projection='3d')
    # Row i of `points` is drawn in colour i of `palette`; the rows were
    # sorted by their manifold value, so colour varies along the manifold.
    ax.scatter(*points.T, s=1, c=palette)
    ax.set_title(title, fontsize=14)

fig.tight_layout()

### Loading Fashion MNIST Data

In [20]:
# Sub-directory of DATA_PATH that holds the Fashion MNIST files.
fashion_mnist_path = DATA_PATH.joinpath('fashion_mnist')

In [21]:
# Load the image array and the label array from their .npy files.
fashion_data, fashion_label = (
    np.load(fashion_mnist_path / file_name)
    for file_name in ('data.npy', 'labels.npy')
)

# Distinct labels, cast to int, as an ascending list.
distinct_labels = np.unique(fashion_label)
classes = sorted(distinct_labels.astype(int))

In [22]:
# Lookup from row position in label_dict.csv to the value stored there
# (presumably integer class label -> class name; verify against the file).
# `read_csv(squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0;
# `.squeeze('columns')` is the documented replacement and turns the
# single-column frame into a Series before converting it to a dict.
label_dict = (
    pd.read_csv(fashion_mnist_path / 'label_dict.csv', header=None)
    .squeeze('columns')
    .to_dict()
)

In [23]:
# Image height and width, derived from the number of columns in fashion_data
# (assumes each row is a flattened square image -- TODO confirm).
h = int(np.sqrt(fashion_data.shape[1]))
w = h

# Images shown per class in the sample grid.
# Note: this rebinds `n_samples`, which was 10000 for the toy manifolds above.
n_samples = 15

In [24]:
# Build one large mosaic image: one row per class, each row holding
# `n_samples` randomly drawn images of that class, then display it.
fig, ax = plt.subplots(figsize=(18, 8))
fashion_sample = np.empty(shape=(h * len(classes),
                                 w * n_samples))

for row, label in enumerate(classes):
    # flatnonzero always yields a 1-D index array. The previous
    # argwhere(...).squeeze() collapsed to a 0-d array when a class had a
    # single image, which `choice` interprets as an int population size and
    # then samples indices from range(idx) instead of the class's own images.
    label_idx = np.flatnonzero(fashion_label == label)
    sample_indices = choice(label_idx, size=n_samples, replace=False)
    i = row * h  # top pixel row of this class's strip
    for col, sample_idx in enumerate(sample_indices):
        j = col * w  # left pixel column of this tile
        sample = fashion_data[sample_idx].reshape(h, w)
        fashion_sample[i:i + h, j:j + w] = sample

ax.imshow(fashion_sample, cmap='Blues')
ax.set_title('Fashion Images', fontsize=14)
ax.axis('off')  # explicit Axes call instead of the pyplot state machine
fig.tight_layout()

### Visual Evidence for the Manifold Hypothesis: Pixel Structure of Different Image Classes

In [25]:
# One heatmap per class: every heatmap row is the flat pixel vector of one
# randomly drawn image of that class, so shared structure shows up as
# vertical bands.
fig, axes = plt.subplots(nrows=len(classes), figsize=(15, 15))

n = 100  # images drawn per class

for i, label in enumerate(classes):
    # flatnonzero always yields a 1-D index array. The previous
    # argwhere(...).squeeze() collapsed to a 0-d array when a class had a
    # single image, which `choice` interprets as an int population size.
    label_idx = np.flatnonzero(fashion_label == label)
    samples = choice(label_idx, size=n, replace=False)
    sns.heatmap(fashion_data[samples], cmap='Blues', ax=axes[i], cbar=False)
    axes[i].set_title(label_dict[label], fontsize=14)
    axes[i].axis('off')
fig.tight_layout(h_pad=.1)

### Pixel Structure of Random Images

In [27]:
# Heatmap of 100 uniformly random "images" of 784 pixels each, for contrast
# with the structured per-class heatmaps (784 presumably matches the
# flattened Fashion MNIST image length -- TODO confirm).
fig, ax = plt.subplots(figsize=(14, 2))

# randint's `high` bound is exclusive: 256 is needed to cover the full
# 8-bit range 0..255 (high=255 never produced the value 255).
random_pixels = np.random.randint(low=0, high=256, size=(100, 784))

sns.heatmap(random_pixels, cmap='Blues', ax=ax, cbar=False)
ax.axis('off')  # explicit Axes call instead of the pyplot state machine
fig.tight_layout()