# Spiral classification

Given the x-y coordinates of 2D points, classify each point into the one of three spiral branches it belongs to.

Objectives:
 - Build and train a PyTorch MLP model from scratch to classify the points to a high accuracy
 - Visualize how the points are transformed akin to: https://youtu.be/EyKiYVwrdjE?si=JiAKShLumRxbFWXA&t=1002

In [None]:
import torch
from torch import nn, optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

import torch.functional as F

In [None]:
import fastbook
from fastbook import *

In [None]:
# Pick the first CUDA GPU when one is available, otherwise fall back to CPU.
# NOTE(review): none of the cells visible in this notebook move tensors or
# models to `device` — confirm whether that is intended.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
device

## Create data

In [None]:
# Seed the torch RNG so the random noise added to the spirals generated in the
# next cell is the same on every run.
seed = 12345
torch.manual_seed(seed)
N = 1000  # num_samples_per_class
K = 3     # num_classes

In [None]:
# Generate spirals: K branches of N points each, one class label per branch

t = torch.linspace(0, 1, N)  # position along a branch, 0 → 1
a = 0.8 * t + 0.2  # amplitude 0.2 → 1.0

branches, branch_labels = [], []
for k in range(K):
    # Each branch is offset by k/K of a full turn; the Gaussian term jitters
    # the angle of every point.
    noise = 0.2 * torch.randn(N)
    θ = (2 * t + k) * 2 * torch.pi / K + noise
    branches.append(torch.stack((a * θ.sin(), a * θ.cos()), dim=1))
    branch_labels.append(torch.full((N,), k, dtype=torch.long))

# Stack the per-branch pieces into one (K*N, 2) point tensor and one (K*N,)
# label tensor.
X = torch.cat(branches)
y = torch.cat(branch_labels)

In [None]:
# X.shape, y.shape

In [None]:
# X[:5]

In [None]:
# y.unique()

In [None]:
plt.figure(figsize=(4.5,4))
plt.scatter(X[:,0], X[:,1], c=y)
plt.show()

## Dataset and DataLoader: Splitting and Batching the Data

In [None]:
class ToyDataset(Dataset):
    """Map-style dataset that pairs a feature collection with its labels.

    ``X`` and ``y`` are stored as given (no copy) and indexed together, so
    item ``i`` is ``(X[i], y[i])``. The ``split`` argument is accepted but is
    never read by this class.
    """

    def __init__(self, X, y, split=0.2):
        self.features, self.labels = X, y

    def __len__(self):
        # The sample count is the leading dimension of the labels.
        return self.labels.shape[0]

    def __getitem__(self, index):
        return self.features[index], self.labels[index]

In [None]:
whole_ds = ToyDataset(X=X, y=y)
# len(whole_ds)

In [None]:
def split_dataset(ds, train_percent=0.8):
    """
    Randomly partition ``ds`` into a train and a test ``ToyDataset``.

    ds - ToyDataset
    train_percent - fraction of the samples given to the train split; the
        train size is ``int(len(ds) * train_percent)`` and the test split
        receives every remaining sample.

    Returns ``(train_ds, test_ds)``: two new ``ToyDataset`` objects holding the
    rows of ``ds.features`` / ``ds.labels`` selected by ``random_split``.
    """
    n_train = int(len(ds) * train_percent)
    n_test = len(ds) - n_train

    # random_split hands back index-based subsets of `ds`; turn each one into
    # a standalone ToyDataset by indexing the original features and labels.
    parts = random_split(ds, [n_train, n_test])
    train_ds, test_ds = (
        ToyDataset(ds.features[part.indices], ds.labels[part.indices])
        for part in parts
    )
    return train_ds, test_ds

In [None]:
train_ds, test_ds = split_dataset(whole_ds)

In [None]:
# train_ds.features.shape

In [None]:
# type(train_ds)

In [None]:
# test_ds.features.shape

In [None]:
# Re-seed the torch RNG before building the loaders so runs are repeatable.
torch.manual_seed(1337)

# Training batches: reshuffled every epoch; the final incomplete batch is
# dropped so every optimisation step sees exactly 32 samples.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=32,
    shuffle=True,
    drop_last=True,
    num_workers=0,
)

# Evaluation batches: fixed order. The final partial batch is kept
# (drop_last=False) so that every held-out sample is scored; dropping it
# would silently leave up to 31 test samples out of every evaluation.
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=32,
    shuffle=False,
    drop_last=False,
    num_workers=0,
)

In [None]:
# for idx, (x, y) in enumerate(train_loader):
#     print(f"Batch {idx}: ", x.shape, y.shape)
#     if idx > 0:
#         break

## Model

In [None]:
# class MLP(nn.Module):
#     def __init__(self, input_size, hidden_size, output_size, n_layers=3):
#         super().__init__() # IMPORTANT
#         # TODO: figure out later how to create a variable nn.Sequential
#         # self.n_layers = n_layers
#         self.h = hidden_size

#         self.net = nn.Sequential(
#             nn.Linear(input_size, hidden_size),
#             nn.ReLU(),
#             nn.Linear(hidden_size, hidden_size),
#             nn.ReLU(),
#             nn.Linear(hidden_size,output_size)
#         )
    
#     def forward(self, xb):
#         score = self.net(xb)
#         return score


In [None]:
class MLP(nn.Module):
    """Fully connected ReLU network that returns one raw score per class.

    The stack is ``Linear -> ReLU -> Linear -> ... -> ReLU -> Linear``.
    ``n_layers`` is the number of ``Linear`` layers; the input and output
    projections are always created, so values below 2 still give two of them.
    Every hidden layer has ``hidden_size`` units.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=3):
        super().__init__()  # IMPORTANT

        self.n_layers = n_layers
        self.h = hidden_size

        # Feature widths at the boundaries of consecutive Linear layers:
        # input, one or more hidden widths, output.
        n_hidden_to_hidden = max(n_layers - 2, 0)
        widths = (
            [input_size]
            + [hidden_size] * (n_hidden_to_hidden + 1)
            + [output_size]
        )

        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            if modules:
                # A ReLU sits between every pair of Linear layers.
                modules.append(nn.ReLU())
            modules.append(nn.Linear(fan_in, fan_out))

        self.net = nn.Sequential(*modules)

    def forward(self, xb):
        """Return the output of the final Linear layer for the batch ``xb``."""
        return self.net(xb)


In [None]:
# initialize weights
# model = MLP(input_size = 2, hidden_size = 16, output_size = 3)

In [None]:
# X_batch, y_batch = next(iter(train_loader))
# probs = model(X_batch[0])
# probs

In [None]:
# pred = torch.argmax(probs)
# pred.item()

In [None]:
# target = y_batch[0]
# target.item()

In [None]:
# target == pred

In [None]:
# probs = model(X_batch)
# probs.shape

In [None]:
# preds = torch.argmax(probs, dim=-1) # now we need to specify dim
# preds.shape

In [None]:
# targets = y_batch
# targets.shape

In [None]:
# ((preds-targets)==0).float()

In [None]:
# ((preds-targets)==0).float().mean().item()

## Training Loop

In [None]:
# optim = torch.optim.AdamW(model.parameters(), lr=0.01)

In [None]:
def train_epoch(model, train_loader, optim):
    """Run one optimisation pass over ``train_loader`` and report epoch metrics.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        train_loader: iterable yielding ``(xb, yb)`` batches, where ``yb``
            holds the integer class index of every sample in ``xb``.
        optim: optimizer already bound to ``model``'s parameters.

    Returns:
        ``(model, loss, acc)``. ``loss`` is the mean cross-entropy and ``acc``
        the fraction of correct predictions, both as Python floats averaged
        over every sample seen in the epoch. Each batch is scored with the
        parameters in effect before that batch's update.

    Raises:
        ValueError: if ``train_loader`` yields no samples.
    """
    # set model to train
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for xb, yb in train_loader:
        # forward pass
        score = model(xb)

        # cross-entropy loss, reached through `nn.functional`: the `F` alias
        # imported at the top of this notebook points at `torch.functional`,
        # which does not provide `cross_entropy`.
        loss = nn.functional.cross_entropy(score, yb)

        # clear old gradient from previous backprop
        optim.zero_grad()

        # compute new gradient and backprop
        loss.backward()

        # update parameter step
        optim.step()

        # Accumulate sample-weighted statistics so the returned metrics
        # describe the whole epoch rather than only the final batch.
        batch_size = yb.shape[0]
        loss_sum += loss.item() * batch_size
        n_correct += (score.argmax(dim=-1) == yb).sum().item()
        n_seen += batch_size

    # set model back to eval in case
    # other code wants to do that by default
    model.eval()

    if n_seen == 0:
        raise ValueError("train_loader yielded no samples")

    return model, loss_sum / n_seen, n_correct / n_seen
    

In [None]:
# train_epoch(model=model, train_loader=train_loader, optim=optim)

## Testing Loop

In [None]:
def test_epoch(model, test_loader):
    """Evaluate ``model`` on every batch of ``test_loader`` without gradients.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        test_loader: iterable yielding ``(xb, yb)`` batches, where ``yb``
            holds the integer class index of every sample in ``xb``.

    Returns:
        ``(loss, acc)``: the mean cross-entropy and the fraction of correct
        predictions, both as Python floats averaged over every sample yielded
        by ``test_loader`` (batches of different sizes are weighted by their
        sample count).

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    # Evaluate in eval mode, then put the model back in whatever mode the
    # caller had it in.
    was_training = model.training
    model.eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    try:
        # no gradient calculations
        with torch.no_grad():
            for xb, yb in test_loader:
                # model output
                score = model(xb)

                # summed (not averaged) loss, so unequal batch sizes are
                # weighted correctly. Reached through `nn.functional`: the
                # `F` alias imported at the top of this notebook points at
                # `torch.functional`, which does not provide `cross_entropy`.
                loss_sum += nn.functional.cross_entropy(
                    score, yb, reduction="sum"
                ).item()

                # accuracy
                n_correct += (score.argmax(dim=-1) == yb).sum().item()
                n_seen += yb.shape[0]
    finally:
        model.train(was_training)

    if n_seen == 0:
        raise ValueError("test_loader yielded no samples")

    return loss_sum / n_seen, n_correct / n_seen


# The name starts with `test_`; mark it so pytest-style collectors do not
# mistake this evaluation helper for a test case.
test_epoch.__test__ = False

In [None]:
# test_epoch(model=model, test_loader=test_loader)

## Training and Testing Loop

In [None]:
def train_test(model, train_loader, test_loader, optim, epochs=100, verbose=False):
    """Alternate one training pass and one evaluation pass for ``epochs`` epochs.

    Every epoch calls ``train_epoch`` on ``train_loader`` and then
    ``test_epoch`` on ``test_loader``. With ``verbose=True`` a progress line is
    printed on every 10th epoch (counting from 0) and on the final epoch.

    Returns the model handed back by the last ``train_epoch`` call, or the
    ``model`` argument itself when ``epochs`` is 0.
    """
    final_epoch = epochs - 1

    for e in range(epochs):
        model, train_loss, train_acc = train_epoch(model, train_loader, optim)
        test_loss, test_acc = test_epoch(model, test_loader)

        if not verbose:
            continue
        # Only report on the logging epochs.
        if e % 10 != 0 and e != final_epoch:
            continue
        print(f"[Epoch {e}] Train/Test Loss: {train_loss:.2f} / {test_loss:.2f} | Train/Test Acc: {train_acc*100:.1f}% / {test_acc*100:.1f}%")

    return model

In [None]:
# train_test(model, train_loader, test_loader, optim, verbose=True)

## Hyperparameter tuning

In [None]:
def hyper_model(h, n_layers, lr, verbose=False):
    """Train a fresh ``MLP`` with the given hyperparameters and return it.

    h - hidden layer width (``hidden_size`` of the ``MLP``)
    n_layers - forwarded to ``MLP`` as ``n_layers``
    lr - learning rate of the AdamW optimizer
    verbose - forwarded to ``train_test`` to enable progress printing

    The model maps 2 input features to 3 class scores and is trained on the
    notebook-level ``train_loader`` / ``test_loader`` with ``train_test``'s
    default number of epochs.
    """
    # Seed first so every hyperparameter setting starts from the same RNG state.
    torch.manual_seed(1337)

    net = MLP(input_size=2, hidden_size=h, output_size=3, n_layers=n_layers)
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr)

    return train_test(net, train_loader, test_loader, optimizer, verbose=verbose)

In [None]:
model = hyper_model(h=10, n_layers=3, lr=0.1, verbose=True)

In [None]:
# hyper_model(h=8, n_layers=3, lr=0.5, verbose=True)

In [None]:
# hyper_model(h=8, n_layers=3, lr=0.1, verbose=True)

In [None]:
# hyper_model(h=16, n_layers=3, lr=0.1, verbose=True)

In [None]:
# hyper_model(h=16, n_layers=4, lr=0.1, verbose=True)

In [None]:
# model = hyper_model(h=32, n_layers=3, lr=0.1, verbose=True)

## Visualizing Model Performance

In [None]:
preds = torch.argmax(model(X), dim=-1)
preds.shape

In [None]:
# Two panels over the same points: coloured by ground-truth class on the left
# and by the network's predicted class on the right.
fig, ax = plt.subplots(1,2, figsize=(10,4))

panels = (
    (y, 'Original Spiral Classes'),
    (preds, 'Network Predicted Spiral Classes'),
)
for axis, (colors, title) in zip(ax, panels):
    axis.scatter(X[:,0], X[:,1], c=colors)
    axis.set_title(title)

plt.show()

In [None]:
((preds - y) == 0).float().mean().item()

In [None]:
# TODO: show all points as red or green for incorrect/correct classification vs ground truth 

In [None]:
# TODO: show model decision boundary

## Visualize 2D Embeddings

In [None]:
class MLPemb(nn.Module):
    """MLP with two 2-unit bottlenecks whose outputs can be plotted as 2D points.

    Layer order inside ``self.net``::

        0: Linear(input_size, hidden_size)
        1: ReLU
        2: Linear(hidden_size, 2)    # first 2D embedding
        3: Linear(2, hidden_size)
        4: ReLU
        5: Linear(hidden_size, 2)    # second 2D embedding
        6: Linear(2, output_size)

    ``n_layers`` is accepted but not used; the depth is fixed.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=3):
        super().__init__()  # IMPORTANT
        self.h = hidden_size

        def bottleneck_stage(fan_in):
            # Expand to the hidden width, apply ReLU, squeeze down to 2 units.
            return [
                nn.Linear(fan_in, hidden_size),
                nn.ReLU(),
                nn.Linear(hidden_size, 2),
            ]

        layers = bottleneck_stage(input_size) + bottleneck_stage(2)
        layers.append(nn.Linear(2, output_size))

        self.net = nn.Sequential(*layers)

    def forward(self, xb):
        """Return the output of the final Linear layer for the batch ``xb``."""
        return self.net(xb)


In [None]:
# Seed before building the model so its weight initialisation is repeatable.
torch.manual_seed(1337)

model = MLPemb(input_size = 2, hidden_size = 10, output_size = 3)
# NOTE(review): this rebinds the name `optim`, which the first cell imported
# from torch; the optimizer class is reached through `torch.optim` here, so the
# call itself is unaffected.
optim = torch.optim.AdamW(model.parameters(), lr=1e-3)

# Train with a smaller learning rate (1e-3) and far more epochs (2_000) than
# the hyper_model runs, which use lr=0.1 and train_test's default 100 epochs.
model = train_test(model, train_loader, test_loader, optim, verbose=True, epochs=2_000)

In [None]:
model

In [None]:
# empty dictionary for storing outputs of any layer
layer_outputs = {}

def get_layer_outputs(name):
    """Build a forward hook that records a layer's output under ``name``.

    The returned callable has the ``(module, inputs, output)`` signature used
    by ``register_forward_hook``. Each time the hooked layer runs, its output
    is written to the notebook-level ``layer_outputs`` dict at key ``name``,
    replacing any earlier value. The hook returns ``None``, so the value seen
    by the rest of the network is not altered.
    """

    def hook(module, inputs, output):
        # Store a detached tensor: the recorded activation is only meant for
        # inspection/plotting, so it should not keep the autograd graph alive
        # or carry requires_grad. Non-tensor outputs are stored unchanged.
        if isinstance(output, torch.Tensor):
            output = output.detach()
        layer_outputs[name] = output

    return hook

In [None]:
model.net[-2]

In [None]:
# visualize the second to last layer output in 2D

# identify the layers we want outputs from
layer1 = model.net[2] # layer after first ReLU
layer2 = model.net[-2] # second to last layer

# register forward hooks, keeping the returned handles so the hooks can be
# removed again; without this, every re-run of this cell would stack one more
# hook on each layer
hook_handles = [
    layer1.register_forward_hook(get_layer_outputs('first_emb')),
    layer2.register_forward_hook(get_layer_outputs('last_emb')),
]

# forward pass through model
# store outputs into layer_outputs dictionary
try:
    with torch.no_grad():
        scores = model(X)
finally:
    # the captured tensors stay in layer_outputs after the hooks are gone
    for handle in hook_handles:
        handle.remove()

h1 = layer_outputs['first_emb']
h2 = layer_outputs['last_emb']

preds = torch.argmax(scores, dim=-1)

# show all points in side-by-side subplots:
# ground truth vs model prediction vs points as transformed by the network
fig, ax = plt.subplots(1,3, figsize=(15,4))

ax[0].scatter(X[:,0], X[:,1], c=y)
ax[1].scatter(X[:,0], X[:,1], c=preds)
ax[2].scatter(h2[:,0], h2[:,1], c=y)

ax[0].set_title('Original Spiral Classes')
ax[1].set_title("Network's Predicted Spiral Classes")
ax[2].set_title("Network's Transformed 2D Points")

plt.show()


In [None]:
# Three panels, all coloured by ground-truth class: the original points, the
# captured 'first_emb' layer output (h1) and the captured 'last_emb' layer
# output (h2).
fig, ax = plt.subplots(1,3, figsize=(15,4))

panels = (
    (X, 'Original Spiral Classes'),
    (h1, "Network's Transformed LinReLU"),
    (h2, "Network's Transformed LinReLUx2"),
)
for axis, (points, title) in zip(ax, panels):
    axis.scatter(points[:,0], points[:,1], c=y)
    axis.set_title(title)

plt.show()

In [None]:
((preds - y) == 0).float().mean().item()

## Visualize Hidden Layer Outputs

(sorta)

In [None]:
class MLPuw(nn.Module):
    """Two-hidden-layer ReLU MLP with each layer held as its own attribute.

    Unlike a ``nn.Sequential`` stack, the layers are applied one by one in
    ``forward``, and the final Linear output is passed through a sigmoid, so
    every returned value lies between 0 and 1.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()  # IMPORTANT

        self.h = hidden_size

        # Layers are created in the order they are applied.
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.nonlin1 = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.nonlin2 = nn.ReLU()
        self.linear3 = nn.Linear(hidden_size, output_size)

    def forward(self, xb):
        """Return the sigmoid of the last Linear layer's output for ``xb``."""
        hidden1 = self.nonlin1(self.linear1(xb))
        hidden2 = self.nonlin2(self.linear2(hidden1))
        # NOTE(review): these sigmoid outputs are what the training loop feeds
        # to cross-entropy, which is normally given unsquashed scores —
        # confirm the sigmoid is intended during training.
        return torch.sigmoid(self.linear3(hidden2))


In [None]:
# (x, y, s, r, g, b)
# (x, y, r, g, b, a)
# (x, y, r_in, r1, g1, b1, r_out, r2, g2, b2)

### Is the `MLP` model the same as the `MLPuw` model?

In [None]:
# Seed before building the model so its weight initialisation is repeatable.
torch.manual_seed(1337)

model = MLP(input_size = 2, hidden_size = 6, output_size = 3)
torch.manual_seed(1337)
optim = torch.optim.AdamW(model.parameters(), lr=0.1)
# Seed again right before training. The MLPuw cell that follows uses the same
# sequence of seeds, so both runs start from matching RNG states.
torch.manual_seed(1337)
model = train_test(model, train_loader, test_loader, optim, verbose=True)

In [None]:
model

In [None]:
# Same seeds, sizes and learning rate as the MLP cell above, so the two runs
# differ only in the model class. MLPuw applies a sigmoid to its final Linear
# output, whereas MLP returns the final Linear output directly.
torch.manual_seed(1337)

model = MLPuw(input_size = 2, hidden_size = 6, output_size = 3)
torch.manual_seed(1337)
optim = torch.optim.AdamW(model.parameters(), lr=0.1)
torch.manual_seed(1337)

model = train_test(model, train_loader, test_loader, optim, verbose=True)

In [None]:
model