# Setup

In [None]:
import random
import torch
from torch import nn
import math

from plot_lib import plot_data, plot_model
from matplotlib import pyplot as plt

# Run on the first CUDA GPU when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Classification

## Create Dataset

In [None]:
# Set seeds and dimensions of vectors/layers
seed = 12345
random.seed(seed)
torch.manual_seed(seed)
N = 1000  # num_samples_per_class
D = 2  # dimensions
C = 3  # num_classes
H = 100  # num_hidden_units


# Build a C-armed spiral: rows [N * c, N * (c + 1)) of X hold the 2-D points
# of class c, and the matching entries of y hold the integer label c.
X = torch.zeros(N * C, D).to(device)
y = torch.zeros(N * C, dtype=torch.long).to(device)

# Radius of each point: grows linearly from 0 to 1 along an arm. It is the
# same for every class, so it is computed once outside the loop.
t = torch.linspace(0, 1, N)

for c in range(C):
    # Angle fed to sin() and cos(). For class c it sweeps from
    # (2 * pi / C) * c (at t = 0) to (2 * pi / C) * (2 + c) (at t = 1),
    # perturbed by Gaussian noise with standard deviation 0.2.
    inner_var = torch.linspace(
        (2 * math.pi / C) * c,
        (2 * math.pi / C) * (2 + c),
        N,
    ) + torch.randn(N) * 0.2

    # Fill the whole arm in one vectorized assignment instead of one row at a
    # time: point = radius * (sin(angle), cos(angle)).
    rows = slice(N * c, N * (c + 1))
    arm = t.unsqueeze(1) * torch.stack(
        (torch.sin(inner_var), torch.cos(inner_var)), dim=1
    )
    X[rows] = arm.to(device)
    y[rows] = c

print("Shapes:")
print("X:", tuple(X.size()))
print("y:", tuple(y.size()))


# visualise the data
plot_data(X, y)

## Build and Run Classifiers

In [None]:
learning_rate = 1e-3  # step size passed to the Adam optimizer
lambda_l2 = 1e-5  # weight_decay (L2 penalty) passed to the Adam optimizer


def build_classifier(model: nn.Sequential, num_epochs: int = 1000) -> None:
    """Train ``model`` as a classifier on the module-level ``X``/``y`` and plot it.

    The model is moved to ``device`` and optimized full-batch (the whole of
    ``X`` in every step) with Adam and cross-entropy loss. After training, the
    epoch count, loss and accuracy are printed and ``plot_model`` is called.

    The printed loss and accuracy come from the last forward pass, i.e. they
    were measured just before the final parameter update.

    Args:
        model: Network mapping rows of ``X`` to one score (logit) per class.
        num_epochs: Number of full-batch optimization steps. Defaults to 1000.

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    model.to(device)  # Move parameters to the selected device (GPU if available)

    # nn package also has different loss functions.
    # we use cross entropy loss for our classification task
    criterion = torch.nn.CrossEntropyLoss()

    # we use the optim package to apply Adam for our parameter updates;
    # weight_decay adds the L2 penalty
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=lambda_l2
    )

    # Training
    for _ in range(num_epochs):
        # Feed forward to get the logits
        y_pred = model(X)

        # Compute the loss and accuracy; the predicted class is the index of
        # the largest logit in each row
        loss = criterion(y_pred, y)
        _, predicted = torch.max(y_pred, 1)
        acc = (y == predicted).sum().float() / len(y)

        # zero the gradients before running the backward pass.
        optimizer.zero_grad()

        # Backward pass to compute the gradient
        # of loss w.r.t our learnable params.
        loss.backward()

        # Update params
        optimizer.step()

    # Report the number of epochs actually run (not the last loop index).
    print(f"NUM EPOCHS: {num_epochs}, LOSS: {loss.item():.6f}, ACCURACY: {acc:.3f}")
    # Plot trained model
    plot_model(X, y, model)

### Linear model

In [None]:
# Build the linear model from the nn package. Each nn.Linear module holds its
# own weight and bias; no activation sits between the two layers.
linear_layers = [
    nn.Linear(D, H),  # D inputs -> H hidden units
    nn.Linear(H, C),  # H hidden units -> C class scores
]
linear_model = nn.Sequential(*linear_layers)

build_classifier(linear_model)

### Add non-linearity

In [None]:
# Same two Linear layers as the linear model, with a ReLU between them.
nonlinear_layers = [
    nn.Linear(D, H),  # D inputs -> H hidden units
    nn.ReLU(),  # Note non-linearity added here
    nn.Linear(H, C),  # H hidden units -> C class scores
]
nonlinear_model = nn.Sequential(*nonlinear_layers)

build_classifier(nonlinear_model)

# Regression

## Create Dataset

In [None]:
# Reseed and redefine the layer dimensions for the regression task
seed = 1
random.seed(seed)
torch.manual_seed(seed)
N = 1000  # NOTE: not used below; the number of samples is hard-coded to 100
D = 1  # input dimensions
C = 1  # output dimensions
H = 100  # num_hidden_units


# 100 evenly spaced inputs in [-1, 1], shaped as a single column
X = torch.linspace(-1, 1, 100).unsqueeze(1).to(device)
# Targets: cube of the input plus uniform noise scaled by 0.3
y = X ** 3 + torch.rand(X.shape).to(device) * 0.3

print("Shapes:")
print("X:", tuple(X.shape))
print("y:", tuple(y.shape))

# visualise the data
plt.scatter(X.cpu().numpy(), y.cpu().numpy())
plt.axis('equal');

## Build and Run Regressors

In [None]:

def build_regressor(model: nn.Sequential, num_epochs: int = 1000) -> None:
    """Train ``model`` as a regressor on the module-level ``X``/``y`` and plot the fit.

    The model is moved to ``device`` and optimized full-batch (the whole of
    ``X`` in every step) with Adam and mean-squared-error loss, using the
    module-level ``learning_rate`` and ``lambda_l2``. After training, the epoch
    count and loss are printed, and the data is scattered with the model's
    predictions drawn over it as a red line.

    The printed loss and the plotted predictions come from the last forward
    pass, i.e. they were computed just before the final parameter update.

    Args:
        model: Network mapping rows of ``X`` to predictions comparable to ``y``.
        num_epochs: Number of full-batch optimization steps. Defaults to 1000.

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    model.to(device)  # Move parameters to the selected device (GPU if available)

    # nn package also has different loss functions.
    # we use MSE loss for our regression task
    criterion = torch.nn.MSELoss()

    # we use the optim package to apply Adam for our parameter updates;
    # weight_decay adds the L2 penalty
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=lambda_l2
    )

    # Training
    for _ in range(num_epochs):
        # Feed forward to get the predictions
        y_pred = model(X)

        # Compute the loss (MSE)
        loss = criterion(y_pred, y)

        # zero the gradients before running
        # the backward pass.
        optimizer.zero_grad()

        # Backward pass to compute the gradient
        # of loss w.r.t our learnable params.
        loss.backward()

        # Update params
        optimizer.step()

    # Report the number of epochs actually run (not the last loop index).
    print(f"NUM EPOCHS: {num_epochs}, LOSS or MSE: {loss.item():.6f}")
    # y_pred is attached to the autograd graph, so detach it before converting
    # to NumPy.
    plt.scatter(X.cpu().numpy(), y.cpu().numpy())
    plt.plot(X.cpu().numpy(), y_pred.detach().cpu().numpy(), 'r-', lw=5)
    plt.axis('equal');

### Linear model

In [None]:
# Two Linear layers with no activation in between
layers = [
    nn.Linear(D, H),  # D inputs -> H hidden units
    nn.Linear(H, C),  # H hidden units -> C outputs
]
model = nn.Sequential(*layers)

build_regressor(model)

### Adding non-linearity

In [None]:
# Same two Linear layers, now with a ReLU non-linearity between them
layers = [
    nn.Linear(D, H),  # D inputs -> H hidden units
    nn.ReLU(),
    nn.Linear(H, C),  # H hidden units -> C outputs
]
model = nn.Sequential(*layers)

build_regressor(model)