In [None]:
# Useful starting lines
%matplotlib inline

import numpy as np
import torch
import matplotlib.pyplot as plt

from helper import generate_dataset, visualize_one_dataset, visualize_datasets, predict_grid, visualize_predictions
from torch.utils import data

%load_ext autoreload
%autoreload 2

# Generate data

We provide a helper function which generates artificial datasets

In [None]:
# Sample budget handed to generate_dataset for every toy problem.
NUM_SAMPLES = 100

# One generate_dataset call per problem, issued in the order blobs, moons, xor, bar;
# each call is unpacked into its (train, test) pair.
(
    (blobs_train, blobs_test),
    (moons_train, moons_test),
    (xor_train, xor_test),
    (squares_train, squares_test),
) = [generate_dataset(kind, NUM_SAMPLES) for kind in ("blobs", "moons", "xor", "bar")]

# Show the container types of one train/test pair as the cell output.
type(blobs_train), type(blobs_test)

Let us plot the train and the test data sets. Note the differences between the two.

In [None]:
# (title, train split, test split) triples, in the order Blobs, Moons, Bar, XOR.
datasets = list(zip(
    ("Blobs", "Moons", "Bar", "XOR"),
    (blobs_train, moons_train, squares_train, xor_train),
    (blobs_test, moons_test, squares_test, xor_test),
))

visualize_datasets(datasets)

# Optimizing using PyTorch

Write an optimizer in PyTorch using its built-in SGD class (`torch.optim.SGD`).

In [None]:
def optimize(train_data, test_data, model, loss_fn = torch.nn.CrossEntropyLoss(), lr = 0.1, num_epochs = 1000):
    """
    Train `model` with mini-batch Stochastic Gradient Descent and record the test loss.

    :param train_data: torch.utils.data.dataset.Subset
    :param test_data: torch.utils.data.dataset.Subset
    :param model: torch.nn.Module (see https://pytorch.org/docs/stable/nn.html)
    :param loss_fn: torch.nn.modules.loss (see https://pytorch.org/docs/stable/nn.html#loss-functions)
    :param lr: float, learning rate
    :param num_epochs: int, number of passes over the training data

    :return:
    - losses, a list of loss values on the test dataset, one value collected at the end of
      each pass over the training dataset (epoch)
    """
    # Default PyTorch utilities for loading the training and testing data.
    # Training uses shuffled mini-batches of 10 samples.
    train_loader = data.DataLoader(train_data, batch_size=10, shuffle=True)
    # The whole test set is evaluated as ONE batch so that exactly one loss is appended per
    # epoch regardless of the test-set size (max(..., 1) keeps an empty dataset legal).
    test_loader = data.DataLoader(test_data, batch_size=max(len(test_data), 1))
    losses = []

    # Plain SGD over every trainable parameter of the model.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Run SGD
    for epoch in range(num_epochs):
        for minibatch, label in train_loader:
            optimizer.zero_grad()  # Zeroes the previously computed gradients

            # Calling the module (rather than .forward directly) also runs registered hooks.
            prediction = model(minibatch)
            loss = loss_fn(prediction, label)

            loss.backward()   # compute the minibatch gradient
            optimizer.step()  # perform an SGD step

        # Compute the test loss; no_grad tells torch we do not intend to call .backward.
        with torch.no_grad():
            for minibatch, label in test_loader:
                test_loss = loss_fn(model(minibatch), label).item()
                losses.append(test_loss)

                # Print the test loss to monitor progress
                if epoch % 100 == 0:
                    print(epoch, test_loss)

    return losses

# RBF Kernel

An RBF kernel is the most commonly used "out of the box" kernel for non-linear data. Intuitively, an RBF kernel blurs the training data and uses the result for classification, i.e., the individual green and blue points above get blurred into green and blue regions, which are then used to make predictions. A critical parameter `sigma` defines the width of this blurring: a large `sigma` results in more blurring.

See [here](https://github.com/epfml/ML_course/blob/master/lectures/07/lecture07b_kernelRidge.pdf) for more information on the `kernel trick` and [here](https://www.cs.huji.ac.il/~shais/Lectures2014/lecture8.pdf) for an in-depth mathematical treatment. Here, we will try to develop an intuition for the RBF kernel.

In [None]:
class RadialBasisFunction(torch.nn.Module):
    """
    Kernel classifier that places a Gaussian (RBF) bump on every training point.

    The score of class c for an input x is
        sum_i alpha[i, c] * exp(-gamma * ||x - x_i||^2),   gamma = 1 / (2 * sigma^2),
    where x_i ranges over the training points stored by `init_params` and `alpha`
    is the learnable weight matrix.
    """

    def __init__(self, sigma=0.1):
        """
        :param sigma: float, width of the Gaussian bump (larger means more blurring)
        """
        super().__init__()
        self.gamma = 1 / (2 * sigma ** 2)
        self.num_classes = 2
        self.name = 'RBF'

    def init_params(self, train_data):
        """
        Store the training points and create the weights `alpha`, initialised to zero.

        :param train_data: a Subset of a tensor dataset; the features are read from
            `train_data.dataset.tensors[0]` at the rows given by `train_data.indices`
        """
        # Feature rows of the training subset, shape (num_train, dim).
        data_matrix = train_data.dataset.tensors[0][train_data.indices, :]
        # Stored as (1, dim, num_train) so it broadcasts against a (batch, dim, 1) minibatch.
        self.data_matrix = data_matrix.t().unsqueeze(0)

        # One weight per (training point, class). The dtype follows the data so that
        # `K @ alpha` in forward never mixes float32 with float64.
        if data_matrix.is_floating_point():
            alpha_dtype = data_matrix.dtype
        else:
            alpha_dtype = torch.get_default_dtype()
        zeros = torch.zeros(data_matrix.shape[0], self.num_classes, dtype=alpha_dtype)

        self.alpha = torch.nn.Parameter(zeros)

    def forward(self, minibatch):
        """
        :param minibatch: tensor of shape (batch, dim)
        :return: tensor of class scores, shape (batch, num_classes)
        """
        # (batch, dim, 1) - (1, dim, num_train) broadcasts to (batch, dim, num_train).
        differences = self.data_matrix - minibatch.unsqueeze(-1)
        # Summing over the feature axis only keeps the result two-dimensional,
        # (batch, num_train), even when the batch or the training set has a single element.
        K = torch.exp(-self.gamma * torch.sum(differences ** 2, dim=1))
        return K @ self.alpha


Try your code on the blobs data set. Your test loss should be around 0.01 by the end.

In [None]:
# RBF classifier with kernel width sigma = 0.5, centred on the blobs training points.
rbf_model = RadialBasisFunction(sigma=0.5)
rbf_model.init_params(blobs_train)

# Train with the default loss and learning rate; keep the per-epoch test losses.
rbf_blob_losses = optimize(train_data=blobs_train, test_data=blobs_test, model=rbf_model)

Plot the training data points and the predictions made

In [None]:
# Grab the current axes so both helper calls below draw onto the same plot.
ax = plt.gca()
# Helper from helper.py (not shown here): presumably renders rbf_model's predictions
# over a grid of points on `ax` — confirm against the helper.
predict_grid(rbf_model, ax)
# Overlay the blobs training points on the same axes.
visualize_one_dataset(blobs_train, ax)

## Effect of sigma

Plot the results while varying the value of `sigma` (the argument passed to `RadialBasisFunction`) in [0.1, 0.5, 1]. What do you observe? Which is the best value?


In [None]:
# Kernel width under study; change sigma here and re-run the cell to compare values.
rbf_model = RadialBasisFunction(sigma=0.1)
visualize_predictions(datasets, rbf_model, optimize)

# Neural Networks

We will create a simple 2 layer neural network using the default functions provided by PyTorch

In [None]:
class NeuralNetwork(torch.nn.Sequential):
    """
    Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    The output has one score per class (`num_classes` = 2).
    """

    def __init__(self, hidden_layer_size, input_dim=2):
        """
        :param hidden_layer_size: int, number of units in the hidden layer
        :param input_dim: int, number of input features (defaults to 2; this assumes the
            notebook's toy datasets are two-dimensional — adjust if yours are not)
        """
        num_classes = 2
        # Build the layers first: attributes are assigned only after torch.nn.Module has
        # been initialised, which is the assignment order PyTorch expects.
        super().__init__(
            torch.nn.Linear(input_dim, hidden_layer_size),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_layer_size, num_classes),
        )
        self.name = 'NN'
        self.num_classes = num_classes

    def init_params(self, train_data):
        ''' No need to do anything since it is taken care of by torch.nn.Sequential'''
    
        

Plot the results while varying the size of the hidden layer in [20, 200, 1000]

What do you observe?

In [None]:
# Hidden-layer width under study; change it here and re-run the cell to compare sizes.
nn_model = NeuralNetwork(hidden_layer_size=200)
visualize_predictions(datasets, nn_model, optimize)

Also try increasing the number of layers. How does this affect the learned classifier?

[This](https://playground.tensorflow.org/) is a cool website where you can play around more with training of neural networks on toy datasets.