# Exercise 2 (EE-686)
Editor: Alireza Mohammadshahi, Florian Mai

# Train a simple Neural Network

Build a neural network with a minimum of 2 layers in order to do classification.

In [2]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
from sklearn.datasets import load_boston
import torch.utils.data as utils
import time
from sklearn.datasets import make_classification, make_regression
from torch.utils.data import Dataset
import pdb
from torch.utils.data.sampler import SubsetRandomSampler
%matplotlib inline

torch.manual_seed(1)    # reproducible

<torch._C.Generator at 0x7f44a126b7f0>

## Creating a PyTorch Dataset and DataLoaders
PyTorch provides standardized interfaces for handling datasets and data loading during neural network training. For a detailed explanation visit [the PyTorch tutorial](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class).

One of the most important components is the abstract 'Dataset' class. A subclass merely needs to implement the '\__getitem__' function, which returns the i-th example from the dataset for a given i, and the '\__len__' function, which returns the total number of examples in the dataset.

In [2]:
class RandomClassificationDataset(Dataset):
    """Synthetic classification dataset generated with scikit-learn.

    The feature matrix and the label vector are produced once, at
    construction time, by ``make_classification`` and stored as ``self.X``
    and ``self.y``. Single examples are converted to tensors on access.

    Params
    ------
    - n_classes: number of target classes to generate.
    - n_features: number of features per example.
    - n_samples: number of examples to generate.
    """

    def __init__(self, n_classes=2, n_features=100, n_samples=10000):
        features, targets = make_classification(
            n_samples=n_samples,
            n_features=n_features,
            n_classes=n_classes,
        )
        self.X = features
        self.y = targets

    def __len__(self):
        """Return the number of examples, i.e. the number of rows of ``self.X``."""
        return len(self.X)

    def __getitem__(self, index):
        """Return example ``index`` as a ``(float tensor, long tensor)`` pair."""
        example = torch.tensor(self.X[index], dtype=torch.float)
        target = torch.tensor(self.y[index], dtype=torch.long)
        return example, target
        

The other important components are 'Sampler's and 'DataLoader's. The former determines the order in which the examples appear during training, e.g., the 'RandomSampler' draws examples randomly. The latter is mainly responsible for loading and batching multiple examples based on the sampling strategy.

In the following 'get\_train\_valid\_loader' function, we create two subset samplers that split off 10% of the training set for use as a validation set.

In [3]:
def get_train_valid_loader(dataset,
                           batch_size=64,
                           random_seed = 1,
                           valid_size=0.1,
                           shuffle=True,
                           num_workers=4,
                           pin_memory=False):
    """
    Utility function for loading and returning train and valid
    multi-process iterators over a dataset.
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - dataset: the dataset to split; it must support len() and is passed
      unchanged to both DataLoaders.
    - batch_size: how many samples per batch to load.
    - random_seed: fix seed for reproducibility of the train/valid split.
    - valid_size: fraction of the dataset used for the validation set.
      Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the indices before splitting. If False,
      the first `valid_size` fraction of the dataset becomes the
      validation set.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    # split into train and validation sets
    num_train = len(dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        # A private RandomState yields the same permutation as seeding the
        # global NumPy generator, but does not reset the global random
        # state for the rest of the program.
        np.random.RandomState(random_seed).shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]

    # Both loaders read from the same dataset; the samplers restrict each
    # one to its own disjoint subset of indices.
    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size,
        sampler=SubsetRandomSampler(train_idx),
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size,
        sampler=SubsetRandomSampler(valid_idx),
        num_workers=num_workers, pin_memory=pin_memory,
    )

    return (train_loader, valid_loader)

## Creating the neural network architecture
In theory, it is possible to simply chain the basic operations that we have got to know last week to build up the entire neural network. However, it is more convenient to comply with the PyTorch framework, where every neural network (component) is implemented as a subclass of 'torch.nn.Module'. This makes it possible to reuse them again as components in other modules. 

In the example below, 'nn.Linear' and 'nn.ReLU' are such components that are used in our 'SimpleNet' architecture by instantiating them in the constructor and using them in the 'forward' function. Intuitively, the 'forward' function implements the forward pass through the network, and makes use of the 'forward' implementations of its components.

In [4]:
class SimpleNet(torch.nn.Module):
    """
    Feedforward classifier with one hidden layer: Linear -> ReLU -> Linear.
    The input is flattened to [batch_size, -1] before the first layer, and
    the output has size [batch_size, n_output].

    Params
    ------
    - n_feature: Size of the (flattened) input data.
    - n_hidden: Number of hidden units.
    - n_output: Number of output classes.
    """
    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        self.fc1 = nn.Linear(n_feature, n_hidden)
        self.relu = nn.ReLU()
        self.fcout = nn.Linear(n_hidden, n_output)

    def forward(self, x):
        # Keep the batch dimension and collapse everything else.
        activations = x.view(x.size(0), -1)
        for layer in (self.fc1, self.relu, self.fcout):
            activations = layer(activations)
        return activations

## Training procedure
Because PyTorch provides convenient functionality for doing the forward and backward pass, the core of the training loop consists of 
* a forward pass to get the predictions
* computing the error/loss
* a backward pass, i.e., computing the gradients via back-propagation
* applying an optimizer step.

In PyTorch, each of these steps is a one-liner, as you can see in the example below. However, before doing the backward() step, it is important to call optimizer.zero_grad(), because otherwise the gradients from previous iterations will be retained, which is undesirable in the usual case.

The remainder of the code is for adjusting the learning rate (if needed) and tracking the validation set performance.

In [5]:
def train(learning_rate, optimizer, loss_func, net, trainloader, valloader,
          compute_correct_outputs,
          epochs = 30,
          update_lr = 4,
          decay_rate = 0.8,
          print_every = 20):
    """
    Train `net` and periodically report its accuracy on the validation set.

    Every `print_every` optimizer steps the network is evaluated on
    `valloader`; whenever the validation accuracy exceeds the best value
    seen so far, the network weights are saved to 'model.ckpt'.

    Params
    ------
    - learning_rate: base learning rate; the rate used in epoch e is
      learning_rate * decay_rate ** (e // update_lr).
    - optimizer: optimizer whose param_groups' 'lr' is overwritten at the
      start of every epoch.
    - loss_func: callable mapping (outputs, labels) to a scalar loss.
    - net: the network to train; it is moved to the GPU if one is available.
    - trainloader: iterable of (data, labels) training batches.
    - valloader: iterable of (data, labels) validation batches.
    - compute_correct_outputs: callable mapping (outputs, labels) to the
      number of correct predictions in the batch.
    - epochs: number of passes over `trainloader`.
    - update_lr: number of epochs between two learning rate decays.
    - decay_rate: multiplicative factor applied at every decay.
    - print_every: number of training steps between two validation passes.
    """

    def adjust_learning_rate(lr, update_lr, optimizer, epoch):
        # Step decay: shrink the base rate by `decay_rate` once every
        # `update_lr` epochs and push it into all parameter groups.
        lr = lr * (decay_rate ** (epoch // update_lr))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    net.to(device)
    best_val = 0.0
    steps = 0

    for e in range(epochs):
        adjust_learning_rate(learning_rate, update_lr, optimizer, e)
        for data, labels in trainloader:
            steps += 1
            data = data.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = net(data)
            loss = loss_func(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if steps % print_every != 0:
                continue

            # Validation pass. Remember the current mode so that training
            # continues in it afterwards; otherwise the network would stay
            # in eval mode for the rest of the training.
            was_training = net.training
            net.eval()
            correct = 0
            total = 0
            try:
                with torch.no_grad():
                    for val_data, val_labels in valloader:
                        val_data = val_data.to(device)
                        val_labels = val_labels.to(device)
                        val_outputs = net(val_data)
                        total += val_labels.size(0)
                        correct += compute_correct_outputs(val_outputs, val_labels)
            finally:
                net.train(was_training)

            # An empty validation loader reports 0% instead of dividing by zero.
            accuracy = 100.0 * correct / total if total else 0.0
            # The literal 24-space run reproduces the original message layout.
            print(('Accuracy of the network on the %d val data: '
                   + ' ' * 24
                   + '%f %%') % (total, accuracy))
            if accuracy > best_val:
                best_val = accuracy
                torch.save(net.state_dict(), 'model.ckpt')

In [6]:
def compute_correct_classification(outputs, labels):
    """
    Count the correctly classified examples in a batch.

    Params
    ------
    - outputs: per-class scores, indexed as [example, class].
    - labels: the true class index of every example.
    Returns
    -------
    - correct: number of examples whose highest-scoring class equals the
      label, as a Python int.
    """
    # Softmax is monotonic, so the arg-max of the raw scores is already the
    # predicted class. Taking it directly avoids the deprecated
    # implicit-dim softmax call and is not affected by softmax saturation.
    predicted = outputs.argmax(dim=1)
    correct = (predicted == labels).sum().item()
    return correct

## Configuring the optimizer
Finally, it remains to choose an appropriate loss function and optimizer.
The 'CrossEntropyLoss' module expects the predicted logits and the class labels as input, applies log-softmax to the logits, and returns the negative log-likelihood loss.

In [7]:
# Create the dataset with its defaults (2 classes, 100 features, 10000
# samples) and split it into training and validation loaders (10% of the
# examples are held out for validation by default).
dataset = RandomClassificationDataset()
trainloader, valloader = get_train_valid_loader(dataset)

# Instantiate the neural net; n_feature and n_output must match the number
# of features and classes of the dataset created above.
net = SimpleNet(n_feature=100, n_hidden=10, n_output=2)     # define the network
print(net)  # show the network architecture

# Define the loss and the optimizer.
learning_rate = 1.5e-4
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)  # choose the optimizer you want and tune its hyperparameters
loss_func = torch.nn.CrossEntropyLoss()  # the target labels are class indices, NOT one-hot vectors

SimpleNet(
  (fc1): Linear(in_features=100, out_features=10, bias=True)
  (relu): ReLU()
  (fcout): Linear(in_features=10, out_features=2, bias=True)
)


In [8]:
# Run training with the defaults of train(): 30 epochs, with a validation
# pass (and accuracy print-out) every 20 training steps.
train(learning_rate, optimizer, loss_func, net, trainloader, valloader, compute_correct_classification)

cpu
Accuracy of the network on the 1000 val data:                         50.300000 %


  


Accuracy of the network on the 1000 val data:                         50.800000 %
Accuracy of the network on the 1000 val data:                         51.600000 %
Accuracy of the network on the 1000 val data:                         52.300000 %
Accuracy of the network on the 1000 val data:                         52.600000 %
Accuracy of the network on the 1000 val data:                         53.100000 %
Accuracy of the network on the 1000 val data:                         53.400000 %
Accuracy of the network on the 1000 val data:                         54.400000 %
Accuracy of the network on the 1000 val data:                         55.800000 %
Accuracy of the network on the 1000 val data:                         57.300000 %
Accuracy of the network on the 1000 val data:                         58.500000 %
Accuracy of the network on the 1000 val data:                         59.600000 %
Accuracy of the network on the 1000 val data:                         61.600000 %
Accuracy of the 

Accuracy of the network on the 1000 val data:                         92.400000 %
Accuracy of the network on the 1000 val data:                         92.400000 %
Accuracy of the network on the 1000 val data:                         92.500000 %
Accuracy of the network on the 1000 val data:                         92.500000 %
Accuracy of the network on the 1000 val data:                         92.500000 %
Accuracy of the network on the 1000 val data:                         92.500000 %
Accuracy of the network on the 1000 val data:                         92.600000 %
Accuracy of the network on the 1000 val data:                         92.600000 %
Accuracy of the network on the 1000 val data:                         92.500000 %
Accuracy of the network on the 1000 val data:                         92.600000 %
Accuracy of the network on the 1000 val data:                         92.600000 %
Accuracy of the network on the 1000 val data:                         92.700000 %
Accuracy of the 

Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         93.000000 %
Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         92.900000 %
Accuracy of the network on the 1000 val data:                         93.000000 %


After training, the model should be saved to be tested on the test dataset or to be used in a real-life application. To save a model in pytorch:

In [9]:
# Save only the parameters (the state dict), not the whole module object.
# NOTE: train() writes its best-validation weights to this same path, so
# this call overwrites that file with the current (final) weights.
torch.save(net.state_dict(), 'model.ckpt')

To load a pretrained model:

In [10]:
# map_location="cpu" lets a checkpoint that was saved from a GPU run be
# loaded on a machine without CUDA; load_state_dict then copies the values
# into the parameters of `net`, whichever device they are on.
checkpoint = torch.load("model.ckpt", map_location="cpu")
net.load_state_dict(checkpoint)

# Exercise
Your task is to implement a feed forward neural network to classify an image dataset. The dataset contains 10 classes of images, and you should write a simple neural network to classify it with a reasonable accuracy.

## Helper functions:

We give you the helper function to load and preprocess the dataset, so you should just write your network and the training procedure.

In [5]:
from torchvision import datasets,transforms

def get_train_valid_loader(data_dir='../data',
                           batch_size=64,
                           augment=False,
                           random_seed = 1,
                           valid_size=0.1,
                           shuffle=True,
                           show_sample=False,
                           num_workers=4,
                           pin_memory=False):
    """
    Utility function for loading and returning train and valid
    multi-process iterators over the CIFAR-10 dataset. A sample
    of 9 images can be optionally displayed.
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset; it is downloaded there if
      it is not already present.
    - batch_size: how many samples per batch to load.
    - augment: whether to apply data augmentation (random 32x32 crop with
      padding 4 and random horizontal flip). Only applied on the train split.
    - random_seed: fix seed for reproducibility of the train/valid split.
    - valid_size: fraction of the training set used for
      the validation set. Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot a sample of 9 images of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    # Load the dataset. Both objects wrap the same CIFAR-10 training split;
    # they differ only in the transform, so that augmentation is never
    # applied to the validation examples.
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=valid_transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        # A private RandomState yields the same permutation as seeding the
        # global NumPy generator, but does not reset the global random
        # state for the rest of the program.
        np.random.RandomState(random_seed).shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=9, shuffle=shuffle,
            num_workers=num_workers, pin_memory=pin_memory,
        )
        # Use the built-in next(): Python 3 iterators have no .next() method.
        images, labels = next(iter(sample_loader))
        # Reorder from channels-first to channels-last for plotting.
        X = images.numpy().transpose([0, 2, 3, 1])
        # NOTE(review): plot_images is not defined in the visible part of
        # this file; confirm it is provided before using show_sample=True.
        plot_images(X, labels)

    return (train_loader, valid_loader)

# Build the CIFAR-10 loaders with the default settings (data directory
# '../data', 10% validation split). This rebinds the trainloader and
# valloader names used earlier in the file.
trainloader, valloader = get_train_valid_loader()

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz


100%|█████████▉| 170418176/170498071 [02:45<00:00, 1118278.62it/s]

Files already downloaded and verified


170500096it [03:00, 1118278.62it/s]                               

# The end