# Exploring CNN - Fine Tuning
## Improving CNN Performance

Exploring:
 - Data Augmentation
 - Hyperparameter Tuning - Learning Rate

 Helper Functions Code Credit - Udacity DL Nanodegree Program

---
## Data Augmentation

Here we write two functions: one that creates the appropriate transforms for the training, validation and test datasets, and one that creates the corresponding data loaders.

As usual, complete the code in the sections marked with `# YOUR CODE HERE`.

In [3]:
import torchvision.transforms as T
from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler
import random
import multiprocessing
from helpers import get_train_val_data_loaders, get_test_data_loader
import torch.multiprocessing
# Select the 'file_system' sharing strategy for torch.multiprocessing.
# NOTE(review): presumably chosen to avoid "too many open files" errors when
# DataLoaders run with several worker processes — confirm.
torch.multiprocessing.set_sharing_strategy('file_system')

# Let's write a function that gives us the transforms so we can optimize the hyperparameters
def get_transforms(rand_augment_magnitude):
    """Build the torchvision transform pipelines for every dataset split.

    Args:
        rand_augment_magnitude: Strength of the RandAugment operations applied
            to training images; forwarded unchanged as ``magnitude``. It should
            be tuned through experiments: the higher the value, the stronger
            the regularization effect.

    Returns:
        A dict with the keys ``"train"``, ``"valid"`` and ``"test"``, each
        mapping to its own ``T.Compose`` pipeline. Only the training pipeline
        performs augmentation; the validation and test pipelines are identical
        to each other.
    """
    # Per-channel mean and std of CIFAR-10 over the dataset
    channel_mean = (0.49139968, 0.48215827, 0.44653124)
    channel_std = (0.24703233, 0.24348505, 0.26158768)

    def tensor_and_normalize():
        # Fresh transform objects on every call, so the pipelines never share
        # instances with one another.
        return [T.ToTensor(), T.Normalize(channel_mean, channel_std)]

    def evaluation_pipeline():
        # Resize and CenterCrop are no-ops for 32x32 CIFAR-10 images, but they
        # are kept because a non-academic dataset would need them.
        return T.Compose([T.Resize(32), T.CenterCrop(32)] + tensor_and_normalize())

    augmentation_steps = [
        # CIFAR-10 images are 32x32: enlarge them slightly so that a random
        # 32x32 crop can then pick a random part of the image.
        T.Resize(40),
        T.RandomCrop(32),
        # Horizontal flip is not part of RandAugment according to the
        # RandAugment paper, so it is applied explicitly.
        T.RandomHorizontalFlip(0.5),
        # RandAugment takes the number of operations applied to each image
        # (num_ops) and the strength of those operations (magnitude). See
        # https://pytorch.org/vision/main/generated/torchvision.transforms.RandAugment.html
        T.RandAugment(
            num_ops=2,
            magnitude=rand_augment_magnitude,
            interpolation=T.InterpolationMode.BILINEAR,
        ),
    ]
    train_pipeline = T.Compose(augmentation_steps + tensor_and_normalize())

    return {
        "train": train_pipeline,
        "valid": evaluation_pipeline(),
        "test": evaluation_pipeline(),
    }


def get_data_loaders(batch_size, valid_size, transforms, num_workers, random_seed=42):
    """Create the CIFAR-10 train, validation and test data loaders.

    The CIFAR-10 training set is split into a training part and a validation
    part using a seeded random permutation, so that different experiments run
    on the same split.

    Args:
        batch_size: Number of samples per batch for all three loaders.
        valid_size: Fraction of the training set reserved for validation;
            ``floor(valid_size * n_tot)`` samples go to the validation split.
        transforms: Dict with the keys ``'train'``, ``'valid'`` and ``'test'``
            holding the transform to apply to the respective split.
        num_workers: Number of worker processes used by each DataLoader.
        random_seed: Seed applied to the torch, ``random`` and numpy global
            random number generators before the split is computed.

    Returns:
        A dict ``{'train': ..., 'valid': ..., 'test': ...}`` of
        ``torch.utils.data.DataLoader`` objects.

    Side effects:
        Reseeds the global torch, ``random`` and numpy RNGs, and downloads
        CIFAR-10 into the ``data`` directory (``download=True``).
    """
    # numpy is not imported at notebook level, so import it here; otherwise
    # the `np` references below raise NameError.
    import numpy as np

    # Reseed random number generators to get a deterministic split. This is
    # useful when comparing experiments, so you know they all run on the same
    # data. In principle this should be repeated a few times (cross
    # validation) to see the variability of the measurements, but that is not
    # done here for simplicity.
    torch.manual_seed(random_seed)
    random.seed(random_seed)
    np.random.seed(random_seed)

    # The same underlying training set is instantiated twice so that the
    # training and validation splits each get their own transforms.
    train_data = datasets.CIFAR10("data", train=True, download=True, transform=transforms['train'])
    valid_data = datasets.CIFAR10("data", train=True, download=True, transform=transforms['valid'])

    # Number of items reserved for the validation set
    n_tot = len(train_data)
    split = int(np.floor(valid_size * n_tot))

    # The first `split` shuffled indices form the validation set, the rest
    # form the training set, so the two splits never overlap.
    shuffled_indices = torch.randperm(n_tot)
    train_idx, valid_idx = shuffled_indices[split:], shuffled_indices[:split]

    # Samplers that draw batches only from the indices of each split
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # NOTE: train_data feeds the train loader and valid_data feeds the valid
    # loader, so the respective transforms are applied.
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_data, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers
    )

    # The official CIFAR-10 test split (train=False) is used as-is, no sampler
    test_data = datasets.CIFAR10("data", train=False, download=True, transform=transforms['test'])
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=batch_size, num_workers=num_workers
    )

    return {'train': train_loader, 'valid': valid_loader, 'test': test_loader}

# Human-readable names of the ten image classes.
# NOTE(review): presumably listed in CIFAR-10 label-index order — confirm
# against the dataset's class_to_idx mapping.
classes = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]

# Model Definition

Here we use a model very similar to the one we used before, but we add Batch Normalization, which makes training faster and more robust and also allows us to go deeper.

In [4]:
import torch.nn as nn
import torch.nn.functional as F

# define the CNN architecture
class Net(nn.Module):
    """CNN classifier: four conv blocks followed by a two-layer dense head.

    Each conv block is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, 2), so every block halves the spatial size. The dense head
    expects 128 * 2 * 2 flattened features, which corresponds to 3x32x32
    inputs (32 -> 16 -> 8 -> 4 -> 2 after the four poolings).

    Args:
        n_classes: Number of output scores produced by the final linear layer.
    """

    def __init__(self, n_classes=10):
        super().__init__()

        # Channel counts at the boundaries of the conv blocks: the 3-channel
        # input followed by the output width of each block. Since we use
        # BatchNorm and data augmentation, we can afford the fourth block.
        channel_plan = (3, 16, 32, 64, 128)

        layers = []
        for c_in, c_out in zip(channel_plan[:-1], channel_plan[1:]):
            layers.extend(
                [
                    nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
                    nn.BatchNorm2d(c_out),
                    nn.ReLU(),
                    nn.MaxPool2d(2, 2),
                ]
            )

        # Dense head. For 32x32 inputs the last block outputs 128x2x2, which
        # Flatten turns into a vector of 512 features per sample.
        layers.extend(
            [
                nn.Flatten(),
                nn.Linear(128 * 2 * 2, 500),  # -> 500
                nn.Dropout(0.5),
                # BatchNorm1d (not BatchNorm2d): the activations are flat here
                nn.BatchNorm1d(500),
                nn.ReLU(),
                nn.Linear(500, n_classes),  # -> n_classes
            ]
        )

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch ``x`` through the sequential model."""
        return self.model(x)


# Instantiate the CNN defined above with its default number of output
# classes (n_classes=10)
model = Net()
