<a href="https://colab.research.google.com/github/ZhiCLiu/DS-WorkShop/blob/main/CIFAR10_Walkthrough_mitGPU_Augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch 
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
from torch.optim import SGD, Adam
import matplotlib.pyplot as plt

import torchvision
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms

In [None]:
# Baseline pipeline without augmentation: only convert each image to a tensor.
transform = transforms.Compose([
    transforms.ToTensor(),
])

In [None]:
# Load both CIFAR-10 splits with the baseline (un-augmented) pipeline.
# The two calls differ only in the `train` flag, so the rest is shared.
cifar_args = dict(root='data/', download=True, transform=transform)
train_dataset = CIFAR10(train=True, **cifar_args)
validation_dataset = CIFAR10(train=False, **cifar_args)

In [None]:
# Each dataset item is an (image, label) pair; keep one image as the
# reference picture for the augmentation demos below.
sample_image, sample_label = train_dataset[19]
# imshow wants channels last, so reorder the axes via permute(1, 2, 0).
plt.imshow(sample_image.permute(1, 2, 0));

In [None]:
# Demo: random horizontal flip (random, so the output may equal the input).
h_flip = transforms.RandomHorizontalFlip()
image1 = h_flip(sample_image)
plt.imshow(image1.permute(1, 2, 0));

In [None]:
# Demo: random vertical flip (random, so the output may equal the input).
v_flip = transforms.RandomVerticalFlip()
image2 = v_flip(sample_image)
plt.imshow(image2.permute(1, 2, 0));

In [None]:
# Demo: rotation by a random angle, with 45 as the degree limit.
rotate = transforms.RandomRotation(45)
image3 = rotate(sample_image)
plt.imshow(image3.permute(1, 2, 0));

In [None]:
# Demo: random affine transform, with 10 as the rotation degree limit.
affine = transforms.RandomAffine(10)
image4 = affine(sample_image)
plt.imshow(image4.permute(1, 2, 0));

In [None]:
# Demo: random 32x32 crop.
# NOTE(review): the second positional argument of RandomCrop is `padding`,
# not the crop width, so this pads the image by 32 px on every side before
# cropping. Confirm that this is intended; a small padding such as 4 is the
# more common choice for 32x32 images.
crop = transforms.RandomCrop(32, padding=32)
image5 = crop(sample_image)
plt.imshow(image5.permute(1, 2, 0));

In [None]:
# Training pipeline: random augmentations, then tensor conversion and
# normalization of every channel with mean 0.5 and std 0.5.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    # CIFAR-10 images are already 32x32, so RandomCrop(32) without padding
    # always returns the input unchanged. Padding by 4 px first turns the
    # crop into a random translation, which is the intended augmentation.
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

In [None]:
# Validation pipeline: no augmentation, only tensor conversion plus the
# same normalization (mean 0.5, std 0.5 per channel) used for training.
validation_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [None]:
# Re-create both splits, this time with their dedicated pipelines:
# augmentation for training, plain normalization for validation.
train_dataset = CIFAR10(
    root='data/',
    train=True,
    transform=train_transform,
    download=True,
)
validation_dataset = CIFAR10(
    root='data/',
    train=False,
    transform=validation_transform,
    download=True,
)

In [None]:
# Batch both splits identically; only the training loader shuffles.
loader_args = dict(batch_size=32, num_workers=2)
train_dl = DataLoader(train_dataset, shuffle=True, **loader_args)
validation_dl = DataLoader(validation_dataset, **loader_args)

In [None]:
# Fetch only the first training batch to inspect its shapes. `xb` and `yb`
# stay bound afterwards and are reused for the image grid further down.
for xb, yb in train_dl:
  print(xb.shape, yb.shape, sep="\n")
  break

In [None]:
def denorm(images, means=0.5, stds=0.5):
  """Undo a mean/std normalization so the images can be displayed.

  Normalization computes ``(x - mean) / std``, so the inverse is
  ``x * std + mean``.

  Args:
    images: Normalized images. With scalar ``means``/``stds`` any object
      supporting ``*`` and ``+`` works; with per-channel values it must be
      a tensor shaped ``(C, H, W)`` or ``(N, C, H, W)``.
    means: Mean used for normalization, a scalar or one value per channel.
    stds: Standard deviation used for normalization, a scalar or one value
      per channel.

  Returns:
    The de-normalized images, same shape as ``images``.
  """
  def as_channel_stat(value):
    # Per-channel values are reshaped to (C, 1, 1) so that they broadcast
    # over the height and width axes; scalars are used unchanged.
    if isinstance(value, (list, tuple)):
      return torch.as_tensor(value, device=images.device).reshape(-1, 1, 1)
    return value

  return images * as_channel_stat(stds) + as_channel_stat(means)

In [None]:
# Summarize the data: class names, class count, and the size of each split.
class_names = train_dataset.classes
print(class_names)
print(len(class_names), " classes")
for split in (train_dataset, validation_dataset):
  print(len(split))

In [None]:
# Show the batch fetched earlier (`xb`) as an image grid, 8 images per row.
plt.figure(figsize=(12, 12))
displayable = denorm(xb)  # undo the normalization before plotting
grid_img = torchvision.utils.make_grid(displayable, nrow=8)
# imshow wants channels last, so reorder the axes via permute(1, 2, 0).
plt.imshow(grid_img.permute(1, 2, 0));

In [None]:
class MyCNN(nn.Module):
  """Small CNN: two conv/ReLU/max-pool stages followed by one linear layer.

  The linear layer expects 64 * 8 * 8 features, i.e. 32x32 input images
  (each of the two 2x2 poolings halves the spatial size: 32 -> 16 -> 8).

  Args:
    input_channels: Number of channels of the input images.
    output_shape: Number of output scores (classes).
  """

  def __init__(self, input_channels=3, output_shape=10):
    super().__init__()
    # kernel 3 / stride 1 / padding 1 keeps the spatial size unchanged.
    self.conv1 = nn.Conv2d(in_channels=input_channels, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.flatten = nn.Flatten()
    # 64 channels * 8 * 8 spatial positions = 4096 features for 32x32 input.
    self.fc = nn.Linear(64 * 8 * 8, output_shape)

    self.relu = nn.ReLU()

  def forward(self, xb):
    """Return raw class scores (no softmax) for a batch of images ``xb``."""
    x = self.relu(self.conv1(xb))
    x = self.pool1(x)

    x = self.relu(self.conv2(x))
    x = self.pool2(x)

    x = self.flatten(x)
    out = self.fc(x)
    return out

In [None]:
def get_accuracy(scores, labels):
  """Return the fraction of samples whose top-scoring class equals its label.

  Args:
    scores: Tensor of per-class scores, one row per sample (classes on
      dim 1).
    labels: Tensor of target class indices, one per sample.

  Returns:
    The accuracy as a Python float.
  """
  # Softmax is monotonic, so the arg-max of the raw scores already gives the
  # predicted class; no need to normalize first.
  predictions = scores.argmax(dim=1)
  return (predictions == labels).sum().item() / len(labels)

In [None]:
def get_device():
  """Return the CUDA device when CUDA is available, otherwise the CPU device."""
  device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
  return torch.device(device_name)

In [None]:
# Pick the compute device once; the model and every batch are moved to it.
device = get_device()
print(device)

In [None]:
# Hyperparameters.
num_epochs = 10
lr = 0.001

# Build the network and move it to the selected device before handing its
# parameters to the optimizer.
model = MyCNN()
model = model.to(device)

# Plain SGD on the cross-entropy loss of the raw class scores.
opt = SGD(model.parameters(), lr=lr)
loss_fn = F.cross_entropy

In [None]:
# Train for `num_epochs` epochs and report the validation accuracy after each.
for epoch in range(num_epochs):
  # --- Training pass: one optimizer step per batch. ---
  model.train()
  for xb, yb in train_dl:
    xb = xb.to(device)
    yb = yb.to(device)
    outputs = model(xb)
    loss = loss_fn(outputs, yb)
    opt.zero_grad()
    loss.backward()
    opt.step()

  # --- Validation pass. ---
  model.eval()
  num_correct = 0.0
  num_seen = 0
  # No gradients are needed for evaluation; no_grad avoids building the
  # autograd graph and so saves memory and time.
  with torch.no_grad():
    for xb, yb in validation_dl:
      xb = xb.to(device)
      yb = yb.to(device)
      outputs = model(xb)
      # Weight each batch by its size so that a smaller final batch does not
      # skew the epoch accuracy.
      batch_size = len(yb)
      num_correct += get_accuracy(outputs, yb) * batch_size
      num_seen += batch_size

  # An empty validation loader yields NaN instead of a division error.
  epoch_accuracy = num_correct / num_seen if num_seen else float('nan')
  print(f"Epoch: {epoch+1} \t Accuracy: {epoch_accuracy}")

***Exercise***

Now you need to define your own CNN architecture and train it for 10 epochs on the GPU. Compare the performance on the validation dataset to choose between the SGD optimizer and the Adam optimizer.

A suggestion is:
Layer 1: Conv2D with 3 input_channels, 32 output_channels
Layer 2: Conv2D with 32 input_channels, 64 output_channels
Layer 3: MaxPooling

Layer 4: Conv2D with 64 input_channels, 128 output_channels
Layer 5: Conv2D with 128 input_channels, 256 output_channels
Layer 6: MaxPooling

Layer 7: Flatten
Layer 8: Linear Layer with 16384 input dimensions (256 channels × 8 × 8, since the two poolings reduce the 32×32 image to 8×8) and 512 output dimensions
Layer 9: Linear Layer with 512 D input and 10 D output

All Conv and Linear Layers need a relu activation, except for Layer 9.
The kernel size is 3, the stride is 1 and the padding is 1 for all Conv layers.
There is no activation directly after the MaxPool layers; they can use a stride of 2 and a filter size of 2 (i.e., 2×2).