# Image Classification with Convolutional Neural Network (CNN) Assignment

1. Construct Kaggle Dataset 
2. Construct a simple CNN
3. Set hyperparameters (optimizer, criterion, num epochs)
4. Write train / validate code

In [1]:
# Import libraries to use for Deep Learning 

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from torchvision import datasets, transforms
from torchvision.io import read_image
from torchsummary import summary
import pandas as pd
from PIL import Image
import os 

import cv2 as cv2

In [2]:
!pip install gdown && gdown 'https://drive.google.com/uc?id=1rctM1HDoc24XOcRzsYyTSavaFrvuoKZc' && unzip ./archive.zip -d ./sports

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ./sports/train/rings/058.jpg  
  inflating: ./sports/train/rings/059.jpg  
  inflating: ./sports/train/rings/060.jpg  
  inflating: ./sports/train/rings/061.jpg  
  inflating: ./sports/train/rings/062.jpg  
  inflating: ./sports/train/rings/063.jpg  
  inflating: ./sports/train/rings/064.jpg  
  inflating: ./sports/train/rings/065.jpg  
  inflating: ./sports/train/rings/066.jpg  
  inflating: ./sports/train/rings/067.jpg  
  inflating: ./sports/train/rings/068.jpg  
  inflating: ./sports/train/rings/069.jpg  
  inflating: ./sports/train/rings/070.jpg  
  inflating: ./sports/train/rings/071.jpg  
  inflating: ./sports/train/rings/072.jpg  
  inflating: ./sports/train/rings/073.jpg  
  inflating: ./sports/train/rings/074.jpg  
  inflating: ./sports/train/rings/075.jpg  
  inflating: ./sports/train/rings/076.jpg  
  inflating: ./sports/train/rings/077.jpg  
  inflating: ./sports/train/rings/078.jpg  
  inflating

#1. (Assignment) Construct Kaggle Dataset

- Please construct the custom dataset covered in class.
- Do not use `torchvision.datasets.ImageFolder`.
- The structure of the custom dataset is as follows (see the skeleton below).
- Tips) use `sports.csv` files to get data. (it contains filepath, labels and which dataset each data belongs to)
- Tips) use `class_dict.csv` to get the index of each class - numeric values, not string.
- Tips) there are some grayscale (1-channel) images. I recommend using `cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)` to make them 3-channel images.

```
class CustomDataset(torch.utils.data.Dataset):
    # Inherit torch.utils.data.Dataset class

    def __init__(self,):
        # Initialize the dataset (handling data paths, check input and target data, data augmentation, etc.)

    def __len__(self):
        # Return the number of data or sample in dataset 
    
    def __getitem__(self, index):
        # Return the input and target by index
```


In [3]:
### PLEASE WRITE YOUR CODE BELOW.

class CustomDataset(Dataset):
    """Image dataset indexed by a data CSV and a class-dictionary CSV.

    The data CSV is filtered on its ``"data set"`` column to keep one split.
    For each remaining row, the first column is joined to ``root`` to form the
    image path and the second column is the class name. The class name is
    mapped to a number through the ``"class"`` / ``"class_index"`` columns of
    the class-dictionary CSV.

    Args:
        csv_data: Path to the CSV listing every sample.
        csv_target: Path to the CSV mapping class names to class indices.
        root: Directory that the image paths in ``csv_data`` are relative to.
        set: Value of the ``"data set"`` column to keep. The name shadows the
            builtin but is kept because it is part of the public signature.
        transform: Optional callable applied to the RGB image array.
    """

    def __init__(self, csv_data, csv_target, root, set, *, transform = None):
        all_data = pd.read_csv(csv_data)
        self.dataset = all_data[all_data["data set"] == set]
        self.target = pd.read_csv(csv_target)
        # Build the name -> index lookup once instead of scanning the class
        # table for every sample. Keeping the first occurrence of a duplicated
        # class name matches a "first matching row" lookup.
        unique_classes = self.target.drop_duplicates(subset="class")
        self.class_to_index = {
            name: int(class_index)
            for name, class_index in zip(unique_classes["class"], unique_classes["class_index"])
        }
        self.root = root
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(image, class_index)`` for the sample at position ``index``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
            KeyError: If the sample's class name is missing from the class table.
        """
        row = self.dataset.iloc[index]
        # Explicit positional access: ``row[0]`` on a label-indexed Series is
        # deprecated in pandas and no longer falls back to position.
        relative_path, label = row.iloc[0], row.iloc[1]
        path = os.path.join(self.root, relative_path)
        # IMREAD_COLOR always yields a 3-channel BGR array, including for
        # grayscale files.
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            img = self.transform(img)
        return img, self.class_to_index[label]

### END OF THE CODE.

In [4]:
### PLEASE WRITE YOUR CODE BELOW.

# All three splits read the same index files and differ only in the split name.
DATA_CSV = "sports/sports.csv"
CLASS_CSV = "sports/class_dict.csv"
DATA_ROOT = "sports"
BATCH_SIZE = 32

train_dataset = CustomDataset(DATA_CSV, CLASS_CSV, DATA_ROOT, "train", transform=transforms.ToTensor())
valid_dataset = CustomDataset(DATA_CSV, CLASS_CSV, DATA_ROOT, "valid", transform=transforms.ToTensor())
test_dataset = CustomDataset(DATA_CSV, CLASS_CSV, DATA_ROOT, "test", transform=transforms.ToTensor())

### YOU CAN USE ANY TRANSFORMS YOU WANT. MAKE IT RUNNABLE!

# Only the training data is shuffled; evaluation metrics do not depend on
# sample order, so the validation/test loaders iterate deterministically.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### END OF THE CODE.

##2. (Assignment) Construct a network - Simple CNN

- Please construct 4 convolution blocks with following sequences.


```
first layer = [2D Conv -> BatchNorm -> ReLU -> Dropout -> Pooling]

- 2D Convolution with 3x3 kernel size, returns output dimension of 16, use stride and padding = 1.
- use whatever pooling you want with 2x2 kernel size and stride of 2.

second layer = [2D Conv -> BatchNorm -> ReLU -> Dropout -> Pooling]

- 2D Convolution with 3x3 kernel size, returns output dimension of 32, use stride and padding = 1.
- use whatever pooling you want with 2x2 kernel size and stride of 2.

third layer = [2D Conv -> BatchNorm -> ReLU -> Dropout -> Pooling]

- 2D Convolution with 3x3 kernel size, returns output dimension of 64, use stride and padding = 1.
- use whatever pooling you want with 2x2 kernel size and stride of 2.

fourth layer = [2D Conv -> BatchNorm -> ReLU -> Dropout -> Pooling]

- 2D Convolution with 3x3 kernel size, returns output dimension of 128, use stride and padding = 1.
- use whatever pooling you want with 2x2 kernel size and stride of 2.

classifier = [Linear -> ReLU -> Linear]

- flatten the output tensor.
- first linear layer returns output dimension of 5012
- second linear layer returns output dimension of number of classes

```


In [5]:
### PLEASE WRITE YOUR CODE BELOW.

class SimpleCNN(nn.Module):
    """Four-block CNN followed by a two-layer classifier.

    Each block is ``Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU ->
    Dropout -> AvgPool2d(2, 2)``, producing 16, 32, 64 and 128 channels. Every
    block halves the spatial size, so the feature map entering the classifier
    is ``128 x (H // 16) x (W // 16)``.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Size of the output logits vector.
        input_size: Spatial size of the input images, either a single int for
            square images or an ``(height, width)`` pair. Defaults to 224,
            which gives the 25088 flattened features of the original model.

    Raises:
        ValueError: If ``input_size`` is smaller than 16 in either dimension,
            since four 2x2 poolings would leave no spatial positions.
    """

    def __init__(self, in_channels, num_classes, input_size=224):
        super().__init__()
        # Attribute names and creation order are kept so state_dict keys and
        # seeded initialisation stay the same as before.
        self.layer1 = self._conv_block(in_channels, 16, dropout=0.2)
        self.layer2 = self._conv_block(16, 32, dropout=0.4)
        self.layer3 = self._conv_block(32, 64, dropout=0.6)
        self.layer4 = self._conv_block(64, 128, dropout=0.8)

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        # Four successive floor-halvings are equivalent to one floor division by 16.
        pooled_height, pooled_width = height // 16, width // 16
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                "input_size must be at least 16 in each dimension, got {}".format(input_size))
        flat_features = 128 * pooled_height * pooled_width

        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 5012),
            nn.ReLU(),
            nn.Linear(5012, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, dropout):
        """Build one Conv -> BatchNorm -> ReLU -> Dropout -> AvgPool block."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.AvgPool2d(2, 2))

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for images ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Flatten everything except the batch dimension.
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

### END OF THE CODE.

In [6]:
# Build the network, move it to the GPU, and print a per-layer summary
# for a single 3x224x224 input.
model = SimpleCNN(in_channels=3, num_classes=100)
model = model.cuda()
summary(model, input_size=(3, 224, 224), device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 224, 224]             448
       BatchNorm2d-2         [-1, 16, 224, 224]              32
              ReLU-3         [-1, 16, 224, 224]               0
           Dropout-4         [-1, 16, 224, 224]               0
         AvgPool2d-5         [-1, 16, 112, 112]               0
            Conv2d-6         [-1, 32, 112, 112]           4,640
       BatchNorm2d-7         [-1, 32, 112, 112]              64
              ReLU-8         [-1, 32, 112, 112]               0
           Dropout-9         [-1, 32, 112, 112]               0
        AvgPool2d-10           [-1, 32, 56, 56]               0
           Conv2d-11           [-1, 64, 56, 56]          18,496
      BatchNorm2d-12           [-1, 64, 56, 56]             128
             ReLU-13           [-1, 64, 56, 56]               0
          Dropout-14           [-1, 64,

##3. (Assignment) Set hyperparameters

- Set the total number of epochs to be 50 and the learning rate to be 0.001.

- Use any optimizer you want. Please refer to the [PyTorch optimizer documentation](https://pytorch.org/docs/stable/optim.html) for further details.
    - Remember different optimizers have different hyperparameters.
- Set the loss function to be cross entropy loss.

In [7]:
### PLEASE FILL OUT THE HYPERPARAMETERS
### NOTE THAT YOU SHOULD SET DIFFERENT PARAMETERS FOR DIFFERENT OPTIMIZERS.

# Training schedule required by the assignment.
lr = 1e-3
epochs = 50

## OPTIMIZER HYPERPARAMETERS - PLEASE ADD/REMOVE DEPENDS ON OPTIMIZER.
# Adam's exponential decay rates for the first and second moment estimates.
betas = (0.9, 0.999)

## WHEN USING GPU, PUT `.cuda()` on model and criterion.

# A fresh model instance is created here, replacing the one used for the summary.
model = SimpleCNN(in_channels=3, num_classes=100).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=betas)
criterion = nn.CrossEntropyLoss().cuda()

##4. (Assignment) Write train / validation code

- For each epoch, we train and validate the model.
- Note that the validation dataset is not included in test set. 
- Please refer to the following procedure:


    for each epoch:
        model.train()
        get input and target data from train loader
        
        optimizer.zero_grad()            # reset the gradient 
        pred = model(input)

        loss = criterion(pred, target)   # compute the loss
        loss.backward()                  # backprop
        optimizer.step()                 # update the model weights

        model.eval()                     # set the evaluation mode (dropout is disabled, batchnorm uses its running statistics)
        with torch.no_grad():
            get the input and target data from validation loader

            pred = model(input)
            compute the validation loss  # Optional 
            calculate the validation accuracy
            save the model w.r.t. validation accuracy



In [8]:
def train(model, optimizer, criterion, data_loader, epoch):
    """Train ``model`` for one epoch and return the mean loss per batch.

    Args:
        model: Network to optimise; it is switched to training mode.
        optimizer: Optimizer holding ``model``'s parameters.
        criterion: Loss function called as ``criterion(prediction, target)``.
        data_loader: Iterable of ``(images, targets)`` batches; must expose
            ``.dataset`` for the progress message.
        epoch: Zero-based epoch number, used only in the progress message.

    Returns:
        The sum of batch losses divided by the number of batches, or ``0.0``
        when the loader yields no batches.
    """
    model.train()

    # Move batches to wherever the model lives instead of hard-coding CUDA,
    # so the same loop also runs on CPU-only machines.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    total_loss = 0.0
    samples_seen = 0  # samples processed before the current batch
    num_samples = len(data_loader.dataset)
    for idx, batch in enumerate(data_loader):
        img, target = batch[0].to(device), batch[1].to(device)

        ### PLEASE WRITE YOUR CODE BELOW.
        # Initialize the optimizer
        optimizer.zero_grad()
        # Make a prediction
        output = model(img)
        # Calculate loss with prediction and target
        loss = criterion(output, target)
        # Compute the gradient
        loss.backward()
        # Update Parameters
        optimizer.step()
        ### END OF THE CODE.

        batch_loss = loss.item()
        total_loss += batch_loss

        if idx % 10 == 0:
            # A running count stays correct even when a batch is smaller than
            # the configured batch size.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, samples_seen, num_samples,
                100. * samples_seen / num_samples,
                batch_loss))

        samples_seen += img.size(0)

    num_batches = len(data_loader)
    return total_loss / num_batches if num_batches else 0.0

def validate(model, criterion, data_loader):
    """Evaluate ``model`` on ``data_loader`` and return its accuracy.

    Prints the mean loss per batch and the number of correct predictions.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        criterion: Loss function called as ``criterion(prediction, target)``.
        data_loader: Iterable of ``(images, targets)`` batches; must expose
            ``.dataset`` so accuracy can be normalised by the sample count.

    Returns:
        Fraction of correctly classified samples in ``[0, 1]``, or ``0.0``
        when the dataset is empty.
    """
    model.eval()

    # Move batches to wherever the model lives instead of hard-coding CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    val_loss = 0.0
    val_acc = 0.0  # kept as a float so the printed count keeps its format

    with torch.no_grad():
        for idx, batch in enumerate(data_loader):
            img, target = batch[0].to(device), batch[1].to(device)

            ### PLEASE WRITE YOUR CODE BELOW.

            # Make a prediction
            output = model(img)
            # Calculate validation loss (although it is optional)
            loss = criterion(output, target)
            # Get the right prediction - make sure naming the prediction as 'predicted'
            # (index of the largest logit per sample; no ``.data`` needed under no_grad)
            _, predicted = torch.max(output, 1)
            ### END OF THE CODE.

            val_loss += loss.item()
            val_acc += (predicted == target).sum().item()

        num_samples = len(data_loader.dataset)
        num_batches = len(data_loader)
        total_val_acc = val_acc / num_samples if num_samples else 0.0
        mean_val_loss = val_loss / num_batches if num_batches else 0.0
        print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            mean_val_loss, val_acc, num_samples,
            100. * total_val_acc))

    return total_val_acc

In [9]:
def test(model, data_loader):
    model.eval()
    test_acc = 0.0

    with torch.no_grad():
        for idx, batch in enumerate(data_loader):
            img, target = batch[0].cuda(), batch[1].cuda()
            
            ### PLEASE WRITE YOUR CODE BELOW.

            # Make a prediction
            output = model(img)
            # Calculate validation loss (although it is optional)
            #loss = criterion(output, target)
            # Get the right prediction - make sure naming the prediction as 'predicted' 
            _, predicted = torch.max(output.data, 1)
            #test_loss += loss.item()
            test_acc += (predicted == target).sum().item()
            ### END OF THE CODE.

        print('\n Test set:  Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_acc, len(data_loader.dataset),
            100. * test_acc / len(data_loader.dataset)))

In [10]:
# Keep the weights of the epoch with the highest validation accuracy on disk,
# as required by the "save the model w.r.t. validation accuracy" step.
BEST_MODEL_PATH = "best_model.pt"
best_validation_accuracy = 0.0

for epoch in range(epochs):

    ### PLEASE WRITE YOUR CODE BELOW.
    
    # Train your model with train dataloader
    train_loss = train(model, optimizer, criterion, train_loader, epoch)
    # Validate your model with validation dataloader
    validation_accuracy = validate(model, criterion, valid_loader)

    # Save the model whenever the validation accuracy improves
    if validation_accuracy > best_validation_accuracy:
        best_validation_accuracy = validation_accuracy
        torch.save(model.state_dict(), BEST_MODEL_PATH)

    ### END OF THE CODE.


Validation set: Average loss: 3.6350, Accuracy: 57.0/500 (11%)


Validation set: Average loss: 3.1159, Accuracy: 107.0/500 (21%)


Validation set: Average loss: 3.2344, Accuracy: 108.0/500 (22%)


Validation set: Average loss: 3.1189, Accuracy: 115.0/500 (23%)


Validation set: Average loss: 2.5886, Accuracy: 173.0/500 (35%)


Validation set: Average loss: 3.2920, Accuracy: 144.0/500 (29%)


Validation set: Average loss: 2.7790, Accuracy: 177.0/500 (35%)


Validation set: Average loss: 2.1654, Accuracy: 218.0/500 (44%)


Validation set: Average loss: 2.3241, Accuracy: 198.0/500 (40%)


Validation set: Average loss: 2.1307, Accuracy: 232.0/500 (46%)


Validation set: Average loss: 2.0404, Accuracy: 238.0/500 (48%)


Validation set: Average loss: 2.1771, Accuracy: 224.0/500 (45%)


Validation set: Average loss: 2.1496, Accuracy: 234.0/500 (47%)


Validation set: Average loss: 2.0437, Accuracy: 247.0/500 (49%)


Validation set: Average loss: 2.1830, Accuracy: 227.0/500 (45%)


Validation

In [11]:
# Test your model with test dataloader.
# test() switches the model to evaluation mode and prints the accuracy on the test split.
test(model, test_loader)


 Test set:  Accuracy: 283.0/500 (57%)



In [12]:
### PLEASE EXECUTE THE FOLLOWING CELL BEFORE SUBMITTING YOUR CODE

### DO NOT MODIFY THIS CELL
# Size of the first training image after the dataset transform.
print(train_dataset[0][0].size())
# Size of the model output for one random 3x224x224 input on the GPU.
print(model(torch.rand(1, 3, 224, 224, device='cuda')).size())
# Print the accuracy on the test split.
test(model, test_loader)
### DO NOT MODIFY THIS CELL

torch.Size([3, 224, 224])
torch.Size([1, 100])

 Test set:  Accuracy: 283.0/500 (57%)

