# HW3 Image Classification
## We strongly recommend that you run with Kaggle for this homework
https://www.kaggle.com/c/ml2022spring-hw3b/code?competitionId=34954&sortBy=dateCreated

# Get Data
Notes: if the links are dead, you can download the data directly from Kaggle and upload it to the workspace, or you can use the Kaggle API to directly download the data into colab.


In [1]:
# ! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip

In [2]:
# Mount Google Drive at /content/drive; the dataset archive and the model
# checkpoints used by this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
! unzip /content/drive/MyDrive/ML/HW3/food11.zip

Streaming output was truncated to the last 5000 lines.
  inflating: food11/training/8_805.jpg  
  inflating: food11/training/8_806.jpg  
  inflating: food11/training/8_807.jpg  
  inflating: food11/training/8_808.jpg  
  inflating: food11/training/8_809.jpg  
  inflating: food11/training/8_81.jpg  
  inflating: food11/training/8_810.jpg  
  inflating: food11/training/8_811.jpg  
  inflating: food11/training/8_812.jpg  
  inflating: food11/training/8_813.jpg  
  inflating: food11/training/8_814.jpg  
  inflating: food11/training/8_815.jpg  
  inflating: food11/training/8_816.jpg  
  inflating: food11/training/8_817.jpg  
  inflating: food11/training/8_818.jpg  
  inflating: food11/training/8_819.jpg  
  inflating: food11/training/8_82.jpg  
  inflating: food11/training/8_820.jpg  
  inflating: food11/training/8_821.jpg  
  inflating: food11/training/8_822.jpg  
  inflating: food11/training/8_823.jpg  
  inflating: food11/training/8_824.jpg  
  inflating: food11/training/8_825.jpg  
  inflating: food11/trai

# Training

In [4]:
# Experiment tag: prefix of the checkpoint and log file names used below.
_exp_name = "mixup"

In [5]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset, random_split,SubsetRandomSampler
from torchvision.datasets import DatasetFolder, VisionDataset
from torchvision import models
from sklearn.model_selection import KFold
# This is for the progress bar.
from tqdm.auto import tqdm
import random
from torch.autograd import Variable

In [6]:
myseed = 1126  # set a random seed for reproducibility
# Ask cuDNN for deterministic kernels and disable its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Seed every RNG this notebook draws from. Python's `random` must be seeded
# too: the dataset uses random.randint() to pick mixup partners.
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [7]:
from torchvision.transforms.transforms import RandomGrayscale
# Normally, we don't need augmentations in testing and validation.
# All we need here is to resize the PIL image, turn it into a tensor and
# normalize it.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    # Per channel: (x - 0.5) / 0.5.
    transforms.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # Random augmentations; they run before the tensor conversion.
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.RandomGrayscale(),
    # ToTensor() has to precede Normalize(); the normalization must match
    # the one in test_tfm so train and test inputs share the same scale.
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])
])


## **Datasets**
The data is labelled by file name, so each image is loaded and its label parsed from the name when `__getitem__` is called.

In [8]:
class FoodDataset(Dataset):
    """Image dataset whose class id is the file-name prefix before the first "_".

    In ``mode='train'`` every item is ``(image, label, mix_label, lam)``:
    items whose index is a multiple of ``mixup_every`` are blended with a
    randomly chosen second image (mixup); all other items are returned
    unmixed with ``mix_label == label`` and ``lam == 1.0``.
    In any other mode an item is ``(image, -1)`` because no label is available.

    Args:
        path: directory that is scanned for ``.jpg`` files.
        tfm: callable applied to every opened PIL image.
        files: optional explicit list of file paths; when given, ``path`` is
            not scanned.
        mode: ``'train'`` for labelled mixup items, anything else for
            unlabelled items.
    """

    # Every `mixup_every`-th index is mixed with another random sample.
    mixup_every = 5
    # Both parameters of the Beta distribution the mixing weight is drawn from.
    mixup_alpha = 1.0

    def __init__(self,path,tfm=test_tfm,files = None, mode = 'train'):
        super().__init__()
        self.path = path
        self.mode = mode
        if files is not None:
            self.files = files
        else:
            self.files = sorted(
                os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
            )
        # Guarded so that an empty directory does not raise IndexError here.
        if len(self.files) > 0:
            print(f"One {path} sample",self.files[0])
        self.transform = tfm

    def __len__(self):
        return len(self.files)

    @staticmethod
    def _label_of(fname):
        """Parse the integer class id from a ``<label>_<anything>`` file name."""
        return int(os.path.basename(fname).split("_")[0])

    def _load(self, fname):
        """Open one image file and run the dataset transform on it."""
        return self.transform(Image.open(fname))

    def __getitem__(self,idx):
        fname = self.files[idx]
        im = self._load(fname)
        if self.mode != 'train':
            return im, -1  # test has no label

        label = self._label_of(fname)
        if idx % self.mixup_every != 0:
            # Unmixed sample: repeating the true label (instead of a dummy
            # class) keeps the second loss term harmless, and a float lam
            # keeps the collated dtype the same for every batch.
            return im, label, label, 1.0

        # Mixup: blend with a uniformly chosen sample (possibly itself).
        mix_name = self.files[random.randint(0, len(self.files)-1)]
        mix_img = self._load(mix_name)
        lam = float(np.random.beta(self.mixup_alpha, self.mixup_alpha))
        im = lam*im + (1-lam)*mix_img
        return im, label, self._label_of(mix_name), lam



In [9]:
class Residual_Block(nn.Module):
    """Two 3x3 conv + batch-norm layers with a skip connection.

    Args:
        in_channel: number of input channels.
        out_channel: number of output channels of both convolutions.
        stride: stride of the first convolution.
        down_sample: optional module applied to the input to produce the
            shortcut branch; needed whenever the main branch changes the
            channel count or spatial size. ``None`` means identity shortcut.
    """

    def __init__(self, in_channel, out_channel, stride=1, down_sample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel,
                               out_channels=out_channel,
                               kernel_size=3,
                               stride=stride,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_channels=out_channel,
                               out_channels=out_channel,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.down_sample = down_sample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        # Compare against None explicitly: the truthiness of a module depends
        # on __len__ (e.g. an empty nn.Sequential is falsy), which would
        # silently skip a shortcut module that was supplied.
        if self.down_sample is not None:
            residual = self.down_sample(x)
        out += residual
        out = self.relu(out)

        return out
    
class Classifier(nn.Module):
    """Small ResNet-style classifier built from a configurable residual block.

    A 3x3 stem convolution is followed by three residual stages (16, 32 and
    64 channels; the last two downsample with stride 2), average pooling,
    dropout and one linear layer. The linear layer expects 1024 input
    features.

    Args:
        block: class used to build each residual unit; it is called as
            ``block(in_channels, out_channels, stride, down_sample)``.
        layers: sequence with the number of units per stage.
            NOTE(review): stage 2 reads ``layers[0]`` and stage 3 reads
            ``layers[1]``; kept as is so existing callers keep working.
        num_classes: size of the output layer.
    """

    def __init__(self, block, layers, num_classes=11):
        super().__init__()
        stem_width = 16
        self.conv = nn.Conv2d(3, stem_width, kernel_size=3, stride=1, padding=1, bias=False)
        # Running channel count, updated by make_layer() after each stage.
        self.in_channels = stem_width
        self.bn = nn.BatchNorm2d(stem_width)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(p=0.5)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual units.

        Only the first unit may change resolution or width; in that case it
        receives a conv + batch-norm projection for its shortcut branch.
        """
        shape_is_kept = stride == 1 and self.in_channels == out_channels
        shortcut = None
        if not shape_is_kept:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        first_unit = block(self.in_channels, out_channels, stride, shortcut)
        self.in_channels = out_channels
        remaining_units = [block(out_channels, out_channels) for _ in range(blocks - 1)]
        return nn.Sequential(first_unit, *remaining_units)

    def forward(self, x):
        """Map a batch of images to ``num_classes`` logits per image."""
        stem = self.relu(self.bn(self.conv(x)))
        features = self.layer3(self.layer2(self.layer1(stem)))
        pooled = self.dropout(self.avg_pool(features))  # dropout
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

In [10]:
batch_size = 48
_dataset_dir = "./food11"
# Construct datasets.
# FoodDataset collects the .jpg files under the given directory; mode='train'
# makes every item carry its label plus the mixup label and mixing weight.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm, mode='train')
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One ./food11/training sample ./food11/training/0_0.jpg


In [11]:
# The validation split is loaded exactly like the training split (training
# augmentations, mode='train') and concatenated with it, so full_loader
# iterates over both splits.
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=train_tfm,mode='train')
full_set = ConcatDataset([train_set,valid_set])
full_loader = DataLoader(full_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One ./food11/validation sample ./food11/validation/0_0.jpg


In [13]:
# Use "cuda" only when a GPU is available, otherwise fall back to "cpu".
# Models and batches are moved to this device throughout the notebook.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [14]:
# Build the classifier and resume from this experiment's best checkpoint.
model = Classifier(Residual_Block, [2, 2, 2, 2]).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only runtime.
model.load_state_dict(torch.load(f"/content/drive/MyDrive/ML/HW3/{_exp_name}_best.ckpt", map_location=device))

<All keys matched successfully>

In [15]:
# For the classification task, we use cross-entropy as the measurement of performance.
# Reference: https://github.com/facebookresearch/mixup-cifar10/blob/main/train.py
criterion = nn.CrossEntropyLoss(label_smoothing=0.2)
def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Return the mixup loss ``lam * L(pred, y_a) + (1 - lam) * L(pred, y_b)``.

    Args:
        criterion: loss callable, e.g. an ``nn.CrossEntropyLoss`` instance.
        pred: model outputs for the batch.
        y_a: labels of the first image of every mixed pair.
        y_b: labels of the second image of every mixed pair.
        lam: mixing weight; a scalar, or a tensor with one weight per sample.

    Returns:
        A tensor shaped like ``lam`` (one loss per sample when ``lam`` is a
        per-sample tensor); callers reduce it with ``.mean()``.

    When ``lam`` holds one weight per sample and ``criterion`` averages over
    the batch, multiplying ``lam`` by the already-averaged loss would weight
    every sample by the mean of ``lam`` instead of its own weight. In that
    case the losses are evaluated unreduced, so each sample is weighted by
    its own ``lam``.
    """
    per_sample_lam = torch.is_tensor(lam) and lam.dim() > 0
    if not (per_sample_lam and getattr(criterion, "reduction", None) == "mean"):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    # Switch the loss to per-sample output for these two calls only and
    # always restore the caller's setting afterwards.
    criterion.reduction = "none"
    try:
        loss_a = criterion(pred, y_a)
        loss_b = criterion(pred, y_b)
    finally:
        criterion.reduction = "mean"
    lam = lam.to(loss_a.dtype)
    return lam * loss_a + (1 - lam) * loss_b

In [None]:
# The number of training epochs and patience.
n_epochs = 150
patience = 30 # If no improvement in 'patience' epochs, early stop


# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
# Lower the learning rate when the accuracy passed to step() stops increasing.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=5, verbose=True,min_lr=0.00001)
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

# This loop trains on full_loader (training + validation data), so model
# selection and early stopping below are driven by the training accuracy.
for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []
    for batch in full_loader:

        # A batch consists of images, both mixup labels and the mixing weights.
        imgs, label_a, label_b, lam = (t.to(device) for t in batch)

        # Forward the data. (Data and model are on the same device.)
        logits = model(imgs)

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = mixup_criterion(criterion, logits, label_a, label_b, lam)
        loss = loss.mean()

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch: a prediction counts with
        # weight lam for the first label and (1 - lam) for the second.
        pred = logits.argmax(dim=-1)
        acc = ((lam * (pred == label_a)) + ((1 - lam) * (pred == label_b))).float().mean()

        # Record the loss and accuracy as plain Python floats so nothing
        # stays on the GPU between batches.
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)
    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    scheduler.step(train_acc)

    # update logs: echo the line to the notebook and also append it to the
    # log file (the file used to be opened without anything being written).
    improved = train_acc > best_acc
    log_line = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}"
    if improved:
        log_line += " -> best"
    print(log_line)
    with open(f"./{_exp_name}_log.txt","a") as log_file:
        log_file.write(log_line + "\n")

    # save models
    if improved:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"/content/drive/MyDrive/ML/HW3/{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = train_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

KeyboardInterrupt: ignored

In [16]:
# Test images have no labels: with mode='test' FoodDataset returns -1 as the
# label. shuffle=False keeps predictions in the dataset's sorted file order.
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm, mode='test')
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One ./food11/test sample ./food11/test/0001.jpg


# Testing and generate prediction CSV

In [None]:
# Fresh classifier initialised from this experiment's best checkpoint.
model_best = Classifier(Residual_Block, [2, 2, 2, 2]).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only runtime.
model_best.load_state_dict(torch.load(f"/content/drive/MyDrive/ML/HW3/{_exp_name}_best.ckpt", map_location=device))

In [None]:
def training(model,optimizer,criterion,dataloader):
    """Train ``model`` for one pass over ``dataloader`` with the mixup loss.

    Args:
        model: network to optimise; switched to train mode.
        optimizer: optimizer that owns ``model``'s parameters.
        criterion: loss handed to ``mixup_criterion``.
        dataloader: yields ``(imgs, label_a, label_b, lam)`` batches.

    Returns:
        ``(train_loss, train_acc)``: the mean batch loss as a float and the
        mean batch accuracy as a 0-dim tensor.

    Note:
        Reads the module-level names ``device``, ``mixup_criterion``,
        ``epoch`` and ``n_epochs``; the last two are only used for the
        progress line, so the function must be called from inside the epoch
        loop.
    """
    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in dataloader:

        # A batch consists of images, both mixup labels and the mixing weights.
        # Tensors are moved once; the deprecated Variable wrapper is not
        # needed because tensors track gradients themselves.
        imgs, label_a, label_b, lam = (t.to(device) for t in batch)

        # Forward the data. (Data and model are on the same device.)
        logits = model(imgs)

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = mixup_criterion(criterion, logits, label_a, label_b, lam)
        loss = loss.mean()

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch: a prediction counts with
        # weight lam for the first label and (1 - lam) for the second.
        pred = logits.argmax(dim=-1)
        acc = ((lam * (pred == label_a)) + ((1 - lam) * (pred == label_b))).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    return train_loss, train_acc

In [None]:
def validation(model,optimizer,criterion,dataloader):
    """Evaluate ``model`` on one pass over ``dataloader`` without updating it.

    Args:
        model: network to evaluate; switched to eval mode.
        optimizer: unused; kept so the signature mirrors ``training``.
        criterion: loss handed to ``mixup_criterion``.
        dataloader: yields ``(imgs, label_a, label_b, lam)`` batches.

    Returns:
        ``(valid_loss, valid_acc)``: the mean batch loss as a float and the
        mean batch accuracy as a 0-dim tensor.

    Note:
        Reads the module-level names ``device``, ``mixup_criterion``,
        ``epoch`` and ``n_epochs``; the last two are only used for the
        progress line, so the function must be called from inside the epoch
        loop.
    """
    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in dataloader:

        # A batch consists of images, both mixup labels and the mixing weights.
        # Tensors are moved once; the deprecated Variable wrapper is not needed.
        imgs, label_a, label_b, lam = (t.to(device) for t in batch)

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs)

            # We can still compute the loss (but not the gradient).
            loss = mixup_criterion(criterion, logits, label_a, label_b, lam)
            loss = loss.mean()

            # Compute the accuracy for current batch.
            pred = logits.argmax(dim=-1)
            acc = ((lam * (pred == label_a)) + ((1 - lam) * (pred == label_b))).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
    return valid_loss, valid_acc

In [None]:
# The number of training epochs and patience.
n_epochs = 10
patience = 5 # If no improvement in 'patience' epochs, early stop
k = 5
splits=KFold(n_splits=k,shuffle=True,random_state=1126)
# Per-fold metric histories, keyed 'fold1' ... 'fold<k>'.
foldperf={}
# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss(label_smoothing=0.15)
# Best validation accuracy per fold, keyed '1' ... '<k>'.
fold_acc = dict.fromkeys(str(i) for i in range(1, k + 1))

for fold, (train_idx,val_idx) in enumerate(splits.split(np.arange(len(full_set)))):
    # Every fold starts from the same pre-trained checkpoint.
    model_best = Classifier(Residual_Block, [2, 2, 2, 2]).to(device)
    model_best.load_state_dict(torch.load(f"/content/drive/MyDrive/ML/HW3/{_exp_name}_best.ckpt", map_location=device))
    # The optimizer must be created after the fold's model: an optimizer
    # built once before the loop keeps pointing at the parameters of an
    # earlier model object and would never update this one.
    # You may fine-tune some hyperparameters such as learning rate on your own.
    optimizer = torch.optim.Adam(model_best.parameters(), lr=0.00005, weight_decay=0.000001)
    # A fresh history per fold; sharing one dict would make every entry of
    # foldperf alias the same lists.
    history = {'train_loss': [], 'val_loss': [],'train_acc':[],'val_acc':[]}
    # Initialize trackers, these are not parameters and should not be changed
    best_acc = 0
    stale = 0
    print(f'Fold {fold + 1}')
    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)
    # NOTE(review): both loaders draw from full_set, so the held-out part is
    # also read with the training transforms and mixup — confirm intended.
    train_loader = DataLoader(full_set, batch_size=batch_size, sampler=train_sampler)
    val_loader = DataLoader(full_set, batch_size=batch_size, sampler=val_sampler)
    for epoch in range(n_epochs):
        train_loss, train_acc = training(model_best,optimizer,criterion,train_loader)

        val_loss, val_acc = validation(model_best,optimizer,criterion,val_loader)

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_acc.cpu())
        history['val_acc'].append(val_acc.cpu())

        # update logs: echo the line to the notebook and also append it to
        # the log file (the file used to be opened without being written).
        improved = val_acc > best_acc
        log_line = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {val_loss:.5f}, acc = {val_acc:.5f}"
        if improved:
            log_line += " -> best"
        print(log_line)
        with open(f"./{_exp_name}_log.txt","a") as log_file:
            log_file.write(log_line + "\n")

        # save models
        if improved:
            print(f"Best model found at epoch {epoch}, saving model")
            torch.save(model_best.state_dict(), f"/content/drive/MyDrive/ML/HW3/{_exp_name}_Fold{fold+1}_best.ckpt") # only save best to prevent output memory exceed error
            best_acc = val_acc
            fold_acc[f'{fold+1}'] = best_acc
            stale = 0
        else:
            stale += 1
            if stale > patience:
                print(f"No improvment {patience} consecutive epochs, early stopping")
                break
    foldperf[f'fold{fold+1}'] = history

In [None]:
# Summarise cross-validation: average every metric within each fold first,
# then average those per-fold means across the folds.
k = 5
tl_f, testl_f, ta_f, testa_f = [], [], [], []
for f in range(1, k + 1):
    fold_history = foldperf['fold{}'.format(f)]

    tl_f.append(np.mean(fold_history['train_loss']))
    testl_f.append(np.mean(fold_history['val_loss']))

    ta_f.append(np.mean(fold_history['train_acc']))
    testa_f.append(np.mean(fold_history['val_acc']))

print('Performance of {} fold cross validation'.format(k))
# The "Test" figures are computed from the per-fold validation metrics.
overall = (np.mean(tl_f), np.mean(testl_f), np.mean(ta_f), np.mean(testa_f))
print("Average Training Loss: {:.3f} \t Average Test Loss: {:.3f} \t Average Training Acc: {:.2f} \t Average Test Acc: {:.2f}".format(*overall))

In [17]:
_exp_name='mixup'


def _load_fold_model(fold_number):
    """Build a classifier and restore the best checkpoint of one CV fold."""
    net = Classifier(Residual_Block, [2, 2, 2, 2]).to(device)
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU can also be restored on a CPU-only runtime.
    state = torch.load(
        f"/content/drive/MyDrive/ML/HW3/{_exp_name}_Fold{fold_number}_best.ckpt",
        map_location=device,
    )
    net.load_state_dict(state)
    return net


# One model per cross-validation fold; the individual names are used by the
# ensemble prediction code.
model1, model2, model3, model4, model5 = (
    _load_fold_model(fold_number) for fold_number in range(1, 6)
)

<All keys matched successfully>

In [20]:
# Predict the test set with an ensemble of the five fold models combined
# with test-time augmentation (TTA).
prediction = []
model.eval()
fold_models = (model1, model2, model3, model4, model5)
for fold_model in fold_models:
    fold_model.eval()

# Weight of each view in the per-model average, in the order the views are
# built below: original, flipped, rotated, grayscale, colour-jittered.
tta_weights = (0.2, 0.5, 0.1, 0.1, 0.1)
# NOTE(review): these transforms are applied to batches that test_tfm has
# already normalized; ColorJitter in particular is normally used on images
# in the [0, 1] range — confirm the value range is handled as intended.
tta_flip = transforms.RandomHorizontalFlip(1)
tta_rotate = transforms.RandomRotation(20)
tta_gray = transforms.RandomGrayscale(1)
tta_jitter = transforms.ColorJitter(brightness=0.1, contrast=0.1)

with torch.no_grad():
    for data,_ in test_loader:
        # The views are built once per batch and shared by all models.
        views = (
            data.to(device),
            tta_flip(data).to(device),
            tta_rotate(data).to(device),
            tta_gray(data).to(device),
            tta_jitter(data).to(device),
        )

        # Weighted average over the views for every model ...
        model_preds = [
            sum(weight * fold_model(view) for weight, view in zip(tta_weights, views))
            for fold_model in fold_models
        ]
        # ... followed by a plain average over the models.
        test_pred = sum(model_preds) / len(fold_models)
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        # argmax over axis 1 is already one-dimensional. Squeezing it would
        # turn a final batch of size 1 into a scalar, and `+=` would fail.
        prediction += test_label.tolist()

In [21]:
#create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with "0" to a width of at least four."""
    return str(i).rjust(4, "0")
# Ids are the zero-padded 1-based positions of the test samples; each is
# paired with the predicted category at the same position.
sample_ids = list(map(pad4, range(1, len(test_set) + 1)))
df = pd.DataFrame({"Id": sample_ids, "Category": prediction})
df.to_csv("/content/drive/MyDrive/ML/HW3/submission.csv",index = False)

In [None]:
# best public score 0.88745
# Residual model with dropout(0.5)
# RandomHorizontalFlip(0.5) RandomRotation(20) RandomGrayScale() and Normalization and mixup
# train on both train and validation data for 150 epochs
# CrossEntropy with label_smoothing=0.2
# Adam with lr = 0.001, weight_decay = 0.00001
# 5 fold cross validation and ensemble, cross_validation with lr=0.0001 and weight_decay=0.00001
# test time augmentation [horizontalflip(1) rotation(20) grayscale(1) colorjitter(brightness=0.1, contrast=0.1)], ratio: original:0.2, flip:0.5, rotation:0.1, grayscale:0.1, colorjitter:0.1

# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### Your train_tfm in the report can be different from the train_tfm in your training code.


In [None]:
# Augmentation pipeline for the report (Q1). Every Random*/ColorJitter step
# draws fresh parameters per call, so the same input image yields different
# outputs on repeated calls.
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # Random augmentations; they run before the tensor conversion.
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.RandomGrayscale(),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.RandomInvert(),
    # Convert to a tensor, then normalize each channel with (x - 0.5) / 0.5.
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])
])

# Q2. Residual Implementation
![](https://i.imgur.com/GYsq1Ap.png)
## Directly copy the following block and paste it on GradeScope after you finish the code


In [None]:
class Residual_Network(nn.Module):
    """Six conv + batch-norm layers arranged as three residual stages.

    In every stage the first layer sets the channel count (and, from the
    second stage on, halves the resolution with stride 2); the second layer
    keeps the shape and its output is added to its own input before the
    ReLU. A two-layer fully-connected head maps the flattened features to
    11 logits.
    """

    def __init__(self):
        super().__init__()

        def conv_bn(c_in, c_out, stride):
            # 3x3 convolution with padding 1, followed by batch norm.
            return nn.Sequential(
                nn.Conv2d(c_in, c_out, 3, stride, 1),
                nn.BatchNorm2d(c_out),
            )

        self.cnn_layer1 = conv_bn(3, 64, 1)      # (64, 128, 128)
        self.cnn_layer2 = conv_bn(64, 64, 1)     # (64, 128, 128)
        self.cnn_layer3 = conv_bn(64, 128, 2)    # (128, 64, 64)
        self.cnn_layer4 = conv_bn(128, 128, 1)   # (128, 64, 64)
        self.cnn_layer5 = conv_bn(128, 256, 2)   # (256, 32, 32)
        self.cnn_layer6 = conv_bn(256, 256, 1)   # (256, 32, 32)
        self.fc_layer = nn.Sequential(
            nn.Linear(256 * 32 * 32, 256),
            nn.ReLU(),
            nn.Linear(256, 11),
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        # input (x): [batch_size, 3, 128, 128]
        # output: [batch_size, 11]
        stages = (
            (self.cnn_layer1, self.cnn_layer2),
            (self.cnn_layer3, self.cnn_layer4),
            (self.cnn_layer5, self.cnn_layer6),
        )

        # Extract features by convolutional layers.
        features = x
        for reshape_layer, residual_layer in stages:
            trunk = self.relu(reshape_layer(features))
            # Skip connection: add the stage input back before the ReLU.
            features = self.relu(residual_layer(trunk) + trunk)

        # The extracted feature map must be flattened before going to the
        # fully-connected layers, which produce the final logits.
        return self.fc_layer(features.flatten(1))