# HW3 Image Classification
## We strongly recommend running this homework on Kaggle
https://www.kaggle.com/c/ml2022spring-hw3b/code?competitionId=34954&sortBy=dateCreated

# Check GPU

In [None]:
import torch

# Query the device name only when CUDA is present: torch.cuda.get_device_name()
# raises on a CPU-only machine, which would abort the notebook in its first cell.
if torch.cuda.is_available():
    GPU_name = torch.cuda.get_device_name()
    print("Your GPU is {}!".format(GPU_name))
else:
    # No CUDA device; GPU_name is left as None so later code can test for it.
    GPU_name = None
    print("No GPU detected.")

# Mount Google Drive

In [None]:
# Mount Google Drive at /content/gdrive (needs the google.colab package).
# NOTE(review): later cells write to the relative path ./gdrive/MyDrive/...;
# that only matches this mount point when the working directory is /content — confirm.
from google.colab import drive
drive.mount('/content/gdrive')

# Get Data
Note: if the download links are dead, you can download the data from Kaggle and upload it to the workspace, or use the Kaggle API to download the data directly into Colab.


In [None]:
# Download the dataset archive and save it as food11.zip.
# The Dropbox URL below is kept commented out as an alternative source.
#! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip
! wget -O food11.zip "https://github.com/virginiakm1988/ML2022-Spring/blob/main/HW03/food11.zip?raw=true"

In [None]:
# Unpack the downloaded archive; later cells read the data from ./food11.
! unzip food11.zip

# Training

In [None]:
# Experiment tag: used as the filename prefix of the log file and of the
# saved best-model checkpoint.
_exp_name = "sample"

In [None]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm import tqdm
import random

In [None]:
myseed = 6666  # set a random seed for reproducibility

# Ask cuDNN for deterministic kernels and switch off its autotuner, so that
# repeated runs choose the same algorithms.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Seed every random number generator this notebook imports: Python's
# `random` (previously imported but left unseeded), NumPy and PyTorch.
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to the official PyTorch documentation for details about the available transforms.

In [None]:
# Training pipeline: random crop resized to 224x224, random horizontal flip
# and AutoAugment, then conversion to a tensor.
# The same pipeline can also be applied at test time to produce several
# randomly augmented views of each image for ensembling.
train_tfm = transforms.Compose(
    [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(),
        transforms.ToTensor(),
    ]
)

# Validation / test pipeline: no augmentation, just a fixed 224x224 resize
# of the PIL image followed by conversion to a tensor.
test_tfm = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)


## **Datasets**
Each image's label is encoded in its file name, so the image and its label are both loaded inside `__getitem__`.

In [None]:
class FoodDataset(Dataset):
    """Image dataset whose class label is encoded in each file name.

    A file named ``<label>_<anything>.jpg`` yields the integer ``<label>``;
    a file whose name has no integer prefix before the first ``_`` (for
    example an unlabelled test image) yields the label ``-1``.

    Args:
        path: Directory that is scanned for ``.jpg`` files.
        tfm: Callable applied to every opened PIL image.
        files: Optional explicit sequence of image paths. When given it is
            used as-is and ``path`` is not scanned.
    """

    def __init__(self, path, tfm=test_tfm, files=None):
        # Zero-argument super(): the one-argument form `super(FoodDataset)`
        # builds an unbound proxy and never runs the parent initialiser.
        super().__init__()
        self.path = path
        if files is not None:
            self.files = files
        else:
            # Sorted so that sample order does not depend on directory order.
            self.files = sorted(
                os.path.join(path, name)
                for name in os.listdir(path)
                if name.endswith(".jpg")
            )
        self.transform = tfm

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the file at ``idx``."""
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        # basename() honours the platform's path separator, unlike a split on "/".
        prefix = os.path.basename(fname).split("_")[0]
        try:
            label = int(prefix)
        except ValueError:
            label = -1  # test has no label
        return im, label



In [None]:
class Classifier1(nn.Module):
    """ResNet-34 backbone with its final fully connected layer replaced.

    Args:
        num_classes: Number of output classes (defaults to 11, the value
            that was previously hard-coded).
    """

    def __init__(self, num_classes=11):
        super().__init__()
        # Built without pretrained weights. The explicit `pretrained=False`
        # argument is dropped because recent torchvision releases deprecate
        # it; calling the constructor with no arguments means the same thing.
        self.ResNet = models.resnet34()
        # Swap the stock classification head for one sized to this task.
        self.num_feat = self.ResNet.fc.in_features
        self.ResNet.fc = nn.Linear(self.num_feat, num_classes)

    def forward(self, x):
        """Return the class logits produced by the backbone for batch ``x``."""
        return self.ResNet(x)

class Classifier2(nn.Module):
    """ResNet-18 backbone with its final fully connected layer replaced.

    Args:
        num_classes: Number of output classes (defaults to 11, the value
            that was previously hard-coded).
    """

    def __init__(self, num_classes=11):
        super().__init__()
        # Built without pretrained weights. The explicit `pretrained=False`
        # argument is dropped because recent torchvision releases deprecate
        # it; calling the constructor with no arguments means the same thing.
        self.ResNet = models.resnet18()
        # Swap the stock classification head for one sized to this task.
        self.num_feat = self.ResNet.fc.in_features
        self.ResNet.fc = nn.Linear(self.num_feat, num_classes)

    def forward(self, x):
        """Return the class logits produced by the backbone for batch ``x``."""
        return self.ResNet(x)

# Epoch budgets handed to the ResNet-34 and ResNet-18 training runs, respectively.
n_epochs_34, n_epochs_18 = 300, 300

In [None]:
batch_size = 64
_dataset_dir = "./food11"

# Training data: augmented with train_tfm and reshuffled every epoch.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# Validation data: un-augmented. It is only evaluated, so it is iterated in
# a fixed order; shuffling it would draw from the random number generator
# for no benefit.
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# MixUp

In [None]:
import torch
import numpy as np
from torch.autograd import Variable
def MixUp(x, y, alpha=1.0, device="cuda"):
    """Blend every sample of a batch with another sample of the same batch.

    Args:
        x: Batch tensor whose first dimension indexes the samples.
        y: Label tensor aligned with ``x`` along the first dimension.
        alpha: Parameter of the ``Beta(alpha, alpha)`` distribution the
            mixing weight is drawn from. With ``alpha <= 0`` the weight is
            fixed at 1, which leaves ``x`` unchanged.
        device: Accepted for backward compatibility only. The permutation is
            placed on the device of ``x`` (and of ``y``) instead, because
            indexing a tensor with indices that live on a different,
            non-CPU device fails.

    Returns:
        ``(mixed_x, y_a, y_b, w)``: the blended batch, the original labels,
        the labels of the partner samples and the mixing weight as a float.
    """
    # float() yields a plain Python scalar rather than a NumPy scalar.
    w = float(np.random.beta(alpha, alpha)) if alpha > 0 else 1.0

    batch_size = x.size(0)
    # Drawn on the CPU, then moved next to the data it indexes.
    idx = torch.randperm(batch_size)

    mixed_x = w * x + (1 - w) * x[idx.to(x.device)]
    y_a, y_b = y, y[idx.to(y.device)]
    # The deprecated autograd.Variable wrapper is no longer applied: it
    # simply returned the tensors it was given.
    return mixed_x, y_a, y_b, w
def MixUp_criterion(criterion, w, outputs, y_a, y_b):
    """Return the MixUp loss: the ``w`` / ``1 - w`` weighted sum of the
    criterion evaluated against each of the two label sets."""
    loss_a = criterion(outputs, y_a)
    loss_b = criterion(outputs, y_b)
    return w * loss_a + (1 - w) * loss_b

# Training

In [None]:
# Placeholder for the ensembled test probabilities; it is overwritten by the
# cell that trains the models.
ens_prob = None

In [None]:
# "cuda" only when GPUs are available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Early-stopping tolerance: training stops once more than `patience`
# consecutive epochs pass without a new best validation accuracy.
patience = 300

# Switch for MixUp augmentation inside the training loop.
use_MixUp = True

# Cross-entropy is the training / validation loss; the softmax (over dim 1)
# turns logits into class probabilities at inference time.
criterion = nn.CrossEntropyLoss()
sf = torch.nn.Softmax(dim=1).to(device)

# Create a model instance on the selected device.
# NOTE(review): the later training cell builds its own model1 / model2, so
# this instance looks unused in the visible part of the file — confirm.
model = Classifier1().to(device)

def _predict_probs(model, loader):
    """Return the softmax class probabilities for every sample of ``loader``.

    Per-batch results are gathered in a list and concatenated once, rather
    than growing a tensor with ``torch.cat`` on every batch.
    """
    batch_probs = []
    with torch.no_grad():
        for data, _ in tqdm(loader):
            logits = model(data.to(device))
            batch_probs.append(torch.softmax(logits, dim=1))
    return torch.cat(batch_probs, dim=0)


def training(model, n_epochs, Classifier, n_aug_passes=19):
    """Train ``model``, then predict the test set with test-time augmentation.

    Reads these module-level names: ``train_loader``, ``valid_loader``,
    ``criterion``, ``device``, ``patience``, ``use_MixUp``, ``MixUp``,
    ``MixUp_criterion``, ``_exp_name``, ``_dataset_dir``, ``batch_size``,
    ``FoodDataset``, ``train_tfm`` and ``test_tfm``.

    Side effects: appends one line per epoch to ``./{_exp_name}_log.txt`` and
    overwrites the best-model checkpoint whenever validation accuracy improves.

    Args:
        model: Network to optimise; it must already be on ``device``.
        n_epochs: Maximum number of epochs (also the cosine schedule period).
        Classifier: Zero-argument callable that builds a fresh network of the
            same architecture, into which the best checkpoint is loaded.
        n_aug_passes: Number of test passes made with the random training
            transforms. One extra pass uses the plain test transform, and
            all passes are averaged with equal weight.

    Returns:
        Tensor of averaged class probabilities, one row per test image.
    """
    ckpt_path = f"./gdrive/MyDrive/ML2022/ML2022_hw3/{_exp_name}_best.ckpt"
    log_path = f"./{_exp_name}_log.txt"
    # Create the checkpoint directory up front so the first save cannot fail
    # on a missing folder after an epoch has already been spent.
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    stale = 0
    # -inf (rather than 0) guarantees the first epoch always writes a
    # checkpoint, even if its validation accuracy is exactly 0.
    best_acc = float("-inf")
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)

    for epoch in range(n_epochs):
        # ---------- Training ----------
        model.train()
        train_loss = []
        train_accs = []

        for imgs, labels in tqdm(train_loader):
            # Move the batch first so MixUp's permutation and the data it
            # indexes are on the same device.
            imgs, labels = imgs.to(device), labels.to(device)

            if use_MixUp:
                mixed, y_a, y_b, w = MixUp(imgs, labels, alpha=1, device=device)
                w = float(w)
                outputs = model(mixed)
                loss = MixUp_criterion(criterion, w, outputs, y_a, y_b)
                # One predicted class per sample. Taking argmax a second
                # time would collapse the batch to a single index and make
                # the accuracy meaningless.
                pred = outputs.argmax(dim=-1)
                acc = (w * (pred == y_a).float().mean()
                       + (1 - w) * (pred == y_b).float().mean())
            else:
                logits = model(imgs)
                loss = criterion(logits, labels)
                acc = (logits.argmax(dim=-1) == labels).float().mean()

            # Clear old gradients, backpropagate, clip, then update.
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
            optimizer.step()

            # Store plain floats so the running lists do not keep tensors alive.
            train_loss.append(loss.item())
            train_accs.append(float(acc))

        scheduler.step()
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)
        print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

        # ---------- Validation ----------
        model.eval()
        valid_loss = []
        valid_accs = []

        for imgs, labels in tqdm(valid_loader):
            imgs, labels = imgs.to(device), labels.to(device)
            # No gradients are needed for evaluation.
            with torch.no_grad():
                logits = model(imgs)
                loss = criterion(logits, labels)
            acc = (logits.argmax(dim=-1) == labels).float().mean()
            valid_loss.append(loss.item())
            valid_accs.append(acc.item())

        # Mean of the per-batch values.
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)
        improved = valid_acc > best_acc

        # update logs: show the line on screen and append it to the log file
        # (the file used to be opened but never written to).
        valid_msg = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}"
        if improved:
            valid_msg += " -> best"
        print(valid_msg)
        with open(log_path, "a") as log_file:
            print(valid_msg, file=log_file)

        # save models
        if improved:
            print(f"Best model found at epoch {epoch+1}, saving model")
            # only save best to prevent output memory exceed error
            torch.save(model.state_dict(), ckpt_path)
            best_acc = valid_acc
            stale = 0
        else:
            stale += 1
            if stale > patience:
                print(f"No improvment {patience} consecutive epochs, early stopping")
                break

    # ---------- Test-time augmentation with the best checkpoint ----------
    model_best = Classifier().to(device)
    model_best.load_state_dict(torch.load(ckpt_path, map_location=device))
    model_best.eval()

    test_dir = os.path.join(_dataset_dir, "test")
    # The augmented loader is built once: its transforms are re-sampled every
    # time an image is fetched, so each pass still sees different views.
    aug_test_loader = DataLoader(
        FoodDataset(test_dir, tfm=train_tfm),
        batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True,
    )
    plain_test_loader = DataLoader(
        FoodDataset(test_dir, tfm=test_tfm),
        batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True,
    )

    n_views = n_aug_passes + 1
    tta_prob = None
    # Augmented passes first, the plain pass last; every pass has equal weight.
    for loader in [aug_test_loader] * n_aug_passes + [plain_test_loader]:
        view_prob = _predict_probs(model_best, loader) / n_views
        tta_prob = view_prob if tta_prob is None else tta_prob + view_prob
    return tta_prob

In [None]:
# Train the two architectures one after the other and average their test
# probabilities; each run contributes with weight 1/2.
# NOTE(review): the checkpoint path inside training() depends only on
# _exp_name, so the second run overwrites the first run's checkpoint file.
model1 = Classifier1().to(device)
ens_prob = training(model1, n_epochs_34, Classifier1) / 2
model2 = Classifier2().to(device)
ens_prob += training(model2, n_epochs_18, Classifier2) / 2

In [None]:
# Un-augmented test split; its length sets how many Ids the submission CSV gets.
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)

# Testing and generating the prediction CSV

In [None]:
# Take the most probable class of every row of the ensembled probabilities,
# then print the predicted labels one per line.
prediction = ens_prob.cpu().data.numpy().argmax(axis=1)
for p in prediction:
    print(p)

In [None]:
#create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with "0" characters to at least 4 characters."""
    text = str(i)
    return text.rjust(4, "0")
# Submission table: zero-padded 1-based Ids next to the predicted categories,
# written to Drive without the index column.
df = pd.DataFrame(
    {
        "Id": list(map(pad4, range(1, len(test_set) + 1))),
        "Category": prediction,
    }
)
df.to_csv("./gdrive/MyDrive/ML2022/ML2022_hw3/prediction.csv", index=False)

In [None]:
# For Report
from torch import nn
class Residual_Network(nn.Module):
    """Small CNN with three identity skip connections and a two-layer head.

    Layers 2, 4 and 6 keep the shape of their input and have that input added
    back before the ReLU; layers 3 and 5 halve the spatial size with stride 2.

    Args:
        input_size: Side length of the square input images. The default of
            128 reproduces the previously hard-coded ``256 * 32 * 32``
            feature count of the first linear layer.
        num_classes: Number of output logits (previously fixed at 11).
    """

    def __init__(self, input_size=128, num_classes=11):
        super().__init__()
        self.cnn_layer1 = self._conv_bn(3, 64, 1)
        self.cnn_layer2 = self._conv_bn(64, 64, 1)
        self.cnn_layer3 = self._conv_bn(64, 128, 2)
        self.cnn_layer4 = self._conv_bn(128, 128, 1)
        self.cnn_layer5 = self._conv_bn(128, 256, 2)
        self.cnn_layer6 = self._conv_bn(256, 256, 1)

        # Each of the two stride-2 convolutions (kernel 3, padding 1) maps a
        # side of length s to ceil(s / 2).
        feat_side = (input_size + 1) // 2
        feat_side = (feat_side + 1) // 2
        self.fc_layer = nn.Sequential(
            nn.Linear(256 * feat_side * feat_side, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )
        self.relu = nn.ReLU()

    @staticmethod
    def _conv_bn(in_channels, out_channels, stride):
        """Build a 3x3 convolution (padding 1) followed by batch normalisation."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, stride, 1),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, x):
        """Return class logits for a batch of ``input_size`` x ``input_size`` images."""
        x = self.relu(self.cnn_layer1(x))
        x = self.relu(self.cnn_layer2(x) + x)  # skip connection
        x = self.relu(self.cnn_layer3(x))
        x = self.relu(self.cnn_layer4(x) + x)  # skip connection
        x = self.relu(self.cnn_layer5(x))
        x = self.relu(self.cnn_layer6(x) + x)  # skip connection
        # The linear head needs one flat feature vector per sample; without
        # this flatten the 4-D feature map cannot be fed to nn.Linear.
        return self.fc_layer(x.flatten(start_dim=1))