# 1 Import packages

In [None]:
import numpy as np
import pandas as pd
import os
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import ConcatDataset, Dataset, DataLoader, Subset

from PIL import Image
from torchvision.transforms import transforms
from torchvision.datasets import DatasetFolder, VisionDataset


# This is for the progress bar.
from tqdm import tqdm

_exp_name = "sol"

In [None]:
def same_seed(seed):
    """Fix every random number generator seed used here for reproducibility.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and, when
    available, all CUDA devices), and switches cuDNN to deterministic mode.

    Args:
        seed (int): seed value shared by all generators.
    """
    # Deterministic cuDNN kernels; benchmark mode is disabled because it may
    # select different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Python's own generator must be seeded too, otherwise anything drawing
    # from `random` stays non-reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


my_seed = 6666
same_seed(my_seed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [None]:
# Per-channel normalization shared by the training and evaluation pipelines.
# Both pipelines must apply it: a model trained on normalized inputs would
# otherwise see validation/test images on a different scale.
_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Normally, we don't need augmentations in testing and validation.
# All we need here is to resize the PIL image, transform it into a Tensor and
# normalize it exactly like the training data.
test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    _normalize,
])

# Candidate augmentations; exactly one of them is applied to each sample.
transform1 = transforms.RandomHorizontalFlip()
transform2 = transforms.RandomRotation(30)
transform3 = transforms.ColorJitter(brightness=0.5)
transform4 = transforms.RandomAffine(degrees=20, translate=(0.2, 0.2), scale=(0.7, 1.3))

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_transform = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),

    # You may add some transforms here.
    # transforms.RandomResizedCrop(size=(128, 128), antialias=True),
    # transforms.RandomHorizontalFlip(p=0.5),  # flip horizontally with 50% probability
    # transforms.RandomVerticalFlip(p=0.5),  # flip vertically with 50% probability
    # transforms.RandomRotation(degrees=(0, 180)),
    # transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
    # transforms.RandomCrop(128, padding=10),
    # transforms.RandomGrayscale(p=0.1),  # convert to grayscale with the given probability

    # Randomly pick one of the augmentations above for every sample.
    transforms.RandomChoice([transform1, transform2, transform3, transform4]),

    # ToTensor() must come after all PIL-based transforms; Normalize works on tensors.
    transforms.ToTensor(),
    _normalize,
])

## **Datasets**
The data is labelled by the file name, so we load each image and parse its label inside `__getitem__`.

In [None]:
from os import name


class FoodDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    A file named ``<label>_<anything>.jpg`` yields the integer ``<label>``;
    a file name without an underscore is treated as unlabeled test data and
    yields ``-1``.

    Args:
        folder (str): directory scanned for ``.jpg`` files when ``files`` is
            not given.
        transform (callable, optional): applied to the PIL image before it is
            returned.
        files (list[str], optional): explicit list of image paths; when given,
            ``folder`` is not scanned.
    """

    def __init__(self, folder, transform = None, files = None):
        super().__init__()
        self.path = folder
        self.transform = transform

        if files is None:
            # Sorted so that sample order is stable across runs and platforms.
            self.files = sorted(
                os.path.join(folder, file)
                for file in os.listdir(folder)
                if file.endswith('.jpg')
            )
        else:
            self.files = files

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        fname = self.files[idx]
        # Force three channels: grayscale/RGBA/palette files would otherwise
        # break the 3-channel pipeline. The context manager closes the file
        # handle as soon as the pixel data has been copied.
        with Image.open(fname) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        # basename() handles the platform's path separator instead of a
        # hard-coded '/'.
        name_split = os.path.basename(fname).split('_')
        if len(name_split) > 1:
            label = int(name_split[0])
        else:
            label = -1 # Testing data
        return image, label

# Model

You are free to modify the model architecture here for further improvement. However, if you want to use some well-known architectures such as ResNet50, please make sure NOT to load the pre-trained weights. Using such pre-trained models is considered cheating and therefore you will be punished. Similarly, it is your responsibility to make sure no pre-trained weights are used if you use torch.hub to load any modules.

For example, if you use ResNet-18 as your model:

model = torchvision.models.resnet18(pretrained=False) → This is fine.

model = torchvision.models.resnet18(pretrained=True) → This is NOT allowed.

In [None]:
class FoodClassifier(nn.Module):
    """Five-stage CNN followed by a three-layer MLP head with 11 outputs.

    Expects input of shape ``[batch, 3, 128, 128]``; every convolution stage
    halves the spatial size, so the flattened feature size is ``512 * 4 * 4``.
    """

    def __init__(self):
        super().__init__()

        # input image size: [3, 128, 128]
        # The Sequential is kept flat so state_dict keys stay `cnn.<index>.*`.
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1), # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            # num_features must equal the 512 channels produced by the conv
            # above; 256 raises a size-mismatch error at the first forward pass.
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), # [512, 4, 4]
        )

        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Dropout(0.6),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        """Return class logits of shape ``[batch, 11]`` for image batch ``x``."""
        out = self.cnn(x)
        # Keep the batch dimension, flatten channels and spatial dims.
        out = out.flatten(1)
        return self.fc(out)
    
    
def params_count(model):
    """
    Count the trainable parameters of a model.
    Args:
        model (model): model whose parameters are counted; only tensors with
            ``requires_grad`` set contribute to the total.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total

# Configuration

In [None]:
# Mini-batch size shared by every DataLoader, and the root of the food11 data.
batch_size = 64
_dataset_dir = "../input/ml2022spring-hw3b/food11"

# DatasetFolder could also be used to read the data.
# In the 2022 assignment the category is encoded in the file name, which does not
# fit DatasetFolder, so the custom FoodDataset is used instead.
# train_dataset = DatasetFolder(_dataset_dir + '/training', loader=lambda x: Image.open(x), extensions='jpg', transform=train_transform)
# valid_dataset = DatasetFolder(_dataset_dir + '/validation', loader=lambda x: Image.open(x), extensions='jpg', transform=test_transform)
# test_dataset = DatasetFolder(_dataset_dir + '/testing', loader=lambda x: Image.open(x), extensions='jpg', transform=test_transform)
# unlabeled_dataset = DatasetFolder(_dataset_dir + '/unlabeled', loader=lambda x: Image.open(x), extensions='jpg', transform=train_transform)

# Labeled splits: augmented training images, plain validation images.
train_dataset = FoodDataset(f"{_dataset_dir}/training", transform=train_transform)
valid_dataset = FoodDataset(f"{_dataset_dir}/validation", transform=test_transform)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# If using K-fold cross validation

# First merge all the datasets
# all_dataset = ConcatDataset([train_dataset, valid_dataset])

# from sklearn.model_selection import KFold
# kf = KFold(n_splits=5, shuffle=True, random_state=my_seed)
# k = 4
# kf = KFold(n_splits=k, shuffle=True, random_state=42)
# for fold, (train_idx, valid_idx) in enumerate(kf.split(all_dataset)):
#     train_set = Subset(all_dataset, train_idx)
#     valid_set = Subset(all_dataset, valid_idx)

#     # Create data loaders for training and validation sets
#     train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=True)
#     valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, pin_memory=True)


In [None]:
# "cuda" only when GPUs are available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Training budget. With patience larger than n_epochs the early-stopping
# branch of the training loop cannot trigger within a single run.
n_epochs = 80
patience = 300 # Stop early after this many epochs without improvement

# Build the network and move it to the selected device.
model = FoodClassifier().to(device)

# Cross-entropy is the loss for this classification task.
criterion = nn.CrossEntropyLoss()

# Adam optimizer; learning rate and weight decay are the knobs to tune.
optimizer = optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

# Bookkeeping for early stopping (not hyperparameters, do not change):
# best validation accuracy so far and epochs elapsed since it improved.
best_acc = 0
stale = 0


# Train the model

You can finish supervised learning by simply running the provided code without any modification.

The function "get_pseudo_labels" is used for semi-supervised learning.
It is expected to get better performance if you use unlabeled data for semi-supervised learning.
However, you have to implement the function on your own and need to adjust several hyperparameters manually.

For more details about semi-supervised learning, please refer to [Prof. Lee's slides](https://speech.ee.ntu.edu.tw/~tlkagk/courses/ML_2016/Lecture/semi%20(v3).pdf).

Again, please notice that utilizing external data (or pre-trained model) for training is **prohibited**.

In [None]:
class PseudoDataset(Dataset):
    """Pair the images of an existing dataset with replacement labels.

    ``x`` is indexed as ``x[i][0]`` to fetch the image (whatever label ``x``
    carries at ``x[i][1]`` is ignored) and ``y[i]`` supplies the label that is
    returned in its place.
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The label list defines how many samples are exposed.
        n_labels = len(self.y)
        return n_labels

    def __getitem__(self, id):
        image = self.x[id][0]
        pseudo_label = self.y[id]
        return image, pseudo_label

def get_pseudo_labels(dataset, model, threshold=0.65):
    """Generate pseudo-labels for ``dataset`` using ``model``.

    You are NOT allowed to use any models trained on external data for
    pseudo-labeling.

    Args:
        dataset: dataset yielding ``(image, label)`` pairs; the labels are ignored.
        model: classifier producing one row of logits per image.
        threshold (float): a sample is kept only when its highest softmax
            probability is strictly greater than this value.

    Returns:
        PseudoDataset: the confident samples of ``dataset`` paired with their
        predicted class indices.

    Note:
        The model is switched to eval mode for inference and left in train
        mode on return.
    """
    # Run inference wherever the model lives; only guess the device when the
    # model exposes no parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # No shuffling: positions in the loader must match indices in `dataset`.
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # Make sure the model is in eval mode.
    model.eval()

    idx = []
    labels = []
    offset = 0  # index in `dataset` of the first sample of the current batch

    # Gradients are not needed for labeling.
    with torch.no_grad():
        for img, _ in data_loader:
            probs = torch.softmax(model(img.to(device)), dim=-1)

            # Filter the whole batch at once instead of one sample at a time.
            confidence = probs.amax(dim=-1)
            predicted = probs.argmax(dim=-1)
            keep = torch.nonzero(confidence > threshold, as_tuple=True)[0]

            idx.extend((keep + offset).tolist())
            labels.extend(predicted[keep].tolist())
            offset += img.size(0)

    # Turn off the eval mode.
    model.train()

    print ("\nNew data: {:5d}\n".format(len(idx)))
    return PseudoDataset(Subset(dataset, idx), labels)

# Whether to do semi-supervised learning. When True, the training loop calls
# get_pseudo_labels on the unlabeled data at the start of every epoch and
# trains on the labeled and pseudo-labeled samples together.
do_semi = False

In [None]:
# Train for up to n_epochs, validating after every epoch, appending the
# validation result to the experiment log, and checkpointing the best model.
for epoch in range(n_epochs):
    # ---------- Training ----------
    # In each epoch, relabel the unlabeled dataset for semi-supervised learning.
    # Then you can combine the labeled dataset and pseudo-labeled dataset for the training.
    if do_semi:
        # NOTE(review): `unlabeled_set` is not defined in the visible part of this
        # file; it must be created before `do_semi` is enabled.
        # Obtain pseudo-labels for unlabeled data using trained model.
        pseudo_dataset = get_pseudo_labels(unlabeled_set, model)

        # Construct a new dataset and a data loader for training.
        # This is used in semi-supervised learning only.
        concat_dataset = ConcatDataset([train_dataset, pseudo_dataset])
        train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

    # Make sure the model is in train mode before training.
    model.train()

    # Per-batch loss and accuracy, stored as plain Python floats.
    train_loss = []
    train_accs = []

    # Iterate the training set by batches.
    for batch in tqdm(train_loader):
        imgs, labels = batch
        imgs, labels = imgs.to(device), labels.to(device)

        # Forward the data and calculate the cross-entropy loss.
        logits = model(imgs)
        loss = criterion(logits, labels)

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()
        # Compute the gradients for parameters
        loss.backward()

        # Clip the gradient norms for stable training.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Record the loss and accuracy; .item() stores floats instead of
        # keeping one device tensor alive per batch.
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")


    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):
        imgs, labels = batch
        imgs, labels = imgs.to(device), labels.to(device)

        # We don't need gradient in validation.
        with torch.no_grad():
            logits = model(imgs)
            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels)

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc.item())

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Build the validation summary once; a new best result is marked with "-> best".
    improved = valid_acc > best_acc
    log_line = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}"
    if improved:
        log_line += " -> best"

    # Print the information.
    print(log_line)

    # update logs: the line has to be sent to the opened file explicitly,
    # otherwise the log file is created but stays empty.
    with open(f"./{_exp_name}_log.txt","a") as log_file:
        print(log_line, file=log_file)

    # ---------- Early Stopping ----------
    if improved:
        best_acc = valid_acc
        stale = 0
        # Checkpoint the best model seen so far.
        torch.save(model.state_dict(), f"{_exp_name}.pth")
    else:
        stale += 1
        if stale >= patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

## Testing and generate prediction CSV

In [None]:
# Build the test loader; order must be preserved so predictions line up with the files.
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Reload the best checkpoint written by the training loop. map_location lets a
# checkpoint saved on a GPU be loaded on a CPU-only machine as well.
model_best = FoodClassifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}.pth", map_location=device))
model_best.eval()

# Predicted class index for every test image, in loader order.
prediction = []
with torch.no_grad():
    for data, _ in test_loader:
        test_pred = model_best(data.to(device))
        # argmax over the class dimension always yields a 1-D result, so a
        # final batch holding a single image still extends the list correctly
        # (squeeze() would collapse it to a scalar).
        prediction.extend(test_pred.argmax(dim=1).cpu().tolist())
        
        
#create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with zeros to at least four characters."""
    text = str(i)
    return text.rjust(4, "0")
# One row per test image: a zero-padded 1-based Id and the predicted Category.
sample_ids = [pad4(i) for i in range(1, len(test_set) + 1)]
df = pd.DataFrame({"Id": sample_ids, "Category": prediction})
df.to_csv("submission.csv", index=False)