## HW3 Image Classification
#### Solve image classification with convolutional neural networks (CNNs).
#### If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

In [1]:
# Check which GPU this session got (model, driver/CUDA version, memory in use).
!nvidia-smi

Sun Mar 26 13:57:06 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.01    Driver Version: 470.82.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

### Import Packages

In [2]:
# Experiment tag: used below as the filename prefix for the training log
# ("<name>_log.txt") and for the best checkpoint ("<name>_best.ckpt").
_exp_name = "sample"

In [3]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim.lr_scheduler as lr_scheduler
from PIL import Image
# from autoaugment import ImageNetPolicy
import torchvision.models as models
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
# from tqdm.auto import tqdm
from tqdm import tqdm
import random

In [4]:
myseed = 6666  # set a random seed for reproducibility
# Ask cuDNN for deterministic kernels and disable its auto-tuner, which may
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Seed every RNG the notebook imports: Python's `random`, NumPy and PyTorch.
# `random` was imported but previously left unseeded.
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

### Transforms

In [5]:
# Evaluation pipeline (validation / test): no augmentation is needed here, so the
# PIL image is only resized to a fixed shape and converted to a tensor.
test_tfm = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# Training pipeline. The same pipeline can also be reused at test time to produce
# several augmented views of each image for ensembling (test-time augmentation).
train_tfm = transforms.Compose(
    [
        # Bring every image to height = width = 128 before augmenting.
        transforms.Resize(size=(128, 128)),
        # Geometric augmentations.
        transforms.RandomRotation(degrees=30),
        transforms.RandomResizedCrop(size=(128, 128)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomAffine(degrees=30),
        # Photometric augmentations.
        transforms.RandomGrayscale(p=0.2),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        # ImageNetPolicy(),
        # ToTensor() must come after all PIL-based transforms.
        transforms.ToTensor(),
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

### Datasets

In [6]:
class FoodDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Each sample is ``(image, label)``. The label is the integer prefix of the
    file name before the first underscore (``"<label>_<anything>.jpg"``); files
    whose prefix is not an integer (e.g. unlabeled test images) get label ``-1``.

    Args:
        path: Directory that is scanned for ``*.jpg`` files when ``files`` is
            not given.
        tfm: Transform applied to every loaded PIL image.
        files: Optional explicit sequence of image paths. When provided it is
            used as-is and ``path`` is not listed.
    """

    def __init__(self,path,tfm=test_tfm,files = None):
        super().__init__()
        self.path = path
        if files is not None:
            # An explicit file list wins; skip the directory scan entirely so
            # the dataset also works when `path` does not exist.
            self.files = files
        else:
            self.files = sorted(
                os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
            )

        self.transform = tfm

    def __len__(self):
        return len(self.files)

    def __getitem__(self,idx):
        fname = self.files[idx]
        # Close the file handle deterministically and force 3 channels so that
        # grayscale / RGBA / palette images still produce (3, H, W) tensors.
        with Image.open(fname) as raw:
            im = self.transform(raw.convert("RGB"))

        try:
            label = int(os.path.basename(fname).split("_")[0])
        except ValueError:
            label = -1 # test has no label

        return im,label

### Model

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PreActBlock(nn.Module):
    """Pre-activation residual block with a squeeze-and-excitation (SE) gate.

    Layout: BN -> ReLU -> 3x3 conv -> BN -> ReLU -> 3x3 conv, then the result is
    rescaled per channel by the SE gate and added to the shortcut branch.

    Args:
        in_planes: Number of input channels.
        planes: Number of output channels.
        stride: Stride of the first 3x3 convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        # A 1x1 projection is only created when the shape changes; otherwise the
        # identity is used in forward().
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
            )

        # SE layers (bottleneck with reduction ratio 16, at least one channel).
        squeezed = max(planes // 16, 1)
        self.fc1 = nn.Conv2d(planes, squeezed, kernel_size=1)
        self.fc2 = nn.Conv2d(squeezed, planes, kernel_size=1)

    def forward(self, x):
        """Return the block output for ``x`` of shape (N, in_planes, H, W)."""
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))

        # Squeeze: global average pooling to (N, C, 1, 1). Adaptive pooling gives
        # the same result as a full-size kernel on square maps and also handles
        # non-square feature maps.
        w = F.adaptive_avg_pool2d(out, 1)
        # Excitation: bottleneck MLP (as 1x1 convs) followed by a sigmoid gate.
        w = F.relu(self.fc1(w))
        w = torch.sigmoid(self.fc2(w))
        # Rescale each channel by its gate.
        out = out * w

        out += shortcut
        return out


class SENet(nn.Module):
    """ResNet-style backbone assembled from SE residual blocks.

    Args:
        block: Block class, called as ``block(in_planes, planes, stride)``.
        num_blocks: Four integers, the number of blocks in each stage.
        num_classes: Size of the final linear classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; updated by _make_layer as stages are stacked.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest stride 1."""
        layers = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map images of shape (N, 3, H, W) to logits of shape (N, num_classes)."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For 32x32 inputs this equals the former fixed
        # 4x4 pooling; for any other input size (e.g. 128x128) it still yields
        # exactly 512 features for the linear layer.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def SENet18(num_classes=10):
    """Build an SENet with two pre-activation SE blocks in each of its four stages.

    Args:
        num_classes: Number of output logits (defaults to 10, as before).
    """
    return SENet(PreActBlock, [2,2,2,2], num_classes=num_classes)


# Smoke test: build the default SENet18 and push one random 1x3x32x32 batch
# through it to check that the layer shapes line up. `net` and `y` are not
# referenced again in the visible cells.
net = SENet18()
y = net(torch.randn(1,3,32,32))



### Configurations

In [8]:
# [0.5,1,0.25,1,0.125,0.25,0.5,1,0.5,0.5,0.5]
import torch
import torch.nn as nn
import torch.nn.functional as F
# [0.5,0.125,0.75,0.125,0.9,0.75,0.5,0.125,0.5,0.5,0.5]
class FocalLoss(nn.Module):
    """Focal loss: cross-entropy scaled by ``(1 - p_t) ** gamma`` and optional class weights.

    Args:
        gamma: Focusing exponent applied to ``1 - p_t``.
        alpha: Class weights. A list is used as one weight per class, a single
            number ``a`` becomes the two-class weights ``[a, 1 - a]``, and any
            other value (e.g. ``None`` or a tensor) is stored unchanged.
        size_average: Return the mean of the per-element losses when true,
            otherwise their sum.
    """

    def __init__(self, gamma=1.2, alpha=[0.25, 0.25, 0.45, 0.25, 0.45, 0.45, 0.25, 0.25, 0.25, 0.25, 0.25], size_average=True):
        super().__init__()
        if isinstance(alpha, list):
            weights = torch.Tensor(alpha)
        elif isinstance(alpha, (float, int)):
            weights = torch.Tensor([alpha, 1 - alpha])
        else:
            weights = alpha
        self.gamma = gamma
        self.alpha = weights
        self.size_average = size_average

    def forward(self, input, target):
        """Compute the loss for logits ``input`` and integer class indices ``target``."""
        if input.dim() > 2:
            # Fold every non-channel axis into the batch axis:
            # (N, C, d1, ...) -> (N, C, L) -> (N, L, C) -> (N*L, C).
            channels = input.size(1)
            input = input.view(input.size(0), channels, -1)
            input = input.permute(0, 2, 1).reshape(-1, channels)
        class_idx = target.view(-1, 1)

        # Log-probability (and probability) assigned to the true class.
        logpt = F.log_softmax(input, dim=1).gather(1, class_idx).view(-1)
        pt = logpt.exp()

        if self.alpha is not None:
            # Lazily move/cast the class weights so they match the logits.
            if self.alpha.type() != input.type():
                self.alpha = self.alpha.type_as(input)
            logpt = logpt * self.alpha.gather(0, class_idx.view(-1))

        loss = -1 * (1 - pt) ** self.gamma * logpt
        return loss.mean() if self.size_average else loss.sum()



In [9]:
# Training configuration: device, model, hyperparameters, loss, optimizer and
# LR scheduler. Use "cuda" only when a GPU is available.
import torch
import torch.nn as nn
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
# The inference cell rebuilds the same architecture to load the checkpoint, so
# keep the two in sync when switching models.
# NOTE(review): resnet50 is built without a `num_classes` argument, while the
# FocalLoss default `alpha` has 11 entries — confirm the classifier head size
# is intended.
# model = SENet18()
model = models.resnet50(weights = None)
# model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=False)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet161', weights=None)
# model= nn.DataParallel(model)
model.to(device)
# Only referenced by the commented-out multi-model ensemble lines below.
ensemble_num = 3
# models = [models.resnet18(weights = None).to(device) for i in range(ensemble_num)]
# optimizers = [torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4) for model in models]
# Batch size shared by the train, validation and test loaders.
batch_size = 64

# The maximum number of training epochs.
n_epochs = 300

# If no improvement in 'patience' epochs, early stop.
patience = 40

# Loss function: FocalLoss (defined above) with its default gamma and class
# weights. Label-smoothed cross-entropy is kept as a commented-out alternative.
# criterion = nn.CrossEntropyLoss(label_smoothing = 0.08)
criterion = FocalLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-3)
# NOTE(review): both `scheduler.step(...)` calls in the training loop are
# commented out, so as written this scheduler never changes the learning rate.
scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0 = 40,T_mult = 2)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max = 10)
# schedulers = [lr_scheduler.CosineAnnealingLR(optimizer, T_max = 10) for model in models]

### Dataloader

In [10]:
# Construct train and valid datasets and their loaders.
# Training uses the augmenting transform and is reshuffled every epoch.
train_set = FoodDataset("/kaggle/input/ml2023spring-hw3/train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
# Validation uses the plain resize transform and a fixed order: shuffling adds
# nothing to evaluation and makes per-batch statistics vary between epochs.
valid_set = FoodDataset("/kaggle/input/ml2023spring-hw3/valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

### Start Training

In [11]:
# def trainer_k_folds(config, dataset_dir, batch_size, train_tfm, test_tfm, devices):
#     train_dir = os.path.join(dataset_dir,"training")
#     val_dir = os.path.join(dataset_dir,"validation")
#     train_files = [os.path.join(train_dir, x) for x in os.listdir(train_dir) if x.endswith('.jpg')]
#     val_files = [os.path.join(val_dir, x) for x in os.listdir(val_dir) if x.endswith('.jpg')]
#     total_files = np.array(train_files + val_files)
#     random.shuffle(total_files)
#     num_folds = config['num_folds']   
#     train_folds = np.array_split(np.arange(len(total_files)), num_folds)
#     train_folds = np.array(train_folds, dtype=object) # avoid errors when the folds have unequal lengths (ragged array)
        
#     for i in range(num_folds):
#         print(f'\n\nStarting Fold: {i} ********************************************')  
#         train_data = total_files[np.concatenate(np.delete(train_folds, i)) ] 
#         val_data = total_files[train_folds[i]]        
    
#         train_set = FoodDataset(tfm=train_tfm, files=train_data)
#         train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True, drop_last = True)    
#         valid_set = FoodDataset(tfm=test_tfm, files=val_data)
#         valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True, drop_last = True)
#         print('训练集总长度是 {:d}, batch数量是 {:.2f}'.format(len(train_set), len(train_set)/ batch_size))
#         print('验证集总长度是 {:d}, batch数量是 {:.2f}'.format(len(valid_set), len(valid_set)/ batch_size))
        
# #         tep = config['model_path']
# #         config['model_path'] += f"Fold_{i}_best"
# #         config['best_acc'] = 0.0
#         model = models.resnet18().to(device)
#         # model.load_state_dict(torch.load('models/foldmodel0.0001')) pre-training for a few epochs first may speed up the training of each subsequent model
#         trainer(train_loader, valid_loader, model, config, devices)
#         config['best_accs'].append(config['best_acc'])
#         config['model_path'] = tep



In [None]:
# Train / validate loop with best-checkpoint saving and early stopping.
#
# Every epoch: one pass over `train_loader` with gradient clipping, one pass
# over `valid_loader` without gradients, a line appended to the experiment log,
# and a checkpoint written whenever validation accuracy improves.
#
# Loss and accuracy are averaged per sample (not per batch), so a smaller final
# batch is not over-weighted and the result does not depend on batch order.
# This assumes `criterion` returns the batch mean.
#
# NOTE(review): `scheduler` is never stepped in this loop, so the learning rate
# stays constant; add `scheduler.step()` here if the warm-restart schedule is
# actually wanted.

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0
log_path = f"./{_exp_name}_log.txt"

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0

    for imgs, labels in tqdm(train_loader):
        # Move the batch to the model's device once.
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Forward pass; the criterion takes raw logits.
        logits = model(imgs)
        loss = criterion(logits, labels)

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()
        loss.backward()

        # Clip the gradient norms for stable training.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Accumulate plain Python numbers so no tensors are kept alive on the device.
        batch_n = labels.size(0)
        train_loss_sum += loss.item() * batch_n
        train_correct += (logits.argmax(dim=-1) == labels).sum().item()
        train_seen += batch_n

    train_loss = train_loss_sum / train_seen
    train_acc = train_correct / train_seen

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    valid_loss_sum = 0.0
    valid_correct = 0
    valid_seen = 0

    # We don't need gradients in validation; no_grad() also saves memory.
    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader):
            imgs = imgs.to(device)
            labels = labels.to(device)

            logits = model(imgs)
            loss = criterion(logits, labels)

            batch_n = labels.size(0)
            valid_loss_sum += loss.item() * batch_n
            valid_correct += (logits.argmax(dim=-1) == labels).sum().item()
            valid_seen += batch_n

    valid_loss = valid_loss_sum / valid_seen
    valid_acc = valid_correct / valid_seen

    improved = valid_acc > best_acc

    # Print the information and append the same line to the log file.
    # (Previously the file was opened but the line only went to stdout.)
    valid_msg = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}"
    if improved:
        valid_msg += " -> best"
    print(valid_msg)
    with open(log_path, "a") as log_file:
        print(valid_msg, file=log_file)

    # save models
    if improved:
        # Report the 1-based epoch, consistent with the Train/Valid lines.
        print(f"Best model found at epoch {epoch + 1}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        # Stop after exactly `patience` consecutive epochs without improvement.
        if stale >= patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

### Dataloader for test

In [13]:
# Construct test datasets.
# `test_loader` serves the plain (resize-only) view of every test image.
test_set = FoodDataset("/kaggle/input/ml2023spring-hw3/test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# Test-time augmentation: five extra loaders over the same files, each applying
# the random training transform. Order is kept fixed (shuffle=False) so that
# predictions from all loaders line up image by image.
test_loaders = []
for i in range(5):
    test_set_i = FoodDataset("/kaggle/input/ml2023spring-hw3/test", tfm=train_tfm)
    # Wrap the augmented dataset; previously the plain `test_set` was passed
    # here, so no augmentation was ever applied.
    test_loader_i = DataLoader(test_set_i, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
    test_loaders.append(test_loader_i)

### Testing and generate prediction CSV

In [14]:
# Rebuild the architecture used for training and load the best checkpoint.
# The architecture here must match the one that produced the checkpoint.
# model_best = SENet18()
model_best = models.resnet50(weights = None)
model_best.to(device)

# map_location lets a checkpoint saved on GPU be loaded in a CPU-only session.
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt", map_location=device))

model_best.eval()

# Ensemble weights: the plain test view counts 0.6, each augmented view 0.1.
base_weight = 0.6
tta_weight = 0.1

# preds[0] holds the logits of the plain test loader, preds[1:] those of the
# test-time-augmentation loaders; each entry has shape (num_images, num_classes).
preds = []
with torch.no_grad():
    for loader_idx, loader in enumerate([test_loader] + list(test_loaders)):
        # Progress bar only for the first (plain) pass, as before.
        batches = tqdm(loader) if loader_idx == 0 else loader
        batch_logits = [model_best(data.to(device)).cpu().numpy() for data, _ in batches]
        preds.append(np.concatenate(batch_logits, axis=0))

# Numeric array of shape (num_loaders, num_images, num_classes) instead of an
# object array, so the weighted sum and argmax run vectorized.
pred_np = np.stack(preds)
tmp = base_weight * pred_np[0]
for tta_pred in pred_np[1:]:
    tmp = tmp + tta_weight * tta_pred

prediction = np.argmax(tmp, axis = 1)

100%|██████████| 47/47 [00:22<00:00,  2.06it/s]


In [15]:
#create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with "0" characters to a width of at least 4."""
    text = str(i)
    return text.rjust(4, "0")
# One row per test image: zero-padded running index as "Id", predicted class
# as "Category"; written without the DataFrame index column.
df = pd.DataFrame(
    {
        "Id": [pad4(i) for i in range(len(test_set))],
        "Category": prediction,
    }
)
df.to_csv("submission.csv", index=False)

In [16]:
# device = "cuda" if torch.cuda.is_available() else "cpu"
# # Make sure the model is in eval mode.
# # Some modules like Dropout or BatchNorm affect if the model is in training mode.



# # Initialize a list to store the predictions.
# predictions = []

# # Iterate the testing set by batches.
# for batch in tqdm(test_loader):
#     # A batch consists of image data and corresponding labels.
#     # But here the variable "labels" is useless since we do not have the ground-truth.
#     # If printing out the labels, you will find that it is always 0.
#     # This is because the wrapper (DatasetFolder) returns images and labels for each batch,
#     # so we have to create fake labels to make it work normally.
#     imgs, labels = batch

# #     # We don't need gradient in testing, and we don't even have labels to compute loss.
# #     # Using torch.no_grad() accelerates the forward process.
#     with torch.no_grad():
#         logits1 = model1(imgs.to(device))
#         logits2 = model2(imgs.to(device))
#         logits3 = model3(imgs.to(device))
#         logits = (logits1 + logits2 + logits3) / 3

# #     # Take the class with greatest logit as prediction and record it.
#     predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

In [17]:

# # batch_size = 256

 
# preds = [[], [], [], [], [], []] 

# model1 = models.resnet18().to(device)
# model1.load_state_dict(torch.load('/kaggle/input/model-d/sample_best772.ckpt'))
# model2 = models.resnet18().to(device)
# model2.load_state_dict(torch.load('/kaggle/input/model-b/sample_best-78.ckpt'))
# model3 = models.resnet18().to(device)
# model3.load_state_dict(torch.load('/kaggle/input/model-c/sample_best782.ckpt'))
# models_ = [model1, model2, model3]
# with torch.no_grad():
#     for data, _ in test_loader:
#         batch_preds = [] 
#         for model_best in models_:
#             batch_preds.append(model_best(data.to(device)).cpu().data.numpy())
#         batch_preds = sum(batch_preds)
#         preds[0].extend(batch_preds.squeeze().tolist())    
    
#     for i, loader in enumerate(test_loaders):
#         for data, _ in loader:
#             batch_preds = []
#             for model_best in models_:
#                 batch_preds.append(model_best(data.to(device)).cpu().data.numpy())
#             batch_preds = sum(batch_preds)
#             preds[i+1].extend(batch_preds.squeeze().tolist())
# pred_np = np.array(preds, dtype = object)
# tmp = 0.6*pred_np[0]
# for i in range(1, 6):
#     tmp+=0.1*pred_np[i]

# prediction = np.argmax(tmp, axis = 1)


In [18]:
# # Save predictions into the file.
# with open("Ensemble2.csv", "w") as f:

#     # The first row must be "Id, Category"
#     f.write("Id,Category\n")

#     # For the rest of the rows, each image id corresponds to a predicted class.
#     for i, pred in  enumerate(prediction):
#          f.write(f"{i},{pred}\n")