In [None]:
# Mount Google Drive at /content/gdrive so the later cells can read and write
# files under gdrive/MyDrive/AI_Project1.
# NOTE(review): google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

In [2]:
### Import necessary libraries ###
import os
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, random_split, Subset
from torchvision.models import resnet18
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix

from LoadData import CustomDataset
# Seed every global RNG the notebook draws from.
# - NumPy's global RNG is what sklearn's KFold uses for shuffling when no
#   random_state is passed.
# - torch's global RNG drives random_split, DataLoader shuffling and the weight
#   initialisation of newly created layers; np.random.seed does not touch it.
np.random.seed(42)
torch.manual_seed(42)

In [3]:
### Hyperparameters ###
# Target size handed to transforms.Resize in both the train and val pipelines.
# NOTE(review): the trailing (144, 260) looks like an alternative setting that
# was tried earlier -- confirm before relying on it.
re_size = (196, 196) #(144, 260)
# Number of samples per batch for every DataLoader in this notebook.
batch_size = 32
# Learning rate passed to the Adam optimizer.
lr = 0.001
# Number of folds used by KFold cross-validation.
k = 3
# Output size of the replaced fully-connected layer of ResNet-18.
num_classes = 5
# Number of training epochs run inside each fold.
epochs = 2

In [4]:
### Define Transforms ###
# Per-channel mean / std used for normalisation in both pipelines (these are
# the standard ImageNet statistics used with torchvision's pretrained models).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize -> random horizontal flip -> tensor -> normalize
train_transform = transforms.Compose([
    transforms.Resize(re_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Validation/test pipeline: identical, minus the random flip
val_transform = transforms.Compose([
    transforms.Resize(re_size),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

def custom_collate_train(batch):
    """Collate a list of (image, label) samples into one training batch.

    Every image is run through the module-level ``train_transform`` before
    the results are stacked, so the transform is applied at batching time
    rather than inside the dataset.

    Args:
        batch: sequence of ``(image, label)`` pairs as yielded by the dataset.

    Returns:
        ``(images, labels)``: the transformed images combined with
        ``torch.stack`` and the labels wrapped by ``torch.tensor``.
    """
    raw_images, raw_labels = zip(*batch)
    transformed = list(map(train_transform, raw_images))
    return torch.stack(transformed), torch.tensor(raw_labels)

def custom_collate_val(batch):
    """Collate a list of (image, label) samples into one validation/test batch.

    Every image is run through the module-level ``val_transform`` (no random
    augmentation) before the results are stacked.

    Args:
        batch: sequence of ``(image, label)`` pairs as yielded by the dataset.

    Returns:
        ``(images, labels)``: the transformed images combined with
        ``torch.stack`` and the labels wrapped by ``torch.tensor``.
    """
    raw_images, raw_labels = zip(*batch)
    transformed = list(map(val_transform, raw_images))
    return torch.stack(transformed), torch.tensor(raw_labels)

In [5]:
### Load datasets ###
# Load formatted dataset from the directory "AI_Project1/data".
# The path is resolved against the current working directory, so the Drive
# mount point must live directly under it.
dataset = CustomDataset(os.path.join(os.getcwd(), 'gdrive/MyDrive/AI_Project1/data'))

# Split the dataset into train/test subsets, with an 8:2 ratio
dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)
test_size = dataset_size - train_size

# random_split draws from torch's RNG, which np.random.seed does not control.
# A dedicated seeded generator keeps the train/test membership identical on
# every (re-)run of this cell, so test images can never leak into training
# between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Load the train/test sets (augmentation only on the training loader)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, collate_fn=custom_collate_train)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, collate_fn=custom_collate_val)

In [None]:
# Disabled experiment: the whole cell is a bare string literal, so none of the
# code below executes. It would fit a PCA with 300 components on the flattened
# training images; the matching (also disabled) transform steps sit inside
# train_model()/test_model(), where the 300 components are reshaped to
# (3, re_shape, re_shape) = (3, 10, 10).
"""
### Apply PCA(optional) ###
from sklearn.decomposition import PCA

n_components = 300

# Define PCA model
pca = PCA(n_components=n_components)

re_shape = 10
all_flat_imgs = []
# Collect training set
for batch_img, _ in train_loader:
  # Reshape the image size to match the specified input shape for PCA
  batch_flat_img = batch_img.reshape(batch_img.size(0), 3*re_size[0]*re_size[1])
  all_flat_imgs.append(batch_flat_img)
all_flat_imgs = torch.vstack(all_flat_imgs)

# Fit PCA model
pca.fit(all_flat_imgs)
"""

In [7]:
# Define functions for model training and testing process
def train_model(model, loader, criterion, optimizer, device):
  """Train ``model`` for one pass (epoch) over ``loader``.

  Args:
    model: the network to optimise; it is switched to training mode.
    loader: iterable yielding ``(inputs, targets)`` batches.
    criterion: loss function called as ``criterion(outputs, targets)``.
    optimizer: optimizer that must have been built over ``model``'s parameters.
    device: device the batches are moved to before the forward pass.

  Returns:
    None. ``model`` is updated in place.
  """
  model.train()

  for inputs, targets in tqdm(loader, ncols=80):
    # (PCA part, disabled -- needs the fitted `pca` and `re_shape` from the PCA cell)
    # Reshape the inputs to match the specified input shape for PCA:
    #   inputs_flat = inputs.reshape(inputs.size(0), 3*re_size[0]*re_size[1])
    # Transform the batch with the trained PCA model:
    #   inputs_flat = pca.transform(inputs_flat)
    # Reshape the inputs back to the specified input shape for ResNet:
    #   inputs = torch.tensor(inputs_flat.reshape(inputs.size(0), 3, re_shape, re_shape))

    inputs, targets = inputs.to(device), targets.to(device)

    # Set the gradients to zero
    optimizer.zero_grad()

    # Forward propagation. Use the `model` argument, not a global network:
    # otherwise the gradients land on a different model than the one the
    # optimizer updates and `model` is never trained.
    outputs = model(inputs.float())
    loss = criterion(outputs, targets)

    # Backward propagation and parameter update
    loss.backward()
    optimizer.step()


def test_model(model, loader, criterion,  device):
  model.eval()

  test_loss = 0
  preds_history = torch.tensor([])
  labels_history = torch.tensor([])

  with torch.no_grad():
    for inputs, targets in tqdm(loader, ncols=80):
      """
      # (PCA Part)
      # (The process of applying PCA is the same as done in train_model())
      inputs_flat = inputs.reshape(inputs.size(0), 3*re_size[0]*re_size[1])
      inputs_flat = pca.transform(inputs_flat)
      inputs = torch.tensor(inputs_flat.reshape(inputs.size(0), 3, re_shape, re_shape))
      """

      inputs, targets = inputs.to(device), targets.to(device)

      # Make prediciton
      x = net(inputs.float())
      _, predicted = torch.max(x.data, 1)
      # Compute loss
      loss = criterion(x, targets)
      test_loss += loss.item()

      preds_history = torch.cat((preds_history, predicted.cpu()), dim=0)
      labels_history = torch.cat((labels_history, targets.cpu()), dim=0)


  # Compute the accuracy and confusion matrix
  acc = accuracy_score(labels_history.cpu().numpy(), preds_history.cpu().numpy())
  cnf = confusion_matrix(labels_history.cpu().numpy(), preds_history.cpu().numpy())

  return test_loss/len(loader), acc, cnf

In [None]:
### Train ResNet ###
# Define K-fold cross-validation model.
# A fixed random_state makes the folds identical every time this cell is run.
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Set device once; it does not change between folds
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# K-fold training iterations
per_fold_result = {}
best_acc = 0.0
best_model_path = os.path.join(os.getcwd(), 'gdrive/MyDrive/AI_Project1/result/CNN/resnet18.pth')
# torch.save does not create missing directories, so make sure the target exists
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)

for i, (train_idx, val_idx) in enumerate(kf.split(np.arange(len(train_dataset)))):
  print("Fold no.{}:".format(i + 1))

  # Training and validation subset of this fold
  train_subset = Subset(train_dataset, train_idx)
  val_subset = Subset(train_dataset, val_idx)
  subtrain_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2, collate_fn=custom_collate_train)
  subval_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2, collate_fn=custom_collate_val)

  ### Model Setting ###
  # Start every fold from a fresh pretrained model and replace its
  # fully-connected layer so it outputs `num_classes` logits
  net = resnet18(pretrained=True)
  net.fc = torch.nn.Linear(net.fc.in_features, num_classes)
  net.to(device)

  # Enable gradient computation for all parameters in the model (full fine-tuning)
  for param in net.parameters():
    param.requires_grad = True

  # Define loss function and optimizer
  criterion = CrossEntropyLoss()
  optimizer = Adam(net.parameters(), lr=lr)

  # n-epochs training iterations
  history = {'val_loss': [], 'val_acc': []}
  for epoch in range(epochs):
    # Fit the model on the current training subset
    train_model(net, subtrain_loader, criterion, optimizer, device)
    # Make prediction on the current validation subset
    val_loss, val_acc, _ = test_model(net, subval_loader, criterion, device)
    # test_model already returns the loss averaged over the batches, so it
    # must not be divided by len(subval_loader) a second time
    print("Testing Epoch {} | loss: {:.4f} | Accuracy:{:.4f}".format(epoch+1, val_loss, val_acc))

    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

  per_fold_result['fold{}'.format(i+1)] = history

  # Save the current model if its last-epoch validation accuracy is the best so far
  if history['val_acc'] and history['val_acc'][-1] > best_acc:
    best_acc = history['val_acc'][-1]
    torch.save(net.state_dict(), best_model_path)

In [None]:
# Load the best trained model
# Resolve the device here as well, so this cell does not depend on a value
# left behind by the cross-validation loop.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

best_net = resnet18()
# (The model setting process following is the same as before)
best_net.fc = torch.nn.Linear(best_net.fc.in_features, num_classes)
# map_location lets a checkpoint that was saved from GPU tensors be loaded
# on a CPU-only runtime as well
best_net.load_state_dict(torch.load(best_model_path, map_location=device))

best_net.to(device)

for param in best_net.parameters():
  param.requires_grad = True

criterion = CrossEntropyLoss()
optimizer = Adam(best_net.parameters(), lr=lr)

# Retrain the model with the whole training set (a single pass over train_loader)
train_model(best_net, train_loader, criterion, optimizer, device)

In [None]:
### Test ###
# Make prediction with trained model
final_loss, final_acc, final_cnf = test_model(best_net, test_loader, criterion, device)

# Show the prediction result.
# test_model already returns the loss averaged over the batches, so it is
# reported as-is instead of being divided by len(test_loader) again.
print("Final loss: {:.4f} | Accuracy: {:.4f}".format(final_loss, final_acc))
print("\nConfusion_matrix:\n{}".format(final_cnf))

In [None]:
### Show results ###
import matplotlib.pyplot as plt

# Class index -> human-readable class name
int2str = {0: 'Crayon_Shin', 1: 'Doraemon', 2: 'Hua_Family', 3: 'Ilu', 4: 'Maruko'}

# Make sure the output folder exists before any figure is saved
result_dir = os.path.join(os.getcwd(), 'gdrive/MyDrive/AI_Project1/result/CNN')
os.makedirs(result_dir, exist_ok=True)

# Inference only: put BatchNorm layers in evaluation mode explicitly instead
# of relying on an earlier cell having done so
best_net.eval()

# Show the image and its prediction (at most 10, fewer if the test set is smaller)
for i in range(min(10, len(test_dataset))):
  image, label = test_dataset[i]
  image_trans = val_transform(image).to(device)

  # No gradients are needed for a prediction
  with torch.no_grad():
    x = best_net(image_trans.unsqueeze(0))
  predicted = x.argmax(dim=1)

  plt.imshow(image)
  plt.text(0, -0.1, "Pred : {}\nLabel: {}".format(int2str[predicted.cpu().item()], int2str[label]), transform=plt.gca().transAxes)
  plt.axis('off')

  plt.savefig(os.path.join(result_dir, f'prediction_{i+1}.jpg'))
  plt.show()

  plt.clf()