## 1. Define the Hyperparameters

In [1]:
## Hyperparameters
NUM_EPOCHS =  20
BATCH_SIZE= 64
LR = 0.01

## 2. Importing Packages

In [2]:
import os
import time
import cv2
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import trange
import torch
import torch.nn as nn
import torchvision
from torchvision import models, transforms
from torch.autograd import Variable
from torchvision.io import read_image
import torch.utils.data as Data
from torch.utils.data import Dataset, WeightedRandomSampler
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score,f1_score


# Check if cuda is available
use_cuda = True if torch.cuda.is_available() else False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Using GPU (cuda)' if use_cuda else 'Using CPU')

Using GPU (cuda)


## 3. Data Loading

In [3]:
def multilabel_onehot(y, num_class=20):
    """
    Define onehot converter, which can turn the labels from integer to a onehot vector
    Args:
        y (list): A list with labels, can be single element or multiple elements
        num_class: The total number of possible classes. 
                   In this project it is identified to be 20 classes.
    return:
        binarized label (array): a numpy array to represent labels with 0 and 1 values.
    """
    onehot_y = np.eye(num_class)[y]
    return onehot_y.sum(axis=0)


def reverse_onehot(y):
    """
    Define reverse onehot converter, which can turns onehot vector to labels string
    Args:
        y (array or tensor): An array or tensor which is onehot encoded with 0 and 1
    return:
        str_y: string of labels, separated by space
    """ 
    # Convert the label from onehot vector to tensor, then convert to list
    label_y = np.argwhere(y==1)
    list_y = label_y[0].tolist()
    str_y = " ".join([str(val) for val in list_y])
    return str_y


## Data Augmentation for training data ##
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),              # Flip image randomly
    transforms.RandomRotation(degrees=15),          # Rotate the image randomly
    transforms.CenterCrop([224,224]),
    # transforms.Resize([224,224]),                   # Resize every image to 224x224
    transforms.ToTensor(),                           # Nomalize to [0,1]
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),

])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.CenterCrop([224,224]),
    # transforms.Resize([224,224]),           
    transforms.ToTensor(),                    # Nomalize to [0,1]
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

In [4]:
class Asm2Dataset(Dataset):
    def __init__(self, csv_path, img_folder, transform=None, target_transform=None, have_label=True):
        """Initialize the dataset, loading images and labels."""    
        self.img_labels = pd.read_csv(csv_path)
        self.img_folder = img_folder
        self.transform = transform
        self.target_transform = target_transform
        self.have_label = have_label
        self.image_col = self.img_labels["ImageID"]
        self.caption_col = self.img_labels["Caption"]
        if self.have_label:
            self.labels_col = self.img_labels["Labels"]
        else:
            self.labels_col = torch.zeros(len(self.img_labels))

    def __len__(self):
        """Return the length of the dataset"""
        return len(self.img_labels)

    def __getitem__(self, index):
        """Return image and its label by calling index"""
        img_path = os.path.join(self.img_folder, self.image_col[index])
        image = read_image(img_path)
        caption = self.caption_col[index]
        label = self.labels_col[index]
        if self.have_label:
            label = label.split(' ')
            label = [int(l) for l in label]
        else: 
            label = [int(label)]
        label_onehot = multilabel_onehot(label)
        label_tensor = torch.tensor(label_onehot)
        if self.transform: 
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label_tensor

In [5]:
data_withlabel = Asm2Dataset("./Code/Input/test.csv", "./Code/Input/data", train_transform)

testing_data = Asm2Dataset("./Code/Input/test.csv", "./Code/Input/data", test_transform, have_label=False)

# Split the labeled data as 80% training set and 20% validation set
train_size = int(0.8 * len(data_withlabel))
valid_size = len(data_withlabel) - train_size
training_data, validing_data = torch.utils.data.random_split(data_withlabel, [train_size, valid_size])

In [6]:
# Creating dataloader 
train_loader = Data.DataLoader(dataset=training_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = Data.DataLoader(dataset=validing_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(dataset=testing_data, batch_size=BATCH_SIZE, shuffle=False)
dataloader = {"train": train_loader,
              "valid": valid_loader}

## 4. Model Definition

In [7]:
model = models.densenet121(pretrained=True) # load the pretrained model
num_features = model.classifier.in_features # get the no of on_features in last Linear unit
print(num_features)
## freeze the entire convolution base
for param in model.parameters(): 
    param.requires_grad_(False)

1024


In [8]:
def fc_network(input_features , number_classes ,dropout_prob=0.5 ,activation_func =nn.ReLU):
    features_lst = [num_features , num_features*2 , num_features, num_features//2 ]
    layers = []
    for in_f ,out_f in zip(features_lst[:-1] , features_lst[1:]):
        layers.append(nn.Linear(in_f , out_f))
        layers.append(activation_func())
        layers.append(nn.BatchNorm1d(out_f))
        if dropout_prob !=0 : layers.append(nn.Dropout(dropout_prob))
    layers.append(nn.Linear(features_lst[-1] , number_classes))
    return nn.Sequential(*layers)

fc_layers = fc_network(num_features, 20) # because 20-classes
model.classifier = fc_layers # replace the fully connected layer

model

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

## 5. Optimizer and criterion

In [9]:
model = model.to(device)
criterion = nn.BCEWithLogitsLoss()
# criterion.to(device)
optimizer = optim.Adagrad(model.parameters(), lr=LR)
sgdr_partial = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=LR)

In [10]:
def train(model, dataloader, criterion, optimizer, scheduler, epochs):
    # Initialize lists to store loss and fscore each epoch
    LOSS_train = []
    LOSS_valid = []
    FSCORE_train = []
    FSCORE_valid = []  

    for epoch in trange(epochs, desc="Epochs"):
        result = []
        train_loss = 0.0
        train_f1 = 0.0
        valid_loss = 0.0
        valid_f1 = 0.0
        # Begin training
        model.train()
        for data, labels in dataloader["train"]:
            data, labels = data.to(device), labels.to(device)

            output = model(data)                            # Forward Passing
            loss = criterion(output, labels)                # Compute loss
            preds = torch.sigmoid(output) > 0.5             # Make prediction
            preds = preds.to(torch.float32)                 # Convert to float32 torch
            loss.backward()                                 # Compute gradients
            optimizer.step()                                # Update the model parameters
            scheduler.step()                                # Update learning rate with scheduler
            optimizer.zero_grad()                           # Clear the gradients
            train_loss += loss.item() * data.size(0)        # Compute training loss
            train_f1 += f1_score(labels.to("cpu").to(torch.int).numpy(), 
                                 preds.to("cpu").to(torch.int).numpy(), 
                                 average="samples") * data.size(0)

        # Begin validation
        model.eval()
        for data, labels in dataloader["valid"]:
            data, labels = data.to(device), labels.to(device)

            output = model(data)                            # Forward Passing
            loss = criterion(output, labels)                # Compute loss
            preds = torch.sigmoid(output) > 0.5             # Make prediction
            preds = preds.to(torch.float32)                 # Convert to float32 torch
            valid_loss += loss.item() * data.size(0)        # Compute validation loss
            valid_f1 += f1_score(labels.to("cpu").to(torch.int).numpy(), 
                                 preds.to("cpu").to(torch.int).numpy(), 
                                 average="samples") * data.size(0)
        
        # Compute epoch loss and f1
        epoch_train_loss = train_loss / len(dataloader["train"].dataset)
        epoch_train_f1 = train_f1 / len(dataloader["train"].dataset)
        epoch_valid_loss = valid_loss / len(dataloader["valid"].dataset)
        epoch_valid_f1 = valid_f1 / len(dataloader["valid"].dataset)

        # Record epoch loss and f1 to the list
        LOSS_train.append(epoch_train_loss)
        LOSS_valid.append(epoch_valid_loss)
        FSCORE_train.append(epoch_train_f1)
        FSCORE_valid.append(epoch_valid_f1)       

        result.append(f'{epoch} TRAIN loss: {epoch_train_loss:.4f}, F1: {epoch_train_f1:.4f}   VALID loss: {epoch_valid_loss:.4f}, F1: {epoch_valid_f1:.4f}')
        print(result)
    return LOSS_train, LOSS_valid, FSCORE_train, FSCORE_valid

## Training

In [11]:
LOSS_train, LOSS_valid, FSCORE_train, FSCORE_valid = train(model, dataloader, criterion, optimizer, sgdr_partial, NUM_EPOCHS)

Epochs:   5%|███▋                                                                      | 1/20 [01:41<32:17, 101.98s/it]

['0 TRAIN loss: 0.1956, F1: 0.6514   VALID loss: 0.1025, F1: 0.7604']


Epochs:  10%|███████▌                                                                   | 2/20 [03:15<29:04, 96.90s/it]

['1 TRAIN loss: 0.1136, F1: 0.7400   VALID loss: 0.0941, F1: 0.7834']


Epochs:  15%|███████████▎                                                               | 3/20 [04:47<26:46, 94.52s/it]

['2 TRAIN loss: 0.1060, F1: 0.7615   VALID loss: 0.0917, F1: 0.7923']


Epochs:  20%|███████████████                                                            | 4/20 [06:21<25:12, 94.53s/it]

['3 TRAIN loss: 0.1028, F1: 0.7685   VALID loss: 0.0913, F1: 0.7971']


Epochs:  25%|██████████████████▊                                                        | 5/20 [07:57<23:43, 94.90s/it]

['4 TRAIN loss: 0.1005, F1: 0.7791   VALID loss: 0.0896, F1: 0.8045']


Epochs:  30%|██████████████████████▌                                                    | 6/20 [09:32<22:12, 95.18s/it]

['5 TRAIN loss: 0.0987, F1: 0.7803   VALID loss: 0.0894, F1: 0.8016']


Epochs:  35%|██████████████████████████▎                                                | 7/20 [11:02<20:14, 93.40s/it]

['6 TRAIN loss: 0.0969, F1: 0.7834   VALID loss: 0.0887, F1: 0.8069']


Epochs:  40%|██████████████████████████████                                             | 8/20 [12:35<18:38, 93.21s/it]

['7 TRAIN loss: 0.0958, F1: 0.7871   VALID loss: 0.0885, F1: 0.8059']


Epochs:  45%|█████████████████████████████████▊                                         | 9/20 [14:06<16:59, 92.67s/it]

['8 TRAIN loss: 0.0945, F1: 0.7898   VALID loss: 0.0884, F1: 0.8047']


Epochs:  50%|█████████████████████████████████████                                     | 10/20 [15:43<15:38, 93.86s/it]

['9 TRAIN loss: 0.0938, F1: 0.7908   VALID loss: 0.0885, F1: 0.8017']


Epochs:  55%|████████████████████████████████████████▋                                 | 11/20 [17:16<14:02, 93.64s/it]

['10 TRAIN loss: 0.0935, F1: 0.7921   VALID loss: 0.0874, F1: 0.8056']


Epochs:  60%|████████████████████████████████████████████▍                             | 12/20 [18:48<12:24, 93.02s/it]

['11 TRAIN loss: 0.0927, F1: 0.7931   VALID loss: 0.0869, F1: 0.8060']


Epochs:  65%|████████████████████████████████████████████████                          | 13/20 [20:23<10:56, 93.82s/it]

['12 TRAIN loss: 0.0915, F1: 0.7961   VALID loss: 0.0875, F1: 0.8080']


Epochs:  70%|███████████████████████████████████████████████████▊                      | 14/20 [21:58<09:25, 94.22s/it]

['13 TRAIN loss: 0.0910, F1: 0.7972   VALID loss: 0.0867, F1: 0.8150']


Epochs:  75%|███████████████████████████████████████████████████████▌                  | 15/20 [23:34<07:52, 94.54s/it]

['14 TRAIN loss: 0.0902, F1: 0.8003   VALID loss: 0.0872, F1: 0.8053']


Epochs:  80%|███████████████████████████████████████████████████████████▏              | 16/20 [25:08<06:18, 94.55s/it]

['15 TRAIN loss: 0.0894, F1: 0.8029   VALID loss: 0.0869, F1: 0.8089']


Epochs:  85%|██████████████████████████████████████████████████████████████▉           | 17/20 [26:43<04:43, 94.57s/it]

['16 TRAIN loss: 0.0892, F1: 0.8001   VALID loss: 0.0860, F1: 0.8134']


Epochs:  90%|██████████████████████████████████████████████████████████████████▌       | 18/20 [28:13<03:06, 93.12s/it]

['17 TRAIN loss: 0.0887, F1: 0.8018   VALID loss: 0.0863, F1: 0.8114']


Epochs:  95%|██████████████████████████████████████████████████████████████████████▎   | 19/20 [29:50<01:34, 94.35s/it]

['18 TRAIN loss: 0.0880, F1: 0.8059   VALID loss: 0.0866, F1: 0.8074']


Epochs: 100%|██████████████████████████████████████████████████████████████████████████| 20/20 [31:25<00:00, 94.28s/it]

['19 TRAIN loss: 0.0877, F1: 0.8047   VALID loss: 0.0871, F1: 0.8086']





In [21]:
# Densenet121, after 10 epochs, lr = 0.001

print(f"Train loss: {LOSS_train}\n")
print(f"Valid loss: {LOSS_valid}\n")
print(f"Train F1: {FSCORE_train}\n")
print(f"Valid F1: {FSCORE_valid}\n")

Train loss: [0.19557030546075022, 0.11363606836774183, 0.10601480300691975, 0.10282945186170525, 0.10047956630263886, 0.09870872370939617, 0.09690353180943062, 0.09584857225648954, 0.09452675135164164, 0.09383235938684326, 0.09346716051476651, 0.09265342182959256, 0.09148688464596398, 0.09098498438784641, 0.09024341662935895, 0.0894428305059876, 0.08918556811906693, 0.0886550970302926, 0.08802787160990252, 0.0876819197271115]

Valid loss: [0.10251195984107357, 0.09413597433203692, 0.09174743369726852, 0.09127008170315802, 0.08957335359554189, 0.0893622749598387, 0.08866589648261815, 0.08854848622584935, 0.08840478810821638, 0.08854049963715727, 0.08744280228993347, 0.08692458013650131, 0.08748444264621348, 0.08666173846572416, 0.08716003464531961, 0.0868661766333694, 0.0860131311502636, 0.08629443173156227, 0.08660120466760342, 0.08707316773013715]

Train F1: [0.6513980442982767, 0.7400408760221261, 0.7615105940355935, 0.7684978301790807, 0.7791008597883604, 0.7802697480759981, 0.78338

In [14]:
def createCheckpoint(filename="./LatestCheckpoint.pt"):
      checkpoint = {
                  'epoch': NUM_EPOCHS,
                  'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  "batch_size": BATCH_SIZE,
                  } # save all important stuff
      torch.save(checkpoint, filename)
createCheckpoint(filename="model_checkpoint.pt")

In [15]:
def predict(row, model):
    row = torch.tensor(row)
    yhat = model(row)
    preds = torch.sigmoid(yhat).data > 0.5
    preds = preds.to(torch.float32)
    return preds

In [16]:
def testset_pred():
    test_label = []
    model.eval()
    for data , target in test_loader:
        data = data.to(device)
        output = model(data)
        preds = torch.sigmoid(output).data > 0.5
        preds = preds.to(torch.float32)
        preds = np.array(preds.cpu())
        for y in preds:
            # Convert onehot encoded y_hat to array -> convert array to list
            # -> convert list to string
            y_hat = np.argwhere(y==1)
            y_hat = [i[0] for i in y_hat]
            y_hat = " ".join(str(elem) for elem in y_hat)
            test_label.append(y_hat)
    return test_label

In [17]:
test_df = pd.read_csv('./Code/Input/test.csv')
test_df = test_df.drop(labels="Caption", axis=1)
test_label = testset_pred()

In [20]:
test_df["Labels"] = test_label
test_df.to_csv("Predicted_labels.txt", index=False)