### Check GPU

In [1]:
# Data manipulation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

#Pytorch
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from PIL import Image
import matplotlib.pyplot as plt

from torchsummaryX import summary
from torch.utils.tensorboard import SummaryWriter
from torchvision import models
from tqdm.notebook import tqdm


In [2]:
from tensorboard import version; print(version.VERSION)

2.4.1


In [3]:
# check GPU support
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

### Dataset and dataloader

In [4]:
PATH = 'C:/Users/trang/OneDrive - HKUST Connect/COMP_4211/pa2/' 

- resize all the images to a size of 32x32
- convert them to single-channel images

Dataset.getitem() should return:
- two 32x32, single-channel images
- one target label

In [5]:
class ImageDataset(Dataset):
    def __init__(self, info_file, index_file, image_dir, transform=None):
        self.info_df = pd.read_csv(info_file)  
        self.index_df = pd.read_csv(index_file, sep=',',header =None,names=['index','source'])
        #self.classes = np.array(self.info_df.columns[1:]) # image labels 
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, idx):
        img_index1 = self.info_df.iloc[idx, 0]
        img_name1 = os.path.join(self.image_dir, str(self.index_df.iloc[img_index1-1, 1])[5:])
        image1 = Image.open(img_name1)
        image1 = image1.convert('L')
        
        img_index2 = self.info_df.iloc[idx, 1]
        img_name2 = os.path.join(self.image_dir, str(self.index_df.iloc[img_index2-1, 1])[5:])
        image2 = Image.open(img_name2)
        image2 = image2.convert('L')
        
        label = self.info_df.iloc[idx, 2]
        
        if self.transform is not None:
            image1 = self.transform(image1)
            image2 = self.transform(image2)
        
        return image1, image2, label

    def __len__(self):
        return len(self.info_df)

Check dataset

In [6]:
# more transform functions can refer to https://pytorch.org/docs/stable/torchvision/transforms.html
transform = transforms.Compose(
    [transforms.Resize((32,32)),
     transforms.ToTensor(),    # range [0, 255]  -> [0.0,1.0] Convert a PIL Image or numpy.ndarray (H x W x C)  to tensor (C x H x W) 
     transforms.Normalize((0.5,), (0.5,))   # channel=（channel-mean）/std  -> [-1, 1]
     ])

# if the image has one channel (grayscale), normalise operation will be 
#transforms.Normalize((0.5, ), (0.5, ))

In [7]:
dataset = ImageDataset(PATH + 'train.csv', PATH + 'index.txt',PATH, transform=transform)

Load image dataset

In [8]:
train_set = ImageDataset(PATH + 'train.csv', PATH + 'index.txt', PATH, transform=transform)
valid_set = ImageDataset(PATH + 'valid.csv', PATH + 'index.txt', PATH, transform=transform)
test_set = ImageDataset(PATH + 'test.csv', PATH + 'index.txt', PATH, transform=transform)

train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=128, shuffle=False)
test_loader = DataLoader(test_set, batch_size=128,shuffle=False)


### Define Convolutional Neural Network

In [9]:
class Net(nn.Module):
    def __init__(self, aggregation_type):
        super(Net, self).__init__()
        
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding = 1) 
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU(inplace=True)
            
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding = 1)  
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU(inplace=True)
            
        self.maxPool = nn.MaxPool2d(kernel_size=2, stride=2)
            
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding = 1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU(inplace=True)
            
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding = 1)
        self.bn4 = nn.BatchNorm2d(128)
        self.relu4 = nn.ReLU(inplace=True)
            
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding = 1)
        self.bn5 = nn.BatchNorm2d(256)
        self.relu5 = nn.ReLU(inplace=True)
            
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding = 1)
        self.bn6 = nn.BatchNorm2d(512)
        self.relu6 = nn.ReLU(inplace=True)
            
        self.avg = nn.AvgPool2d(kernel_size=16,stride=1)
            
        self.flatten = nn.Flatten()
            
        
        self.aggregation_type = aggregation_type
        if(self.aggregation_type=="concatenation"):
            self.fc1 = nn.Linear(1024,512) # concatenation
        elif(self.aggregation_type=="absolute_substraction"):
            self.fc1 = nn.Linear(512,512) # absolute substraction
        self.drop_out = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512,1)

    def forward(self, image1, image2):
        
        image1 = self.relu1(self.bn1(self.conv1(image1)))
        image1 = self.relu2(self.bn2(self.conv2(image1)))
        image1 = self.maxPool(image1)
        image1 = self.relu3(self.bn3(self.conv3(image1)))
        image1 = self.relu4(self.bn4(self.conv4(image1)))
        image1 = self.relu5(self.bn5(self.conv5(image1)))
        image1 = self.relu6(self.bn6(self.conv6(image1)))
        image1 = self.avg(image1)
        image1 = self.flatten(image1)
        
        image2 = self.relu1(self.bn1(self.conv1(image2)))
        image2 = self.relu2(self.bn2(self.conv2(image2)))
        image2 = self.maxPool(image2)
        image2 = self.relu3(self.bn3(self.conv3(image2)))
        image2 = self.relu4(self.bn4(self.conv4(image2)))
        image2 = self.relu5(self.bn5(self.conv5(image2)))
        image2 = self.relu6(self.bn6(self.conv6(image2)))
        image2 = self.avg(image2)
        image2 = self.flatten(image2)
        
        if(self.aggregation_type=="concatenation"):
            h3 = torch.cat((image1,image2),1) #concatenation
        elif(self.aggregation_type=="absolute_substraction"):
            h3 = torch.abs(torch.sub(image1, image2)) #absolute substraction

        output = F.relu(self.fc1(h3))
        output = self.drop_out(output)
        output = self.fc2(output)
        output = nn.Sigmoid()(output)
        #output = torch.reshape(output,(-1,))
        
        return output

#### Function count the total number of trainable parameters

When aggregation function is absolute difference

In [10]:
model = Net('absolute_substraction').cuda()
summary(model, torch.zeros((128, 1, 32, 32)).cuda(), torch.zeros((128, 1, 32, 32)).cuda())

RuntimeError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0; 4.00 GiB total capacity; 104.04 MiB already allocated; 35.87 MiB free; 120.00 MiB reserved in total by PyTorch)

When aggregation function is concatenation

In [None]:
model = Net('concatenation').cuda()
summary(model, torch.zeros((128, 1, 32, 32)).cuda(), torch.zeros((128, 1, 32, 32)).cuda())

### 4.2.3 Training and Validation

In [None]:
def save_checkpoint(save_path, model, optimizer, val_loss):
    if save_path==None:
        return
    save_path = save_path 
    state_dict = {'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'val_loss': val_loss}

    torch.save(state_dict, save_path)
    print(f'Model saved to {save_path}')

def load_checkpoint(save_path, model, optimizer):
    save_path = save_path #f'cifar_net.pt'
    state_dict = torch.load(save_path)
    model.load_state_dict(state_dict['model_state_dict'])
    optimizer.load_state_dict(state_dict['optimizer_state_dict'])
    val_loss = state_dict['val_loss']
    print(f'Model loaded from {save_path}, with val loss: {val_loss}')
    return val_loss

def TRAIN(net, train_loader, valid_loader,  num_epochs, criterion, optimizer, val_loss,threshold, device, save_name):
    
    step_list = []
    training_loss_list = []
    valid_loss_list = []
    
    if val_loss==None:
        best_val_loss = float("Inf")  
    else: 
        best_val_loss=val_loss
        print('Resume training')


    no_step = 0;
    for epoch in range(num_epochs):  # loop over the dataset multiple times
        net.train()
        running_loss = 0.0
        running_corrects = 0
        for input1,input2, labels in tqdm(train_loader):
            
            no_step +=1
            input1 = input1.to(device)
            input2 = input2.to(device)
            labels = labels.to(device)
            
            labels = labels.unsqueeze(1)
            labels = labels.float()

            '''Training of the model'''
            # Forward pass
            outputs = net(input1, input2)
            
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            training_loss = loss.item()
            running_loss += loss.item()
            preds = torch.gt(outputs, threshold).int()                       
            running_corrects += torch.sum(preds == labels.data)            
            
            with torch.no_grad():
                net.eval()
                running_loss_valid = 0.0
                running_corrects_valid = 0
                for input1_valid,input2_valid, labels_valid in tqdm(valid_loader):
            
                    input1_valid = input1_valid.to(device)
                    input2_valid = input2_valid.to(device)
                    labels_valid = labels_valid.to(device)

                    labels_valid = labels_valid.unsqueeze(1)
                    labels_valid = labels_valid.float()

                    outputs_valid = net(input1_valid,input2_valid)
                    loss_valid = criterion(outputs_valid, labels_valid)

                    running_loss_valid += loss_valid.item()
                    preds_valid = torch.gt(outputs_valid, threshold).int()
                    running_corrects_valid += torch.sum(preds_valid == labels_valid.data)

                valid_loss = running_loss_valid / len(valid_loader)
                valid_acc = running_corrects_valid / float(len(valid_loader.dataset))
            
            if(no_step%10==0):
                step_list.append(no_step)
                training_loss_list.append(training_loss)
                valid_loss_list.append(valid_loss)
                
                print("step ",no_step," training loss: ",training_loss,", validation loss: ",valid_loss,"\n")
            

        train_loss = running_loss / len(train_loader)
        train_acc = running_corrects / float(len(train_loader.dataset))

        with torch.no_grad():
            net.eval()
            running_loss = 0.0
            running_corrects = 0
            for input1,input2, labels in tqdm(valid_loader):
            
                input1 = input1.to(device)
                input2 = input2.to(device)
                labels = labels.to(device)
                
                labels = labels.unsqueeze(1)
                labels = labels.float()

                outputs = net(input1,input2)
                loss = criterion(outputs, labels)

                running_loss += loss.item()
                preds = torch.gt(outputs, threshold).int()
                running_corrects += torch.sum(preds == labels.data)

            valid_loss = running_loss / len(valid_loader)
            valid_acc = running_corrects / float(len(valid_loader.dataset))

        print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}, Valid Loss: {:.4f},  Valid Acc: {:.4f}'
              .format(epoch+1, num_epochs, train_loss, train_acc, valid_loss, valid_acc))

        if valid_loss < best_val_loss:
            best_val_loss = valid_loss
            save_checkpoint(save_name, net, optimizer, best_val_loss)
    
    plt.plot(step_list,training_loss_list,label="Training Loss")
    plt.plot(step_list,valid_loss_list,label="Validation Loss")
    plt.xlabel('Number of steps')
    # Set a title of the current axes.
    plt.title('Traning and Validation loss curves')
    # show a legend on the plot
    plt.legend()
    # Display a figure.
    plt.show()
    
    print('Finished Training')

In [None]:
model = Net("absolute_substraction").cuda()
num_epochs = 30
best_val_loss = None
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
threshold = 0.5
save_path = f'cifar_net.pt'
model = model.to(device)


TRAIN(model, train_loader, valid_loader, num_epochs, criterion, optimizer, best_val_loss,threshold, device, save_path)

### 4.2.4 Evaluation

In [None]:
def eval(net, valid_loader):
    max_valid_acc = 0
    optimal_threshold = 0
    threshold_array = np.arange(0.0, 1.0, 0.02)
    for threshold in threshold_array:
        print(threshold)
        with torch.no_grad():
            running_loss = 0.0
            running_corrects = 0
            for input1,input2, labels in tqdm(valid_loader):

                input1 = input1.to(device)
                input2 = input2.to(device)

                labels = labels.to(device)
                labels = labels.unsqueeze(1)
                labels = labels.float()

                outputs = net(input1,input2)
                preds = torch.gt(outputs, threshold).int()

                running_corrects += torch.sum(preds == labels.data)

            valid_acc = running_corrects / float(len(valid_loader.dataset))
            print("valid_acc: ", valid_acc)
            if(valid_acc > max_valid_acc):
                max_valid_acc = valid_acc
                optimal_threshold = threshold
    return max_valid_acc,optimal_threshold
                    

In [None]:
max_valid_acc,optimal_threshold = eval(model, valid_loader)

In [None]:
print("validation accuracy: ",max_valid_acc," with threshold: ", optimal_threshold)

### Predict output

In [None]:
class ImageTestset(Dataset):
    def __init__(self, info_file, index_file, image_dir, transform=None):
        self.info_df = pd.read_csv(info_file)  
        self.index_df = pd.read_csv(index_file, sep=',',header =None,names=['index','source'])
        #self.classes = np.array(self.info_df.columns[1:]) # image labels 
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, idx):
        img_index1 = self.info_df.iloc[idx, 0]
        img_name1 = os.path.join(self.image_dir, str(self.index_df.iloc[img_index1-1, 1])[5:])
        image1 = Image.open(img_name1)
        image1 = image1.convert('L')

        img_index2 = self.info_df.iloc[idx, 1]
        img_name2 = os.path.join(self.image_dir, str(self.index_df.iloc[img_index2-1, 1])[5:])
        image2 = Image.open(img_name2)
        image2 = image2.convert('L')
        
        if self.transform is not None:
            image1 = self.transform(image1)
            image2 = self.transform(image2)
        
        return image1, image2

    def __len__(self):
        return len(self.info_df)

In [None]:
test_set = ImageTestset(PATH + 'test.csv', PATH + 'index.txt', PATH, transform=transform)
test_loader = DataLoader(test_set,
                         num_workers = 0,
                         batch_size=1,
                         shuffle=False)
test_df = pd.read_csv(PATH + 'test.csv')

In [None]:
predictions=[]
i = 0
with torch.no_grad():
    for input1,input2 in tqdm(test_loader):
        input1 = input1.to(device)
        input2 = input2.to(device)

        outputs = model(input1,input2)
        preds = torch.gt(outputs, optimal_threshold).int()
    
        predictions.append(preds.cpu().numpy().ravel()[0])
        
test_df['target'] = predictions

test_df.to_csv("submission_QA8_Optimization_1.csv", index=False)#submission.csv should be placed directly in current fold.
test_df.head(50)#show the result to be committed