In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

In [None]:
# Select the compute device once. Every CUDA-only call is guarded so the
# notebook also runs on a machine without a GPU.
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
print(cuda)
if cuda:
    # set_device / get_device_name raise when no CUDA device is present,
    # so they are only called when one is available.
    torch.cuda.set_device(device)
    print(torch.cuda.get_device_name(0))

True
Tesla T4


In [None]:
# Shared preprocessing pipeline: resize, centre-crop to 224, convert to a
# tensor, then normalise each channel with the mean/std given below.
pre_process = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Training split: one sub-folder per class, served in shuffled batches of 64.
ImageData = torchvision.datasets.ImageFolder(
    root='classification_data/train_data',
    transform=pre_process,
)
ImageLoader = DataLoader(ImageData, batch_size=64, shuffle=True, num_workers=4)

# Validation split: same folder layout and same preprocessing.
ValData = torchvision.datasets.ImageFolder(
    root='classification_data/val_data',
    transform=pre_process,
)
ValLoader = DataLoader(ValData, batch_size=64, shuffle=True, num_workers=4)

In [None]:
# Report the number of samples and classes found in each split.
print("train data length: {}, classes: {}".format(len(ImageData), len(ImageData.classes)))
print("val data length: {}, classes: {}".format(len(ValData), len(ValData.classes)))

train data length: 380638, classes: 4000
val data length: 8000, classes: 4000


In [None]:
# Basic Block for resnet
class BasicBlock(nn.Module):
    """Residual block: (3x3 conv -> BN -> ReLU -> 3x3 conv -> BN) + skip, then ReLU.

    Args:
        channels_1: number of input channels.
        channels_2: number of output channels.
        stride: stride of the first 3x3 convolution (and of the projection
            shortcut, when one is needed).

    The skip path is always added to the block output. It is the identity
    when input and output shapes already match (stride 1, equal channels),
    and a 1x1 conv + BN projection otherwise.

    NOTE: blocks that use the identity skip own no shortcut parameters, so a
    state_dict saved from a variant of this class that always created the
    projection will not load with strict key matching.
    """

    def __init__(self, channels_1, channels_2, stride):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.block = nn.Sequential(
            nn.Conv2d(channels_1, channels_2, kernel_size=3, stride=self.stride, padding=1, bias=False),
            nn.BatchNorm2d(channels_2),
            nn.ReLU(),
            nn.Conv2d(channels_2, channels_2, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(channels_2)
        )

        if stride != 1 or channels_1 != channels_2:
            # Shapes differ between input and output: project the input so
            # the two tensors can be summed.
            self.shortcut = nn.Sequential(
                nn.Conv2d(channels_1, channels_2, kernel_size=1, stride=self.stride, bias=False),
                nn.BatchNorm2d(channels_2)
            )
        else:
            # An empty Sequential returns its input unchanged (identity skip).
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return relu(block(x) + shortcut(x))."""
        return F.relu(self.block(x) + self.shortcut(x))

#xavier initialization
def init_weights(m):
    """Xavier-normal initialise the weight of a Conv2d or Linear module, in place.

    Modules whose exact type is anything else are left untouched, which
    makes the function suitable for ``model.apply``.
    """
    if type(m) not in (nn.Conv2d, nn.Linear):
        return
    torch.nn.init.xavier_normal_(m.weight.data)

#resnet 18 architecture following pytorch implementation
class Network(nn.Module):
    """ResNet-18 style classifier: conv stem, eight BasicBlocks, pooling, linear head.

    Args:
        in_channels: number of channels of the input images.
        classes: number of output classes (width of the final linear layer).
    """

    def __init__(self, in_channels, classes):
        super(Network, self).__init__()
        self.in_channels = in_channels
        self.classes = classes
        # NOTE(review): torchvision's ResNet stem uses padding=1 on this
        # max-pool; it is left unpadded here to keep the feature-map sizes
        # of the existing network unchanged.
        self.layer_seq = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BasicBlock(64, 64, 1),
            BasicBlock(64, 64, 1),
            BasicBlock(64, 128, 2),
            BasicBlock(128, 128, 1),
            BasicBlock(128, 256, 2),
            BasicBlock(256, 256, 1),
            BasicBlock(256, 512, 2),
            BasicBlock(512, 512, 1)
        )
        # Global average pooling over the whole final feature map, whatever
        # its spatial size. A fixed 4x4 window would only cover the top-left
        # corner of the 7x7 map that 224x224 inputs produce, and would break
        # the 512-wide linear layer for larger inputs.
        self.avgPool2d = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(512, classes)

    def forward(self, x):
        """Return class logits of shape (batch, classes)."""
        return self.linear(self.verify_forward(x))

    def verify_forward(self, x):
        """Return the flattened (batch, 512) features that feed the linear layer."""
        x = self.layer_seq(x)
        x = self.avgPool2d(x)
        return x.view(x.size(0), -1)

def init_weights(m):
    """Xavier-normal initialise ``m.weight`` when ``m`` is exactly a Conv2d or Linear.

    NOTE: this re-declares a function of the same name defined earlier in
    this cell; the two definitions behave identically.
    """
    wants_xavier = type(m) == nn.Conv2d or type(m) == nn.Linear
    if wants_xavier:
        torch.nn.init.xavier_normal_(m.weight.data)

In [None]:
# Build the 4000-class network on 3-channel input, Xavier-initialise its
# conv/linear weights and move it to `device`; then create the loss and the
# SGD optimiser.
model = Network(in_channels=3, classes=4000)
model = model.apply(init_weights).to(device)
criteria = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.15,
    momentum=0.9,
    weight_decay=5e-5,
)

In [None]:
import time
def train(model, ImageLoader, criterion, optimizer):
    """Run one training epoch and return ``(mean_loss, accuracy)``.

    Args:
        model: network to optimise; each batch is moved to the device of the
            model's first parameter.
        ImageLoader: iterable yielding ``(images, labels)`` batches.
        criterion: loss, called as ``criterion(output, labels)``.
        optimizer: optimizer that steps the model's parameters.

    Returns:
        Tuple ``(loss, acc)``: the loss averaged over every sample seen and
        the fraction of samples whose arg-max prediction equals the label.
        ``(0.0, 0.0)`` when the loader yields no batches.
    """
    model.train()
    # Follow the model's own device rather than a notebook-level global, so
    # the function works wherever the model lives.
    device = next(model.parameters()).device
    current_loss = 0.0
    current_correct = 0
    total = 0
    for batch, (images, labels) in enumerate(ImageLoader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the epoch value is a
        # per-sample mean (assumes criterion averages over the batch, the
        # default reduction of nn.CrossEntropyLoss).
        current_loss += loss.item() * images.size(0)
        predictions = torch.argmax(output, dim=1)
        current_correct += (predictions == labels).sum().item()
        total += len(labels)

        # Report after the update so the figure includes the batch it names.
        if (batch + 1) % 500 == 0:
            print("batch: {} | accuracy: {}".format(batch + 1, current_correct / total))

    if total == 0:
        # Empty loader: nothing was trained; avoid dividing by zero.
        return 0.0, 0.0
    # Normalise by samples actually seen (equals the dataset length unless
    # the loader drops or repeats samples).
    return current_loss / total, current_correct / total

def validate(model, ValLoader):
    """Measure classification accuracy of ``model`` over ``ValLoader``.

    Args:
        model: network to evaluate; it is switched to eval mode and each
            batch is moved to the device of its first parameter.
        ValLoader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Fraction of samples whose arg-max prediction equals the label
        (``0.0`` when the loader yields no batches). The value is also
        printed.
    """
    model.eval()
    device = next(model.parameters()).device
    current_correct = 0
    total = 0
    # No gradients are needed anywhere in evaluation.
    with torch.no_grad():
        for batch, (images, labels) in enumerate(ValLoader):
            images = images.to(device)
            labels = labels.to(device)
            predictions = torch.argmax(model(images), dim=1)
            current_correct += (predictions == labels).sum().item()
            total += len(labels)
            if (batch + 1) % 500 == 0:
                print("batch: {} | accuracy: {}".format(batch + 1, current_correct / total))
    acc = current_correct / total if total else 0.0
    print("val accuracy: {}".format(acc))
    return acc

In [None]:
# Train for 15 epochs. From the second epoch on, every parameter group's
# learning rate is multiplied by 0.85 before the epoch starts. A checkpoint
# is written after every epoch and validation runs after every second one.
epochs = 15
for epoch in range(epochs):
    if epoch > 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= 0.85

    start = time.time()
    epoch_train_loss, epoch_train_acc = train(model, ImageLoader, criteria, optimizer)
    print("Epoch: {}, Time: {}".format(epoch+1, int(time.time()-start)))
    print("Epoch: {} Train Loss : {:.4f}  Train Accuracy: {:.4f}".format(epoch+1,epoch_train_loss,epoch_train_acc))

    # The same file is overwritten each epoch, so it always holds the latest state.
    state = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    torch.save(state, "HW2P2_classification_resnet18_epoch15.pt")

    if epoch % 2 == 1:
        model.eval()
        validate(model, ValLoader)
        model.train()

In [None]:
# Dataset and loader for the verification images. Shuffling is off so that
# batches arrive in the same order as VerificationData.imgs.
VerificationData = torchvision.datasets.ImageFolder(
    root='verification',
    transform=pre_process,
)
VerificationLoader = DataLoader(
    VerificationData,
    batch_size=128,
    shuffle=False,
    num_workers=4,
)

In [None]:
# Report how many verification images and class folders were found.
print(
    "verification data length: {}, classes: {}".format(
        len(VerificationData),
        len(VerificationData.classes),
    )
)

verification data length: 69097, classes: 1


In [None]:
# using modified forward method in model for generating embeddings (removed linear layer)
def embeddings(model, VerificationLoader):
    """Compute ``model.verify_forward`` for every batch and stack the results.

    Args:
        model: network exposing ``verify_forward(images)``; it is switched to
            eval mode and each batch is moved to the device of its first
            parameter.
        VerificationLoader: iterable yielding ``(images, labels)`` batches;
            the labels are ignored.

    Returns:
        A tensor with the per-batch outputs concatenated along dim 0, in
        loader order, or ``None`` when the loader yields no batches.
    """
    model.eval()
    device = next(model.parameters()).device
    batch_outputs = []
    with torch.no_grad():
        for images, _ in VerificationLoader:
            batch_outputs.append(model.verify_forward(images.to(device)))
    if not batch_outputs:
        return None
    # Concatenate once at the end: re-concatenating inside the loop copies
    # every earlier row again on each batch (quadratic in the batch count).
    return torch.cat(batch_outputs, dim=0)

In [None]:
# loading checkpointed model
# map_location remaps the stored tensors onto the active `device`, so a
# checkpoint written on a GPU also loads on a CPU-only machine.
checkpoint = torch.load("HW2P2_classification_resnet18_epoch15.pt", map_location=device)
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
# Zero-based index of the last epoch that finished before the save.
epoch_start = checkpoint['epoch']

In [None]:
# generate embeddings
# NOTE(review): this assignment rebinds the name `embeddings` from the
# function to its return value. Re-running this cell without first re-running
# the cell that defines the function therefore fails, because the name no
# longer refers to a callable. Later cells index `embeddings` as the result.
embeddings = embeddings(model, VerificationLoader)
print(embeddings.shape)

torch.Size([69097, 512])


In [None]:
# Map every verification image to its embedding row. The key is the image
# path with its first 13 characters removed (the length of 'verification/',
# i.e. the dataset root plus the separator).
embed_dict = {
    path[13:]: embeddings[idx]
    for idx, (path, _) in enumerate(VerificationData.imgs)
}
print(len(embed_dict))
torch.save(embed_dict, "embed_dict.pt")

69097


In [None]:
# Reload the saved path -> embedding mapping. map_location places the tensors
# on the active `device`, so a file written on a GPU also loads on a
# CPU-only machine.
embed_dict = torch.load("embed_dict.pt", map_location=device)
print(len(embed_dict))

69097


In [None]:
# creating list of verification images pair names
# Each line is split on whitespace into its fields; the context manager
# closes the file handle as soon as the lines are read.
with open("verification_pairs_val.txt") as pairs_file:
    verification_images = [line.split() for line in pairs_file.read().splitlines()]
print(verification_images[:2])

[['verification_data/00041961.jpg', 'verification_data/00044353.jpg', '0'], ['verification_data/00007133.jpg', 'verification_data/00060449.jpg', '1'], ['verification_data/00041961.jpg', 'verification_data/00020166.jpg', '0'], ['verification_data/00013102.jpg', 'verification_data/00055525.jpg', '1'], ['verification_data/00002921.jpg', 'verification_data/00041331.jpg', '0']]


In [None]:
# Dataset and loader for the held-out classification test images, followed
# by an accuracy evaluation with the current model.
TestData = torchvision.datasets.ImageFolder(
    root='classification_data/test_data',
    transform=pre_process,
)
TestLoader = DataLoader(
    TestData,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)
print(len(TestData))
validate(model, TestLoader)

In [None]:
# Score every validation pair by the cosine similarity of its two embeddings
# (dim=0 because each embedding is a single 1-D vector).
cos = nn.CosineSimilarity(dim = 0)

similarity_scores = np.array(
    [cos(embed_dict[pair[0]], embed_dict[pair[1]]).item() for pair in verification_images]
)

# Sanity check: exactly one score per pair.
print(len(verification_images) == len(similarity_scores))

True


In [None]:
# Third field of every validation pair, parsed as an integer label.
labels = np.array([int(pair[2]) for pair in verification_images])

In [None]:
from sklearn.metrics import roc_auc_score
# Sanity check on the label array: number of pairs and container type.
print(len(labels))
print(type(labels))

8805
<class 'numpy.ndarray'>


In [None]:
# ROC AUC of the cosine-similarity scores against the validation pair labels.
print(roc_auc_score(labels, similarity_scores))

0.9345160740876468


In [None]:
# Read the test pairs. The raw lines are kept (they become the submission
# ids) and are also split into their two image paths. The context manager
# closes the file handle as soon as the lines are read.
with open("verification_pairs_test.txt") as pairs_file:
    test_images = pairs_file.read().splitlines()
test_images_separated = [line.split() for line in test_images]
print(test_images[:2])
print(test_images_separated[:2])

['verification_data/00020839.jpg verification_data/00035322.jpg', 'verification_data/00002921.jpg verification_data/00021567.jpg']
[['verification_data/00020839.jpg', 'verification_data/00035322.jpg'], ['verification_data/00002921.jpg', 'verification_data/00021567.jpg']]


In [None]:
# Cosine similarity of the two embeddings of every test pair.
test_similarity_scores = [
    cos(embed_dict[pair[0]], embed_dict[pair[1]]).item()
    for pair in test_images_separated
]

# Sanity check: exactly one score per pair.
print(len(test_images_separated) == len(test_similarity_scores))

True


In [None]:
import pandas as pd
# One row per test pair: the raw pair line as 'Id', its similarity as 'Category'.
results = pd.DataFrame(
    list(zip(test_images, test_similarity_scores)),
    columns=['Id', 'Category'],
)

In [None]:
# Preview the first rows of the submission table.
print(results.head())

                                                  Id  Category
0  verification_data/00020839.jpg verification_da...  0.764679
1  verification_data/00002921.jpg verification_da...  0.364209
2  verification_data/00011732.jpg verification_da...  0.409506
3  verification_data/00052778.jpg verification_da...  0.510879
4  verification_data/00053948.jpg verification_da...  0.640916


In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
results.to_csv('results_1.csv', index=False)