In [1]:
# Mount Google Drive at /gdrive; later cells read the dataset CSVs, the face
# images and the pretrained backbone weights from paths under /gdrive/MyDrive.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
# Checkpoint file handed to torch.load() in a later cell. It is a relative
# path, so it is resolved against the notebook's current working directory.
MODEL_PATH = 'model.pth.tar'

In [3]:
import os
import math
import pickle
import cv2
from glob import glob
import time

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import matplotlib.pyplot as plt
import matplotlib.image as img

In [4]:
# Reference: https://github.com/cydonia999/VGGFace2-pytorch
# Q. Cao, L. Shen, W. Xie, O. M. Parkhi, A. Zisserman, VGGFace2: A dataset for recognising faces across pose and age, 2018.
# https://arxiv.org/pdf/1710.08092.pdf

def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with one pixel of zero padding.

    Arguments:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    Returns:
        The freshly constructed ``nn.Conv2d`` layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)

# This SEModule is not used.
class SEModule(nn.Module):
    """Squeeze-and-Excitation gate: rescales each channel of the input by a learned weight in (0, 1).

    Arguments:
        planes: number of channels of the feature map being gated.
        compress_rate: divisor applied to ``planes`` to get the width of the
            intermediate 1x1 convolution.
    """

    def __init__(self, planes, compress_rate):
        super(SEModule, self).__init__()
        self.conv1 = nn.Conv2d(planes, planes // compress_rate, kernel_size=1, stride=1, bias=True)
        self.conv2 = nn.Conv2d(planes // compress_rate, planes, kernel_size=1, stride=1, bias=True)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied channel-wise by its excitation weights (same shape as ``x``)."""
        module_input = x
        # Global average pool down to 1x1. The adaptive form equals
        # avg_pool2d(kernel_size=H) for square maps and, unlike it, also
        # squeezes non-square maps over their full spatial extent.
        x = F.adaptive_avg_pool2d(module_input, 1)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.sigmoid(x)
        return module_input * x


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    Arguments:
        inplanes: channels of the incoming feature map.
        planes: channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input to form the shortcut;
            when ``None`` the input itself is used as the shortcut.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        # Main branch: conv -> bn -> relu -> conv -> bn.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut branch: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual bottleneck with an inlined Squeeze-and-Excitation gate.

    The block outputs ``planes * expansion`` channels. The SE gate is built from
    ``conv4``/``conv5`` directly on the block, so those attribute names are part
    of the state-dict layout.

    Arguments:
        inplanes: channels of the incoming feature map.
        planes: channels of the two inner convolutions.
        stride: stride applied by the first 1x1 convolution (default 1).
        downsample: optional module applied to the input to form the shortcut;
            when ``None`` the input itself is used.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

        # SE gate: squeeze to out_planes / 16 channels, then expand back.
        compress_rate = 16
        self.conv4 = nn.Conv2d(out_planes, out_planes // compress_rate, kernel_size=1, stride=1, bias=True)
        self.conv5 = nn.Conv2d(out_planes // compress_rate, out_planes, kernel_size=1, stride=1, bias=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``relu(gate * main_branch(x) + shortcut(x))``."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # SE gate. Pool over the whole spatial extent; the adaptive form equals
        # avg_pool2d(kernel_size=H) on square maps and also handles H != W,
        # where the fixed-kernel version pooled a partial window or failed.
        out2 = F.adaptive_avg_pool2d(out, 1)
        out2 = self.conv4(out2)
        out2 = self.relu(out2)
        out2 = self.conv5(out2)
        out2 = self.sigmoid(out2)

        if self.downsample is not None:
            residual = self.downsample(x)

        out = out2 * out + residual
        out = self.relu(out)
        return out


class SENet(nn.Module):
    """Four-stage convolutional network assembled from a caller-supplied block class.

    Arguments:
        block: block class; must expose an ``expansion`` attribute and accept
            ``(inplanes, planes, stride, downsample)``.
        layers: sequence giving the number of blocks in each of the four stages.
        num_classes: output width of the final linear layer (default 1000).
        include_top: when ``False``, ``forward`` returns the pooled feature map
            instead of the linear layer's output.
    """

    def __init__(self, block, layers, num_classes=1000, include_top=True):
        super(SENet, self).__init__()
        self.inplanes = 64
        self.include_top = include_top

        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # Four stages; every stage after the first halves the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        for stage_no, width in ((2, 128), (3, 256), (4, 512)):
            stage = self._make_layer(block, width, layers[stage_no - 1], stride=2)
            setattr(self, 'layer{}'.format(stage_no), stage)

        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise convolutions with N(0, sqrt(2 / (kh * kw * out_channels)))
        # and batch norms with weight 1 / bias 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` blocks and advance ``self.inplanes`` to its output width."""
        stage_width = planes * block.expansion

        # The first block needs a projection shortcut whenever it changes the
        # resolution or the channel count.
        shortcut = None
        if not (stride == 1 and self.inplanes == stage_width):
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, stage_width,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_width),
            )

        stage_blocks = [block(self.inplanes, planes, stride, shortcut)]
        self.inplanes = stage_width
        for _ in range(1, blocks):
            stage_blocks.append(block(self.inplanes, planes))

        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Run the stem, the four stages and the average pool; apply ``fc`` only if ``include_top``."""
        pipeline = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
            self.avgpool,
        )
        for step in pipeline:
            x = step(x)

        if self.include_top:
            flat = x.view(x.size(0), -1)
            return self.fc(flat)
        return x


def senet50(**kwargs):
    """Build a SENet-50: ``Bottleneck`` blocks in four stages of 3, 4, 6 and 3.

    Keyword arguments are passed through unchanged to the ``SENet`` constructor.
    """
    stage_depths = [3, 4, 6, 3]
    return SENet(Bottleneck, stage_depths, **kwargs)


def load_state_dict(model, fname):
    """
    Set parameters converted from Caffe models authors of VGGFace2 provide.
    See https://www.robots.ox.ac.uk/~vgg/data/vgg_face2/.
    Arguments:
        model: model whose ``state_dict()`` entries are overwritten in place.
        fname: file name of parameters converted from a Caffe model, assuming the file format is Pickle
            and the content is a mapping from parameter name to numpy array.
    Raises:
        KeyError: the file contains a name that is not in the model's state dict.
        RuntimeError: a value could not be copied into the matching model tensor
            (for example because the shapes are incompatible).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only call this with weight files obtained from a trusted source.
    with open(fname, 'rb') as f:
        weights = pickle.load(f, encoding='latin1')

    own_state = model.state_dict()
    for name, param in weights.items():
        if name not in own_state:
            raise KeyError('unexpected key "{}" in state_dict'.format(name))
        try:
            own_state[name].copy_(torch.from_numpy(param))
        except Exception as exc:
            # ``param`` is a numpy array, whose ``size`` is an int rather than a
            # method; report ``shape`` so building the message cannot itself fail.
            raise RuntimeError('While copying the parameter named {}, whose dimensions in the model are {} and whose '
                               'dimensions in the checkpoint are {}.'.format(
                                   name, own_state[name].size(), getattr(param, 'shape', '<unknown>'))) from exc

In [5]:
class BaseModel(nn.Module):
    """SENet-50 backbone with an embedding head and two classification heads.

    The backbone is created with an 8631-way top layer so the pickled weights
    can be loaded into it, then that top layer is replaced by a fresh
    2048 -> 1024 linear layer. Three linear heads read the resulting
    1024-d feature: a 512-d embedding, a 192-way head and a 966-way head.
    """

    def __init__(self):
        super().__init__()
        pretrained_classes = 8631
        self.backbone = senet50(num_classes=pretrained_classes, include_top=True)
        load_state_dict(self.backbone, '/gdrive/MyDrive/Kinship Recognition Starter/senet50_ft_weight.pkl')
        # Swap the pretrained classifier for a new, randomly initialised projection.
        self.backbone.fc = nn.Linear(2048, 1024)
        self.embedding = nn.Linear(1024, 512)
        self.family_classifier = nn.Linear(1024, 192)
        self.identity_classifier = nn.Linear(1024, 966)

    def forward(self, x):
        """Return ``(embedding, family_pred, identity_pred)`` for a batch ``x``.

        ``embedding`` is L2-normalised (``F.normalize`` with ``p=2`` over its
        default dimension); the two predictions are raw linear outputs.
        """
        shared = F.relu(self.backbone(x))

        unit_embedding = F.normalize(self.embedding(shared), p=2)
        family_logits = self.family_classifier(shared)
        identity_logits = self.identity_classifier(shared)

        return unit_embedding, family_logits, identity_logits

In [6]:
# Build the network and restore the trained checkpoint.
model = BaseModel()

# Use the GPU when one is present, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU session still loads on a CPU-only runtime.
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.eval()

# Probing every image in the train set would take too long, so only the images
# that appear in the train pairs (CSV columns 1 and 2) are embedded.
train_pair = pd.read_csv('/gdrive/MyDrive/Kinship Recognition Starter/train_ds.csv')
# dict.fromkeys de-duplicates while keeping first-seen order, so the image list
# is identical from run to run (a plain set has no guaranteed order).
train_pair_id = list(dict.fromkeys(
    list(train_pair.values[:, 1]) + list(train_pair.values[:, 2])
))

In [8]:
class ImageDataset(Dataset):
    """Dataset yielding ``(image, relative_path)`` for face images under ``data_root``.

    Arguments:
        data_root: directory prefix; it is concatenated directly with file
            names, so it must end with a path separator.
        transform: optional callable applied to the RGB ``PIL.Image``.
        is_train: when true, the images are the module-level ``train_pair_id``
            list (paths relative to ``data_root``); otherwise every ``*.jpg``
            directly inside ``data_root`` is used.
    """

    def __init__(self, data_root, transform=None, is_train=False):
        # Save the list of images this dataset will serve.
        super(ImageDataset, self).__init__()
        self.transform = transform

        self.data_root = data_root
        self.is_train = is_train
        if is_train:
            self.all_images = train_pair_id
        else:
            # glob returns files in arbitrary order; sort for reproducible runs.
            self.all_images = sorted(glob(data_root + "*.jpg"))

        # Not read anywhere in this class; kept so existing attribute access keeps working.
        self.img_pathes = []

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.all_images)

    def __getitem__(self, idx):
        """Return ``(image, path_relative_to_data_root)`` for position ``idx``."""
        if self.is_train:
            img_path = self.data_root + self.all_images[idx]
        else:
            img_path = self.all_images[idx]
        img = Image.open(img_path).convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        # Expand single-channel tensors to three channels. Guard on the type:
        # without a transform `img` is still a PIL image, whose `size` is a
        # tuple attribute and cannot be called.
        if torch.is_tensor(img) and img.size(0) == 1:
            img = img.repeat(3, 1, 1)

        return img, img_path[len(self.data_root):]

# bring a vector in the embedding space for each image
def get_embeddings(model, image_loader):
    """Embed every image served by ``image_loader``.

    Arguments:
        model: callable returning a 3-tuple whose first element is the batch
            of embeddings. If it exposes ``parameters()``, each batch is moved
            to the device of its first parameter before the call.
        image_loader: iterable of ``(image_batch, image_ids)`` pairs.
    Returns:
        ``(embed_list, id_list)``: a numpy array with one embedding row per
        image and a list of the matching ids, in loader order. An empty loader
        gives an empty array and an empty list.
    """
    # Follow the model's device instead of assuming CUDA, so inference also
    # runs on a CPU-only runtime.
    first_param = next(iter(model.parameters()), None) if hasattr(model, 'parameters') else None
    device = first_param.device if first_param is not None else None

    embed_chunks = []
    id_list = []
    with torch.no_grad():
        for data, image_path in image_loader:
            if device is not None:
                data = data.to(device)
            embedding, _, _ = model(data)  # size : Batch x 512
            embed_chunks.append(embedding.cpu().numpy())
            id_list.extend(image_path)

    if not embed_chunks:
        return np.empty((0, 0), dtype=np.float32), id_list

    # Concatenate once at the end; concatenating inside the loop re-copies all
    # previous batches on every iteration.
    embed_list = np.concatenate(embed_chunks, axis=0)
    return embed_list, id_list


# Embed every image that appears in a train pair.
train_pair_dataset = ImageDataset(data_root= '/gdrive/MyDrive/Kinship Recognition Starter/train/train-faces/', transform=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((131.0912/255., 103.8827/255., 91.4953/255.), (1, 1, 1))
    ]), is_train=True)


train_pair_loader = DataLoader(train_pair_dataset, num_workers=2, batch_size=32, shuffle=False)

train_pair_embed_list, train_pair_id_list = get_embeddings(model, train_pair_loader)

# Image id -> row of its embedding. A dict lookup replaces a linear
# list.index() scan per pair; setdefault keeps the first occurrence, which is
# the row list.index() would have returned.
train_pair_row_of = {}
for row, image_id in enumerate(train_pair_id_list):
    train_pair_row_of.setdefault(image_id, row)

# DataFrame.values builds a new array on every access, so take it once.
train_pair_rows = train_pair.values

distances_list = []
labels = []

for i, pair_row in enumerate(train_pair_rows):
    id1, id2 = pair_row[1], pair_row[2]
    embed1 = train_pair_embed_list[train_pair_row_of[id1]]
    embed2 = train_pair_embed_list[train_pair_row_of[id2]]
    label = pair_row[3]
    distance = np.linalg.norm(embed1 - embed2)

    distances_list.append(distance)
    labels.append(label)

    if i % 1000 == 0:
        print (i, len(train_pair))

# The dataset is known to be well balanced, so the pairs are ranked by distance:
# the closest half is predicted positive (1), the farthest half negative (0).
predictions = np.zeros_like(distances_list)
pos_list = np.argsort(distances_list)[:len(predictions)//2]
predictions[pos_list] = 1
correct = int(np.sum(predictions == np.asarray(labels)))

# Accuracy of the ranking rule on the train pairs.
print('train pair accuracy:', correct / len(train_pair))


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


0 30000
1000 30000
2000 30000
3000 30000
4000 30000
5000 30000
6000 30000
7000 30000
8000 30000
9000 30000
10000 30000
11000 30000
12000 30000
13000 30000
14000 30000
15000 30000
16000 30000
17000 30000
18000 30000
19000 30000
20000 30000
21000 30000
22000 30000
23000 30000
24000 30000
25000 30000
26000 30000
27000 30000
28000 30000
29000 30000
train pair accuracy: 0.9340666666666667


In [12]:
# Embed every *.jpg in the test directory.
test_dataset = ImageDataset(data_root= '/gdrive/MyDrive/Kinship Recognition Starter/test/', transform=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((131.0912/255., 103.8827/255., 91.4953/255.), (1, 1, 1))
    ]))

test_loader = DataLoader(test_dataset, num_workers=2, batch_size=128, shuffle=False)

embed_list, id_list = get_embeddings(model, test_loader)

submission = pd.read_csv('/gdrive/MyDrive/Kinship Recognition Starter/test_ds.csv')

# Image id -> row of its embedding. A dict lookup replaces a linear
# list.index() scan per pair; setdefault keeps the first occurrence, which is
# the row list.index() would have returned.
test_row_of = {}
for row, image_id in enumerate(id_list):
    test_row_of.setdefault(image_id, row)

distances_list = []

# Columns 1 and 2 of the CSV hold the two image ids of each pair.
for pair_row in submission.values:
    id1, id2 = pair_row[1], pair_row[2]
    embed1, embed2 = embed_list[test_row_of[id1]], embed_list[test_row_of[id2]]
    distances_list.append(np.linalg.norm(embed1 - embed2))

# Same ranking rule as for the train pairs: the closest half of the pairs is
# labelled 1, the rest 0.
predictions = np.zeros_like(distances_list)
pos_list = np.argsort(distances_list)[:len(predictions)//2]
predictions[pos_list] = 1

# Size the index from the data instead of hard-coding 3000 rows, so the frame
# is always consistent with the number of pairs in the CSV.
d = {'index': np.arange(len(predictions)), 'label':predictions}
submissionfile = pd.DataFrame(data=d)

#predictions -> This is the file to submit
submissionfile.astype("int64").to_csv("predictions.csv", index=False)