In [None]:
import torch
import gc
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models
from torch import nn
from torch import optim
import json
import PIL
from PIL import Image
import io
import cv2
import torchvision.transforms.functional as TF
from torchvision.utils import save_image
from torchvision.transforms import ToPILImage
from matplotlib import pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data as data
import torch.optim as optim
from torch.optim import lr_scheduler
from tqdm import tqdm
import time
import os
import copy
import pickle
import urllib.request
import requests
from matplotlib.pyplot import imshow
import random
from utils import *
from resnet50_ft_dims_2048 import *

In [None]:
# Show an image together with the model's top-1 prediction.
def imstats(name):
    """Display the image ``name`` and print its most probable class.

    Relies on names that are not defined in this notebook's visible cells
    (``readim``, ``forward_normalize``, ``imshow_tensor``, ``inv_normalize``,
    ``lay2``, ``pretrained_model``, ``class_dict``) -- presumably provided by
    the star-imports at the top; TODO confirm.

    Args:
        name: passed straight to ``readim``; presumably an image path.
    """
    # Load the image as a batch and show its first element.
    batch = readim(name, forward_normalize)
    imshow_tensor(batch[0], inv_normalize)

    # Forward pass on the default CUDA device, then take the arg-max over dim 1.
    # NOTE(review): ``lay2`` is assumed to turn model outputs into
    # probabilities (e.g. a softmax layer) -- confirm where it is defined.
    scores = lay2(pretrained_model(batch.cuda()))
    best = scores.max(1)
    class_index = best.indices.item()
    confidence = round(best.values.item()*100,4)
    print("Class is {} ({}) with confidence {}%".format(class_index, class_dict[class_index], confidence))

In [None]:
# Side length (pixels) the images are resized to by this transform.
model_img_size = 224

# NOTE(review): this pipeline is redefined further down the notebook with the
# per-channel mean in the opposite order (131.0912, 103.8827, 91.4953), and
# the visible preprocessing goes through load_data() rather than this
# transform -- confirm which channel order / pipeline is intended.
model_transform = transforms.Compose([
    transforms.Resize((model_img_size, model_img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[91.4953, 103.8827, 131.0912], std=[1, 1, 1]),
])

In [None]:
# Notebook-wide configuration.
# EPOCHS: number of passes of the training loop.
# BATCH_SIZE: number of images VGG2Dataset packs into one item.
EPOCHS, BATCH_SIZE = 5, 32
# NOTE(review): the optimizer cell passes a literal lr=0.001, so this value is
# not what SGD actually uses -- confirm which one is intended.
LEARNING_RATE = 0.1
# Mirror the input/output sizes of the linear classifier head.
NUM_FEATURES, NUM_CLASSES = 2048, 500

In [None]:
class ResNet50_Classifier(nn.Module):
    """Linear classification head applied to pre-extracted backbone features.

    Args:
        num_features: size of each input feature vector (default 2048).
        num_classes: number of output logits (default 500).

    The defaults reproduce the previously hard-coded ``nn.Linear(2048, 500)``,
    and the layer is still called ``fc``, so existing call sites and saved
    state dicts keep working.
    """

    def __init__(self, num_features=2048, num_classes=500):
        super(ResNet50_Classifier, self).__init__()
        self.fc = nn.Linear(num_features, num_classes)

    def forward(self, feats):
        """Return raw class logits for ``feats`` (last dim == ``num_features``)."""
        return self.fc(feats)

In [None]:
# Keep the original preference for the second GPU, but fall back to the first
# GPU when only one is present ("cuda:1" would otherwise fail at .to(device)),
# and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

# Trainable classification head.
model_class = ResNet50_Classifier()

# Backbone built by resnet50_ft from the given weights file (presumably the
# pretrained VGGFace2 ResNet-50 -- confirm against resnet50_ft_dims_2048).
# It is put in eval mode and is not handed to the optimizer below.
model = resnet50_ft("vgg_face_testimages/resnet50_ft_dims_2048.pth")
model.to(device)
model.eval()

criterion = nn.CrossEntropyLoss()
# Only the head's parameters are optimised.
# NOTE(review): lr is a literal 0.001 here; the LEARNING_RATE constant is not used.
optimizer = optim.SGD(model_class.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Move the classifier head to the selected device and switch it to eval mode;
# the module returned by .eval() is the cell's displayed value.
model_class.to(device).eval()

In [None]:
def bounding_crop(img, bounding_box):
    """Crop ``img`` to an enlarged version of ``bounding_box``.

    Args:
        img: image object exposing ``size`` as (width, height) and ``crop``.
        bounding_box: ``(x, y, w, h)`` of the box.

    Returns:
        Whatever ``img.crop`` returns for the enlarged box, clamped to the
        image bounds.
    """
    dims = np.array(img.size)
    left, top, box_w, box_h = bounding_box
    margin = 0.15
    # NOTE(review): the left/top edges move out by ``margin`` times the box
    # size, while the right/bottom edges move out by twice that -- confirm
    # the asymmetry is intended.
    grow = 1 + margin * 2
    crop_box = (
        max(0, left - margin * box_w),
        max(0, top - margin * box_h),
        min(dims[0], left + box_w * grow),
        min(dims[1], top + box_h * grow),
    )
    return img.crop(crop_box)

def load_data(img, shape=None, bounding_box=None):
    """Crop, resize and mean-normalise one PIL image for the backbone.

    Steps: optional enlarged-bounding-box crop, conversion to RGB, resize so
    the shorter side is 224 px, centre crop to ``shape[:2]``, and subtraction
    of the module-level ``mean``.

    Args:
        img: PIL image.
        shape: ``(height, width, ...)`` of the centre crop; only the first two
            entries are read. ``None`` selects a 224x224 crop (it previously
            raised ``TypeError`` when indexing ``None``).
        bounding_box: optional ``(x, y, w, h)`` box; when given (and truthy)
            the image is first cropped to an enlarged version of it.

    Returns:
        ``numpy`` array in (height, width, channel) layout holding
        ``pixel - mean``.
    """
    short_size = 224.0
    # Default crop: a square with the same side as the resized short edge.
    crop_size = (int(short_size), int(short_size)) if shape is None else shape

    if bounding_box:
        im_shape = np.array(img.size)    # in the format of (width, height, *)
        x,y,w,h = bounding_box
        half_extension = 0.15
        # Same enlargement as bounding_crop(), clamped to the image bounds.
        # NOTE(review): left/top grow by 0.15 of the box size but right/bottom
        # by 0.30 -- confirm the asymmetry is intended.
        area = (max(0,x-half_extension*w), max(0,y-half_extension*h),
                min(im_shape[0], x+w*(1+half_extension*2)), min(im_shape[1], y+h*(1+half_extension*2)))
        img = img.crop(area)

    # Size must be re-read because the crop above may have changed it.
    im_shape = np.array(img.size)
    img = img.convert('RGB')

    # Scale so that the shorter side becomes ``short_size`` (rounded up).
    ratio = float(short_size) / np.min(im_shape)
    img = img.resize(size=(int(np.ceil(im_shape[0] * ratio)),   # width
                           int(np.ceil(im_shape[1] * ratio))),  # height
                     resample=PIL.Image.BILINEAR)

    x = np.array(img)  # image has been transposed into (height, width)
    newshape = x.shape[:2]
    # Centre crop.
    h_start = (newshape[0] - crop_size[0])//2
    w_start = (newshape[1] - crop_size[1])//2
    x = x[h_start:h_start+crop_size[0], w_start:w_start+crop_size[1]]
    # ``mean`` is a module-level 3-tuple broadcast over the channel axis.
    x = x - mean
    return x

def image_encoding(model, images):
    """Run ``model`` on a list of PIL images and return its two outputs.

    Each image is preprocessed with ``load_data`` (224x224 centre crop, no
    bounding box), stacked, converted from (N, H, W, C) to (N, C, H, W) and
    moved to the module-level ``device``.

    Args:
        model: callable returning an indexable pair; element 0 is returned
            untouched, element 1 is indexed as ``[:, :, 0, 0]`` (i.e. it must
            be 4-dimensional).
        images: iterable of PIL images.

    Returns:
        ``(classif, face_feats)`` where ``classif`` is ``model(...)[0]`` (still
        on ``device`` and attached to the autograd graph) and ``face_feats``
        is a NumPy array of row-wise L2-normalised features.
    """
    im_array = np.array([load_data(img=i, shape=(224, 224, 3)) for i in images])
    im_tensor = torch.Tensor(im_array.transpose(0, 3, 1, 2))
    im_tensor = im_tensor.to(device)
    f  = model(im_tensor)
    classif = f[0]
    # Drop the two trailing axes of the feature map.
    feat = f[1].detach().cpu().numpy()[:, :, 0, 0]
    # Normalise each feature vector to unit Euclidean length.
    face_feats = feat / np.sqrt(np.sum(feat ** 2, -1, keepdims=True))
    return classif, face_feats

In [None]:
def predict_features(images):
    """Encode ``images`` with the module-level backbone ``model``.

    Returns:
        The ``(out, face_feats)`` pair produced by ``image_encoding``.
    """
    return image_encoding(model, images)

def fetch_images(paths):
    """Open every path in ``paths`` with ``Image.open`` and return the list.

    The images are returned in the same order as ``paths``.
    """
    return [Image.open(path) for path in paths]

def process_dataset(dataset):
    """Extract one feature vector and one integer label per image path.

    Args:
        dataset: sliceable sequence of image paths whose parent directory is
            named like ``...n00<digits>``; the label is the integer parsed
            from the part after the last ``"n00"``.

    Returns:
        ``(svm_input, svm_labels)``: the per-image feature vectors and the
        matching integer labels, in dataset order.
    """
    chunk = 30  # number of images pushed through the model at once
    svm_input, svm_labels = [], []
    total = len(dataset)
    for start in tqdm(range(0, total, chunk)):
        batch = dataset[start:min(start + chunk, total)]
        out, feats = predict_features(fetch_images(batch))
        svm_input.extend(feats[pos] for pos in range(len(batch)))
        svm_labels.extend(int(path.split("/")[-2].split("n00")[-1]) for path in batch)
        # Release the per-chunk results before the next iteration.
        del feats, out
    return svm_input, svm_labels

class VGG2Dataset(Dataset):
    """Dataset whose items are whole, pre-assembled batches.

    Item ``i`` covers ``image_paths[i*batch_size:(i+1)*batch_size]`` and is
    returned as ``(im_tensor, labels)``.

    Args:
        image_paths: sequence of image paths. Each path must contain
            ``"test/"`` (the part after it, without extension, is the key into
            the module-level ``bounding_dict``) and have a parent directory
            named like ``n00<digits>`` (the integer after ``"n00"`` is the key
            into ``class_dict``).
        class_dict: mapping from that integer to the class index.
        transform: stored on the instance but not applied; preprocessing is
            done by ``load_data``.
        batch_size: images per item; ``None`` (default) uses the module-level
            ``BATCH_SIZE`` at call time.

    Note:
        ``len()`` counts only complete batches, so a trailing partial batch is
        not visited by ``range(len(ds))``. Indexing it directly still works
        and pads it with randomly chosen paths.
    """

    def __init__(self, image_paths, class_dict, transform=None, batch_size=None):
        self.image_paths = image_paths
        self.class_dict = class_dict
        self.transform = transform
        self._batch_size = batch_size

    @property
    def batch_size(self):
        """Images per item (falls back to the module-level ``BATCH_SIZE``)."""
        return BATCH_SIZE if self._batch_size is None else self._batch_size

    @staticmethod
    def _normalize_index(idx):
        """Reduce an int, a 0-d tensor or a 1-d tensor/list to a single index."""
        if torch.is_tensor(idx):
            # 0-d tensors become a plain number here, 1-d tensors a list.
            idx = idx.tolist()
        if isinstance(idx, (list, tuple)):
            idx = idx[0]
        return idx

    def __len__(self):
        """Number of complete batches."""
        return len(self.image_paths) // self.batch_size

    def __getitem__(self, idx):
        """Return ``(im_tensor, labels)`` for batch ``idx``.

        Raises:
            IndexError: if ``idx`` selects no image at all. This also lets
                plain ``for batch in dataset`` iteration terminate.
        """
        idx = self._normalize_index(idx)
        size = self.batch_size
        image_path_list = list(self.image_paths[idx*size:(idx+1)*size])
        if not image_path_list:
            raise IndexError("batch index {} is out of range".format(idx))
        if len(image_path_list) != size:
            # Pad a trailing partial batch with random paths from the dataset.
            image_path_list += random.choices(self.image_paths, k=(size-len(image_path_list)))
        images = fetch_images(image_path_list)

        # Preprocess each image with its bounding box, then stack and convert
        # (N, H, W, C) -> (N, C, H, W).
        im_array = np.array([
            load_data(img=image, shape=(224, 224, 3),
                      bounding_box=bounding_dict[path.split("test/")[1].split(".")[0]])
            for path, image in zip(image_path_list, images)
        ])
        im_tensor = torch.Tensor(im_array.transpose(0, 3, 1, 2))
        labels = [self.class_dict[int(path.split("/")[-2].split("n00")[-1])] for path in image_path_list]

        labels = torch.tensor(labels,dtype=torch.long)
        labels = labels.squeeze(0)
        return im_tensor, labels

In [None]:
def most_occuring_label(pred):
    """Find the class predicted most often across the rows of ``pred``.

    Args:
        pred: 2-D tensor of per-row class scores (softmax is taken over dim 1).

    Returns:
        ``(label, count, total)`` as 0-d tensors: the most frequent arg-max
        class, how many rows predicted it, and the total number of rows.
    """
    probabilities = torch.nn.Softmax(dim=1)(pred)
    predicted = probabilities.max(1).indices
    labels, counts = predicted.unique(return_counts=True)
    # Position of the largest count among the unique labels.
    winner = counts.max(0)[1]
    return labels[winner], counts[winner], counts.sum()

In [None]:
import csv

vgg_bounding = "/nobackup/vgg2face/bb_landmark/"
bounding_dict = {}

# Build {first CSV column: [col1, col2, col3, col4] as ints}. The values are
# later unpacked as (x, y, w, h) by load_data(). The first row is treated as
# the header and only printed.
line_count = 0
with open(vgg_bounding+'loose_bb_test.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        line_count += 1
        if line_count == 1:
            print(f'Column names are {", ".join(row)}')
            continue
        bounding_dict[row[0]] = [int(row[col]) for col in range(1, 5)]

In [None]:
# Per-channel mean subtracted by load_data().
mean = (131.0912, 103.8827, 91.4953)
model_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.RandomCrop((224,224)),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=[131.0912, 103.8827, 91.4953],
                                                              std=[1,1,1])])
batch_size = 1

vgg_test_dir = "/nobackup/vgg2face/test/"
# Number of images per identity held out for validation.
TEST_IMAGES_PER_CLASS = 50
# A private, seeded RNG plus sorted directory listings make the train/val
# split identical on every run without touching the global `random` state.
SPLIT_SEED = 0
split_rng = random.Random(SPLIT_SEED)

class_dir_list = sorted(os.listdir(vgg_test_dir))
train_dataset_paths = []
test_dataset_paths = []

for class_dir in class_dir_list:
    class_dir = vgg_test_dir+class_dir+"/"
    # Skip stray files sitting next to the identity folders.
    if not os.path.isdir(class_dir):
        continue
    all_images = [class_dir+image for image in sorted(os.listdir(class_dir))]
    split_rng.shuffle(all_images)
    test_dataset_paths += all_images[:TEST_IMAGES_PER_CLASS]
    train_dataset_paths += all_images[TEST_IMAGES_PER_CLASS:]
split_rng.shuffle(test_dataset_paths)
split_rng.shuffle(train_dataset_paths)

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# a class dictionary that was produced locally / comes from a trusted source.
with open("vgg2_testset_classdict.pk","rb") as class_dict_file:
    class_d = pickle.load(class_dict_file)
train_dataset = VGG2Dataset(image_paths=train_dataset_paths, class_dict = class_d)
test_dataset = VGG2Dataset(image_paths=test_dataset_paths, class_dict = class_d)

In [None]:
# Every dataset item is already a full batch of images, hence batch_size=1.
# In the visible cells these loaders are only used for their len() when
# averaging the per-epoch statistics.
train_loader = DataLoader(train_dataset, batch_size=1)
val_loader = DataLoader(test_dataset, batch_size=1)

In [None]:
# Materialise every pre-batched (images, labels) item exactly once.
# Indexing the dataset twice per item (once for [0], once for [1]) would open
# and preprocess every image two times.
train_size = len(train_dataset)
print(train_size)
train_in_memory = [train_dataset[i] for i in tqdm(range(train_size))]

val_size = len(test_dataset)
print(val_size)
val_in_memory = [test_dataset[i] for i in tqdm(range(val_size))]

In [None]:
transToPIL = transforms.ToPILImage()
# Class index whose training images are shown, and how many batches to scan.
TARGET_CLASS = 401
MAX_BATCHES = 20

# The cached tensors hold mean-subtracted float pixels (see load_data), which
# ToPILImage would scale by 255 and wrap around when casting to bytes. Add the
# per-channel mean back and convert to uint8 so the faces render correctly.
channel_mean = torch.tensor(mean, dtype=torch.float32).view(3, 1, 1)
for x,y in train_in_memory[:MAX_BATCHES]:
    for xx,yy in zip(x,y):
        if yy==TARGET_CLASS:
            display(transToPIL((xx + channel_mean).round().clamp(0, 255).to(torch.uint8)))

In [None]:
def multi_acc(y_pred, y_test):
    """Top-1 accuracy in percent.

    Args:
        y_pred: 2-D tensor of class scores, one row per sample.
        y_test: tensor of true class indices, one per sample.

    Returns:
        0-d tensor: percentage of rows whose arg-max over dim 1 equals the
        corresponding entry of ``y_test``.
    """
    predicted = torch.log_softmax(y_pred, dim = 1).max(dim = 1).indices
    hits = (predicted == y_test).float()
    return (hits.sum() / len(hits)) * 100

# Per-epoch history, one list entry appended per epoch by the training loop.
accuracy_stats = dict(train=[], val=[])
loss_stats = dict(train=[], val=[])

In [None]:
def _backbone_features(batch):
    """Return the backbone's second output for ``batch`` with its last two dims squeezed.

    Only ``model_class`` is registered with the optimizer, so the backbone
    forward pass runs under ``torch.no_grad()``: no autograd graph is built
    through ``model`` and no gradients are accumulated on its parameters.
    """
    with torch.no_grad():
        feats = model(batch)[1]
    # Squeeze the trailing dim twice (presumably (N, C, 1, 1) -> (N, C) -- the
    # same axes image_encoding() drops with [:, :, 0, 0]).
    return feats.squeeze(-1).squeeze(-1)


print("Begin training.")
# EPOCHS comes from the configuration cell.
for e in tqdm(range(1, EPOCHS+1)):

    # TRAINING
    train_epoch_loss = 0
    train_epoch_acc = 0
    model_class.train()
    for count, (X_train_batch, y_train_batch) in enumerate(train_in_memory, start=1):
        if count%1000==0: print(count)
        X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        optimizer.zero_grad()
        y_train_pred = model_class(_backbone_features(X_train_batch))
        train_loss = criterion(y_train_pred, y_train_batch)
        train_acc = multi_acc(y_train_pred, y_train_batch)
        train_loss.backward()
        optimizer.step()

        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()

    # VALIDATION
    val_epoch_loss = 0
    val_epoch_acc = 0
    model_class.eval()
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_in_memory:
            # Cached items are already whole batches, so they are used as-is,
            # exactly like in the training branch.
            X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)

            y_val_pred = model_class(_backbone_features(X_val_batch))
            val_loss = criterion(y_val_pred, y_val_batch)
            val_acc = multi_acc(y_val_pred, y_val_batch)

            val_epoch_loss += val_loss.item()
            val_epoch_acc += val_acc.item()

    # Average over the number of batches that were actually iterated.
    num_train_batches = len(train_in_memory)
    num_val_batches = len(val_in_memory)
    train_loss_avg = train_epoch_loss/num_train_batches
    val_loss_avg = val_epoch_loss/num_val_batches
    train_acc_avg = train_epoch_acc/num_train_batches
    val_acc_avg = val_epoch_acc/num_val_batches

    loss_stats['train'].append(train_loss_avg)
    loss_stats['val'].append(val_loss_avg)
    accuracy_stats['train'].append(train_acc_avg)
    accuracy_stats['val'].append(val_acc_avg)

    print(f'Epoch {e+0:03}: | Train Loss: {train_loss_avg:.5f} | Val Loss: {val_loss_avg:.5f} | Train Acc: {train_acc_avg:.3f}| Val Acc: {val_acc_avg:.3f}')

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Predicted and true class indices for every validation image, in order.
y_pred_list = []
y_test_list = []
with torch.no_grad():
    model.eval()
    for X_val_batch, y_val_batch in val_in_memory:
        # Backbone features with the two trailing dims squeezed away.
        features = model(X_val_batch.to(device))[1].squeeze(-1).squeeze(-1)
        log_probs = torch.log_softmax(model_class(features), dim = 1)
        y_pred_tags = log_probs.max(dim = 1).indices
        y_pred_list.extend(y_pred_tags.cpu().numpy())
        y_test_list.extend(y_val_batch.numpy())
#y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

In [None]:
#torch.save(model_class.state_dict(), "vgg2_classifier_500.pt")

In [None]:
# Number of validation images whose predicted class equals the true class.
print(sum(1 for predicted, actual in zip(y_pred_list, y_test_list) if predicted == actual))

In [None]:
# Spot-check the third sample (prediction first, then ground truth), followed
# by the per-class precision/recall report.
for label_sequence in (y_pred_list, y_test_list):
    print(label_sequence[2])
print(classification_report(y_test_list, y_pred_list))