In [1]:
import glob
import os
import numpy as np
import tqdm
from PIL import Image

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

import copy, time
import argparse
from datasets import EarlyStopScheduler, FaceImageDataset, FaceLoadImageDataset

# evaluation for classification

In [2]:
from resnet34WOmax import ResNet, BasicBlock, test_model

# Use the first GPU when present, otherwise fall back to the CPU so the
# notebook can still be run on a machine without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# A freshly initialised network with the architecture parameters used here.
# It is never moved to `device`: the weights actually evaluated below come
# from the checkpoint, so putting this instance on the GPU would only waste
# memory for as long as `resnet34` stays referenced.
resnet34 = ResNet([3, 4, 6, 3], BasicBlock, num_classes=4000, kernel_size=3)

# NOTE(review): anomaly detection is a debugging aid; kept to preserve the
# original global autograd setting.
torch.autograd.set_detect_anomaly(True)

# The checkpoint is loaded as a whole object (the result is used directly as
# the model below). `map_location` makes the load work regardless of the
# device the checkpoint was saved from.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
model = torch.load("./modelfcmore", map_location=device)
model.to(device)
# Everything in this notebook is inference, so switch the module to eval mode.
model.eval()

#### evaluate with the trained model

In [3]:
from torchvision import transforms, datasets

# Per-channel normalisation is the only transform applied to the validation
# set. NOTE(review): the mean/std values look like the commonly used ImageNet
# statistics -- confirm they match what the model was trained with.
norm_transform = transforms.Compose(
    [
        transforms.Normalize(
            (0.485, 0.456, 0.406),
            (0.229, 0.224, 0.225),
        )
    ]
)

val_data_root = "./val_data"
valset = FaceLoadImageDataset(
    val_data_root,
    "data/val",
    transform=norm_transform,
)
test_loader = DataLoader(
    valset,
    batch_size=256,
    shuffle=False,
    num_workers=8,
    drop_last=False,
)

# Run the project's evaluation helper over the validation loader with a
# cross-entropy criterion.
criterion = nn.CrossEntropyLoss()
test_loss, test_acc = test_model(model.to(device), test_loader, criterion, device)

100%|██████████| 32/32 [00:38<00:00,  1.21s/it]

Testing Loss:  1.2707732170820236
Testing Accuracy:  74.775 %





### save the classification result with id label
* We need to map each predicted class index back to its id using the target dict

In [5]:
from datasets import FaceImageDatasetEvl

# The evaluation set reuses the validation set's target dict so predictions
# can be translated with the same mapping.
evl_set = FaceImageDatasetEvl(
    "./test_data",
    target_dict=valset.target_dict,
)
# One sample per batch, in dataset order.
eval_loader = DataLoader(
    evl_set,
    batch_size=1,
    shuffle=False,
    num_workers=8,
    drop_last=False,
)

def find_key(dictionary, value):
    """Return the first key of ``dictionary`` whose value equals ``value``.

    Entries are examined in the dictionary's iteration order. ``None`` is
    returned when no entry matches.
    """
    matching_keys = (
        key for key, candidate in dictionary.items() if candidate == value
    )
    return next(matching_keys, None)

# Collect (sample id, predicted label) pairs for the submission file.
write_list = []

# Inference only: eval mode plus no_grad so no autograd graph is built for
# each forward pass (the outputs are only read, never back-propagated).
model.eval()
with torch.no_grad():
    for data, idx in tqdm.tqdm(eval_loader):
        outputs = model(data.to(device))

        # Index of the highest-scoring class along dim 1.
        _, predicted = torch.max(outputs, 1)
        # Translate the class index back to its key in the target dict.
        out_label = find_key(evl_set.target_dict, predicted.item())

        # The loader uses batch_size=1, so idx[0] is this sample's identifier.
        write_list.append((idx[0], out_label))

100%|██████████| 8000/8000 [07:56<00:00, 16.81it/s]


In [6]:
# Opened with "wb" (truncate) rather than "ab" (append): with append mode,
# re-running this cell adds a second header line and a duplicate copy of every
# row to the existing file.
with open("resnorm.csv", "wb") as f:
    # Header row.
    np.savetxt(f, [('id','label')], delimiter=',', fmt='%s')

    # One "<id>,<label>" line per prediction.
    for idx, target in write_list:
        np.savetxt(f, [(idx,target)], delimiter=',', fmt='%s')

# verification
Calculate the cosine similarity of the image pairs

In [15]:
class FaceImageDatasetVerification(Dataset):
    """Dataset of image pairs for face verification.

    Args:
        pairs_val: indexable collection of pair records. Each record is
            indexed as ``record[0]`` (path of the first image) and
            ``record[1]`` (path of the second image); when ``test`` is false
            ``record[2]`` is also read and returned as the pair's label.
        test: when true, items carry no label.

    Items are ``(img1, img2, path1, path2)`` in test mode and
    ``(img1, img2, path1, path2, label)`` otherwise, where the images are the
    result of ``torchvision.transforms.ToTensor`` and ``label`` is returned
    exactly as stored in the record.
    """

    def __init__(self, pairs_val, test = False):
        self.pairs_val = pairs_val
        self.test = test

    def __len__(self):
        """Number of pairs."""
        return len(self.pairs_val)

    @staticmethod
    def _load_tensor(path):
        """Open the image at ``path`` and convert it with ``ToTensor``."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaving the handle open until
        # the Image object is garbage collected.
        with Image.open(path) as img:
            return torchvision.transforms.ToTensor()(img)

    def __getitem__(self, index):
        """Load both images of pair ``index`` (plus its label unless in test mode)."""
        pair = self.pairs_val[index]
        path1, path2 = pair[0], pair[1]

        img1 = self._load_tensor(path1)
        img2 = self._load_tensor(path2)

        if self.test:
            return img1, img2, path1, path2
        return img1, img2, path1, path2, pair[2]


In [9]:
import pandas as pd

# Validation pairs: read the whole file, split it on newlines, then split each
# non-empty line on whitespace into its fields.
with open("./verification_pairs_val.txt", "r") as f:
    verification_pairs_val = f.read().split("\n")
# Empty strings (e.g. the one left after a trailing newline) are skipped.
ver_pairs_val = [line.split() for line in verification_pairs_val if line]

# Test pairs: same parsing.
with open("./verification_pairs_test.txt", "r") as f:
    verification_pairs_test = f.read().split("\n")
ver_pairs_test = [line.split() for line in verification_pairs_test if line]

# Validation loader: dataset built with test=False, so each item also carries
# the third field of its pair record.
vervalset = FaceImageDatasetVerification(ver_pairs_val, test=False)
verification_val_loader = DataLoader(
    vervalset,
    batch_size=16,
    shuffle=False,
    num_workers=8,
    drop_last=False,
)

# Test loader: dataset built with test=True, so items have no label field.
vertestset = FaceImageDatasetVerification(ver_pairs_test, test=True)
verification_test_loader = DataLoader(
    vertestset,
    batch_size=16,
    shuffle=False,
    num_workers=8,
    drop_last=False,
)

In [10]:
# Per-batch cosine similarities and the matching image paths.
scores = []
path_list = []
cos = nn.CosineSimilarity(dim=1, eps=1e-6)

# Inference only: eval mode plus no_grad so no autograd graph is kept for the
# two forward passes of every batch.
model.eval()
with torch.no_grad():
    for img1, img2, path1, path2 in tqdm.tqdm(verification_test_loader):
        embed1 = model.verification(img1.to(device))
        embed2 = model.verification(img2.to(device))

        # One similarity value per pair in the batch (reduction over dim 1).
        output = cos(embed1, embed2)
        scores.append(output.cpu().detach().numpy())
        # Row 0 holds the batch's first-image paths, row 1 the second-image paths.
        path_list.append(np.array((path1, path2)))


100%|██████████| 3240/3240 [31:29<00:00,  1.71it/s]  


In [11]:
# Each path_list entry was built as np.array((path1, path2)) for one batch:
# index 0 selects the first-image paths, index 1 the second-image paths.
path1_list = [batch_paths[0] for batch_paths in path_list]
path2_list = [batch_paths[1] for batch_paths in path_list]

# Flatten the per-batch pieces into one array each.
path1_array = np.concatenate(path1_list)
path2_array = np.concatenate(path2_list)
scores = np.concatenate(scores)

In [12]:
# Opened with "wb" (truncate) rather than "ab" (append): with append mode,
# re-running this cell adds a second header line and a duplicate copy of every
# row to the existing file.
with open("resverification.csv", "wb") as f:
    # Header row.
    np.savetxt(f, [('id','Category')], delimiter=',', fmt='%s')

    # One line per pair: "<path1> <path2>,<score>".
    for path1, path2, score in zip(path1_array,path2_array,scores):
        np.savetxt(f, [(path1 + " " +path2, score)], delimiter=',', fmt='%s')

## Verification on the validation pairs

In [None]:
# Per-batch cosine similarities, ground-truth labels and image paths.
scores = []
path_list = []
true_labels = []
cos = nn.CosineSimilarity(dim=1, eps=1e-6)

# Inference only: eval mode plus no_grad so no autograd graph is kept for the
# two forward passes of every batch.
model.eval()
with torch.no_grad():
    for img1, img2, path1, path2, label in tqdm.tqdm(verification_val_loader):
        embed1 = model.verification(img1.to(device))
        embed2 = model.verification(img2.to(device))

        # One similarity value per pair in the batch (reduction over dim 1).
        output = cos(embed1, embed2)
        scores.append(output.cpu().detach().numpy())
        # Labels are stored exactly as the loader yields them.
        true_labels.append(label)
        # Row 0 holds the batch's first-image paths, row 1 the second-image paths.
        path_list.append(np.array((path1, path2)))

 93%|█████████▎| 512/551 [05:02<00:08,  4.61it/s]

In [None]:
# Each path_list entry was built as np.array((path1, path2)) for one batch:
# index 0 selects the first-image paths, index 1 the second-image paths.
path1_list = [batch_paths[0] for batch_paths in path_list]
path2_list = [batch_paths[1] for batch_paths in path_list]

# Flatten the per-batch pieces into one array each.
path1_array = np.concatenate(path1_list)
path2_array = np.concatenate(path2_list)
scores = np.concatenate(scores)
true_labels = np.concatenate(true_labels)

In [None]:
from sklearn.metrics import roc_auc_score
# ROC AUC of the cosine-similarity scores against the validation pair labels.
roc_auc_score(y_true=true_labels, y_score=scores)