# 單純只使用simsiam 
## embedding判斷人臉

### import package

In [1]:
import os
import numpy as np
import torch
import glob
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch import nn
from torchvision import datasets, transforms
from os import listdir
from os import walk
from facenet_pytorch import MTCNN, InceptionResnetV1
import PIL.Image as Image
from matplotlib import pyplot as plt
import simsiam.loader
from torch.optim.lr_scheduler import MultiStepLR
from tqdm import tqdm 
from IPython.display import clear_output
import math

### check gpu

In [2]:
# Dataset split directories (relative paths).
DATA_PATH_TRAIN = '../../dataset/face_labeled_data/train'
DATA_PATH_VAL = '../../dataset/face_labeled_data/val'
DATA_PATH_TEST = '../../dataset/face_labeled_data/test'

# DataLoader settings.
BATCH_SIZE = 32
WORKERS = 8

# Training hyper-parameters; the learning rate is 0.05 scaled by BATCH_SIZE / 256.
epochs = 100
momentum_val = 0.9
weight_decay_val = 1e-4
init_learning_rate = 0.05 * BATCH_SIZE / 256

print('torch version:' + torch.__version__)

# Pick the compute device once; list every visible GPU when CUDA is usable.
cuda_ready = torch.cuda.is_available()
device = torch.device('cuda' if cuda_ready else 'cpu')
if not cuda_ready:
    print('CUDA is not available.')
else:
    print('Available GPUs: ', end='')
    for gpu_index in range(torch.cuda.device_count()):
        print(torch.cuda.get_device_name(gpu_index), end=' ')

torch version:1.8.0
Available GPUs: GeForce RTX 2080 Ti GeForce GTX 1080 Ti 

### Data augmentation

In [3]:
# Per-channel (R, G, B) mean/std applied after ToTensor in every transform below.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
                          
def get_aug_trnsform(s=1.0):
    """Build the random augmentation pipeline applied to each image view.

    Args:
        s: Accepted for backward compatibility only. The colour jitter
            strength is fixed at (0.4, 0.4, 0.4, 0.1) and is NOT scaled by
            ``s``; the previous body built a second, ``s``-scaled jitter
            that was never added to the pipeline.

    Returns:
        A ``transforms.Compose`` that random-resized-crops to 224 px, applies
        colour jitter (p=0.8), random grayscale (p=0.2), Gaussian blur
        (p=0.5), a random horizontal flip, converts to a tensor and applies
        the module-level ``normalize``.
    """
    return transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
        transforms.RandomApply([
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
        ], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomApply([simsiam.loader.GaussianBlur([.1, 2.])], p=0.5),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])

def ge_eval_trnsform(s=1.0):
    """Build the deterministic evaluation pipeline (no random augmentation).

    Args:
        s: Accepted for backward compatibility only; it has no effect. The
            previous body built an ``s``-scaled colour jitter that was never
            used.

    Returns:
        A ``transforms.Compose`` that converts the image to a tensor and
        applies the module-level ``normalize``. No resizing or cropping is
        performed, so images keep their original size.
    """
    return transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

# Transform used for the training split (random augmentation).
trans_aug = get_aug_trnsform(0.5)
# NOTE(review): the evaluation transform is also built with get_aug_trnsform, so
# validation/test images are randomly augmented and metrics vary between runs.
# ge_eval_trnsform is defined above but never used; it looks like the intended
# factory here, but it does not resize, so confirm all images share one size
# before switching.
trans_eval = get_aug_trnsform(0.5)

#### Define dataset, and dataloader

In [4]:
class FaceImages(Dataset):
    """Dataset of ``.jpg`` face images laid out as ``<img_dir>/<label>/<file>.jpg``.

    Each item is a pair of independently transformed views of one image plus
    the name of the directory that contains the image, used as its label.
    """

    def __init__(self, img_dir, transform, specific = '**'):
        """Collect the image paths.

        Args:
            img_dir: Root directory of the split.
            transform: Callable applied (twice, independently) to each image.
            specific: Glob pattern for the label directory. The default
                ``'**'`` is used without ``recursive=True``, so it matches
                exactly one directory level, like ``'*'``.
        """
        self.img_dir = img_dir
        self.img_path_list = glob.glob(os.path.join(img_dir, specific + '/*.jpg'))
        self.transform = transform

    def __len__(self):
        """Return the number of images found."""
        return len(self.img_path_list)

    def __getitem__(self, idx):
        """Return ``(view_1, view_2, label)`` for the image at ``idx``."""
        img_path = self.img_path_list[idx]
        img = FaceImages.read_image(img_path)
        # Take the label from the parent directory name instead of a fixed
        # position in the split path, so any img_dir depth works.
        target = os.path.basename(os.path.dirname(img_path))
        return self.transform(img), self.transform(img), target

    @staticmethod
    def read_image(img_path):
        """Load ``img_path`` as an RGB PIL image and release the file handle."""
        with Image.open(img_path, mode='r') as img_file:
            return img_file.convert('RGB')

In [5]:
# One dataset per split; only the training split uses trans_aug.
dataset_train = FaceImages(DATA_PATH_TRAIN, transform=trans_aug)
dataset_val = FaceImages(DATA_PATH_VAL, transform=trans_eval)
dataset_test = FaceImages(DATA_PATH_TEST, transform=trans_eval)

# Settings shared by all three loaders; only the training loader shuffles.
_loader_common = {'num_workers': WORKERS, 'batch_size': BATCH_SIZE}

train_loader = DataLoader(dataset_train, shuffle=True, **_loader_common)
val_loader = DataLoader(dataset_val, shuffle=False, **_loader_common)
test_loader = DataLoader(dataset_test, shuffle=False, **_loader_common)

### load model

In [6]:
# Restore the trained weights into a fresh InceptionResnetV1.
val_model = InceptionResnetV1()
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on a GPU still loads on a CPU-only machine.
checkpoint = torch.load("./model_facenet/checkpoint_0100.pth.tar", map_location=device)
val_model.load_state_dict(checkpoint['state_dict'])
val_model.to(device)
val_model.eval()

InceptionResnetV1(
  (conv2d_1a): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2a): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2b): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2d_3b): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_4a): 

### collapse check (similarity matrix)

In [7]:
# Cosine-similarity matrix of the first validation batch: if every
# off-diagonal entry is close to 1 the embeddings have collapsed.
# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    x, _, _ = next(iter(val_loader))
    h = val_model(x.to(device))
    h_norm = h / h.norm(dim=1)[:, None]  # L2-normalise each embedding row
    res = torch.mm(h_norm, h_norm.transpose(0,1))
print(res.cpu().detach().numpy())

[[1.0000001  0.58221966 0.876791   ... 0.33666027 0.27557865 0.5234895 ]
 [0.58221966 1.0000001  0.7297051  ... 0.26101744 0.87133354 0.3500441 ]
 [0.876791   0.7297051  1.         ... 0.19898784 0.4790702  0.40273082]
 ...
 [0.33666027 0.26101744 0.19898784 ... 1.         0.49113268 0.9032259 ]
 [0.27557865 0.87133354 0.4790702  ... 0.49113268 1.         0.50230026]
 [0.5234895  0.3500441  0.40273082 ... 0.9032259  0.50230026 1.        ]]


### validation

In [17]:
def validation(data_loader, model):
    """Embed every sample of ``data_loader`` with ``model``.

    Args:
        data_loader: Iterable of batches where element 0 is the image tensor
            and element 2 holds the labels; each label must be convertible
            with ``int()``.
        model: Callable mapping an image batch (on ``device``) to embeddings.

    Returns:
        ``(y_pre_list, y_list)``: a list with one NumPy embedding per sample
        and a list with the matching integer labels.
    """
    y_pre_list = []
    y_list = []
    # Inference only: no_grad avoids building the autograd graph per batch.
    with torch.no_grad():
        # Wrap the loader itself (not enumerate) so tqdm knows the total.
        for image_batch in tqdm(data_loader):
            x = image_batch[0].to(device)
            labels = image_batch[2]
            embeddings = model(x).cpu().detach().numpy()
            y_pre_list.extend(embeddings)
            y_list.extend(int(labels[j]) for j in range(len(embeddings)))
    return y_pre_list, y_list

In [18]:
# Report the validation set size, then embed the whole validation split.
size = len(dataset_val)
print('val data size = ', size)

y_pre, y = validation(val_loader, val_model)

val data size =  2153


68it [00:06, 11.16it/s]


### 計算兩embeddings之距離
distance_metric = 0 歐幾里得距離的平方 (squared Euclidean distance，程式中未開根號)  
distance_metric = 1 由餘弦相似性換算的角度距離 (arccos(cosine similarity) / π)

In [19]:
def distance(embeddings1, embeddings2, distance_metric=0):
    """Return the distance between two embeddings.

    Args:
        embeddings1: First embedding (array-like); reduced along axis 0.
        embeddings2: Second embedding, same shape as ``embeddings1``.
        distance_metric: ``0`` for the squared Euclidean distance (sum of
            squared differences, no square root); ``1`` for the angular
            distance ``arccos(cosine_similarity) / pi``, which lies in [0, 1].

    Returns:
        The distance, reduced along axis 0.

    Raises:
        ValueError: If ``distance_metric`` is neither 0 nor 1.
    """
    if distance_metric == 0:
        # Squared Euclidean distance
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 0)
    elif distance_metric == 1:
        # Distance based on cosine similarity
        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=0)
        norm = np.linalg.norm(embeddings1, axis=0) * np.linalg.norm(embeddings2, axis=0)
        # Rounding can push the ratio slightly outside [-1, 1], which would
        # make arccos return NaN for (nearly) parallel vectors.
        similarity = np.clip(dot / norm, -1.0, 1.0)
        dist = np.arccos(similarity) / math.pi
    else:
        # Raising a plain string is itself a TypeError in Python 3.
        raise ValueError('Undefined distance metric %d' % distance_metric)
    return dist

### 計算Cluster離散度
1.根據target分群  
2.計算每一群中心點  
3.每一群的所有點對中心點計算歐式距離  
4.上步驟所有距離取平均  
5.所有群的平均距離再取平均  


In [151]:
def calculateClusterDistance(y_pre, target):
    """Average within-cluster spread of the embeddings.

    For every distinct label the centroid of its embeddings is computed, the
    squared Euclidean distance of each member to that centroid is averaged,
    and finally the per-label averages are averaged. The squared distance is
    the same measure ``distance(..., distance_metric=0)`` returns.

    Args:
        y_pre: Sequence of embeddings, shape ``(n_samples, n_features)``.
        target: Sequence of ``n_samples`` labels aligned with ``y_pre``.

    Returns:
        The mean of the per-label mean squared distances (NaN for no labels).
    """
    embeddings = np.asarray(y_pre)
    labels = np.asarray(target)
    per_cluster = []
    for label in np.unique(labels):
        # Boolean mask selects the members without a Python-level index list.
        members = embeddings[labels == label]
        centroid = members.mean(axis=0)
        sq_dist = np.sum(np.square(members - centroid), axis=1)
        per_cluster.append(sq_dist.mean())
    return np.mean(per_cluster)

In [200]:
def calculateClusterVAL(y_pre, target, d = 1):
    """Mean per-class validation rate at threshold ``d``.

    For each label with at least two samples, counts the fraction of
    same-label pairs whose ``distance`` (default metric) is ``<= d``; the
    per-label fractions are then averaged.

    Args:
        y_pre: Sequence of embeddings aligned with ``target``.
        target: Sequence of labels.
        d: Acceptance threshold on the pair distance.

    Returns:
        Mean of the per-label accept fractions (NaN if no label has two or
        more samples).
    """
    ta = []
    y_new = np.array(y_pre)
    labels = np.asarray(target)
    # Iterate the label array directly so tqdm can display the total.
    for target_str in tqdm(np.unique(labels)):
        data = y_new[labels == target_str]
        if len(data) <= 1:
            # A single sample forms no pair.
            continue
        count = 0
        ta_count = 0
        for i in range(len(data) - 1):
            for j in range(i + 1, len(data)):
                count += 1
                if distance(data[i], data[j]) <= d:
                    ta_count += 1
        ta.append(ta_count / count)
    return np.mean(ta)
def calculateClusterFAR(y_pre, target, d = 1):
    """Mean per-class false-accept rate at threshold ``d``.

    For each label with at least two samples, counts the fraction of
    (member, non-member) pairs whose ``distance`` (default metric) is
    ``<= d``; the per-label fractions are then averaged.

    Args:
        y_pre: Sequence of embeddings aligned with ``target``.
        target: Sequence of labels.
        d: Acceptance threshold on the pair distance.

    Returns:
        Mean of the per-label false-accept fractions (NaN if no label
        contributes any pair).
    """
    fa = []
    y_new = np.array(y_pre)
    labels = np.asarray(target)
    # Iterate the label array directly so tqdm can display the total.
    for target_str in tqdm(np.unique(labels)):
        is_member = labels == target_str
        data = y_new[is_member]
        other_data = y_new[~is_member]
        # NOTE(review): single-sample labels are skipped exactly as in
        # calculateClusterVAL, although they could form negative pairs.
        if len(data) <= 1:
            continue
        if len(other_data) == 0:
            # Only one label present: no negative pair, avoid dividing by 0.
            continue
        count = 0
        fa_count = 0
        for i in range(len(data)):
            for j in range(len(other_data)):
                count += 1
                if distance(data[i], other_data[j]) <= d:
                    fa_count += 1
        fa.append(fa_count / count)
    return np.mean(fa)

def calculateClusterVAL_FAR(y_pre, target, val_d, far_d):
    """Validation rate and false-accept rate over all distinct sample pairs.

    Every unordered pair ``(i, j)`` with ``i < j`` is visited once. A sample
    is never paired with itself: the previous inner loop started at ``j = i``
    and counted each self-pair (distance 0) as an accepted positive.
    Distances are squared Euclidean, the same measure
    ``distance(..., distance_metric=0)`` returns.

    Args:
        y_pre: Sequence of embeddings, shape ``(n_samples, n_features)``.
        target: Sequence of ``n_samples`` labels aligned with ``y_pre``.
        val_d: Threshold applied to same-label pairs.
        far_d: Threshold applied to different-label pairs.

    Returns:
        ``(VAL, FAR)`` as floats: the fraction of same-label pairs with
        distance ``<= val_d`` and the fraction of different-label pairs with
        distance ``<= far_d``. A rate is NaN when no pair of that kind exists.
    """
    y_new = np.asarray(y_pre)
    labels = np.asarray(target)
    ta_count = 0
    ta_count_true = 0
    fa_count = 0
    fa_count_true = 0
    for i in range(len(y_new) - 1):
        # Distances from sample i to every later sample, in one NumPy call.
        dists = np.sum(np.square(y_new[i + 1:] - y_new[i]), axis=1)
        same = labels[i + 1:] == labels[i]
        diff = ~same
        ta_count += int(same.sum())
        ta_count_true += int(np.sum(same & (dists <= val_d)))
        fa_count += int(diff.sum())
        fa_count_true += int(np.sum(diff & (dists <= far_d)))
    val = ta_count_true / ta_count if ta_count else float('nan')
    far = fa_count_true / fa_count if fa_count else float('nan')
    return val, far

### Acc公式
對每一筆data找k個最近的data 看有多少是相同的class

### TA 公式
<img src="img/TA.png" width="50%">  

### FA 公式
<img src="img/FA.png" width="50%">  

### VAL FAR
<img src="img/VAL_FAR.png" width="50%">  


### gpu 算距離

In [42]:
def pdist(v):
    """Return the matrix of pairwise Euclidean (L2) distances between rows.

    Args:
        v: Float tensor of shape ``(n, d)``.

    Returns:
        Tensor of shape ``(n, n)`` whose ``[i, j]`` entry is
        ``||v[i] - v[j]||_2``.
    """
    # torch.cdist avoids the (n, n, d) intermediate that broadcasting
    # v[:, None] - v creates. The matmul shortcut is disabled so each entry is
    # computed from the actual differences and the diagonal is exactly 0.
    dist = torch.cdist(v, v, p=2, compute_mode='donot_use_mm_for_euclid_dist')
    return dist

def makemask(targets):
    """Build boolean same-label / different-label pair masks.

    Args:
        targets: 1-D tensor of ``n`` labels.

    Returns:
        ``(mask_pos, mask_neg)``, both boolean ``(n, n)`` tensors on the
        device of ``targets``. ``mask_pos[i, j]`` is True when ``i != j`` and
        the labels match; ``mask_neg[i, j]`` is True when the labels differ.
        The diagonal is False in both masks.
    """
    n = targets.shape[0]
    mask_pos = targets.expand(n, n).eq(targets.expand(n, n).t())
    mask_neg = ~mask_pos
    # Drop self-pairs. The index mask is boolean (uint8 indexing is
    # deprecated) and lives on the same device as `targets`, not on the
    # notebook-global device, which may differ from where the caller put them.
    diagonal = torch.eye(n, dtype=torch.bool, device=targets.device)
    mask_pos[diagonal] = False
    return mask_pos, mask_neg

def calculateClusterVAL_FAR_GPU(y_pre, target, val_d = 1., far_d = 1.):
    """Vectorised validation rate (VAL) and false-accept rate (FAR).

    Uses ``pdist`` (true Euclidean distance, not the squared distance used by
    ``distance``) and the pair masks from ``makemask``. Despite the name, all
    tensors are placed on the CPU.

    Args:
        y_pre: Sequence of embeddings, one per sample.
        target: Sequence of numeric labels aligned with ``y_pre``.
        val_d: Threshold applied to same-label pairs.
        far_d: Threshold applied to different-label pairs.

    Returns:
        ``(val, far)`` as 0-d NumPy arrays: the fraction of same-label pairs
        (self-pairs excluded) with distance ``<= val_d`` and the fraction of
        different-label pairs with distance ``<= far_d``.
    """
    dist = pdist(torch.Tensor(y_pre).cpu())
    mask_pos, mask_neg = makemask(torch.Tensor(target).cpu())

    # Combine the threshold test with the mask using a logical AND.
    # Multiplying the distances by the mask turned every unmasked entry into
    # 0, which then passed `<= threshold` and was counted as an accept.
    ta = torch.sum((dist <= val_d) & mask_pos)
    fa = torch.sum((dist <= far_d) & mask_neg)

    psame = torch.sum(mask_pos)
    pdiff = torch.sum(mask_neg)

    val = ta.float() / psame.float()
    far = fa.float() / pdiff.float()
    return val.detach().numpy(), far.detach().numpy()

def calculateClusterACC_GPU(y_pre, target, k = 2):
    """k-nearest-neighbour label agreement of the embeddings.

    For every sample, finds its ``k`` nearest other samples according to
    ``pdist`` and measures the fraction that share the sample's label; the
    per-sample fractions are then averaged. Despite the name, all tensors are
    placed on the CPU.

    Args:
        y_pre: Sequence of embeddings, one per sample.
        target: Sequence of numeric labels aligned with ``y_pre``.
        k: Number of neighbours; ``torch.topk`` requires it not to exceed the
            number of samples.

    Returns:
        Mean fraction of same-label neighbours, in [0, 1].
    """
    dist = pdist(torch.Tensor(y_pre).cpu())
    mask_pos, _ = makemask(torch.Tensor(target).cpu())
    # Push each sample's distance to itself to +inf so it is never picked as
    # a neighbour, while genuine zero-distance neighbours stay eligible.
    dist[torch.eye(dist.shape[0], dtype=torch.bool)] = float('inf')
    _, indices = torch.topk(dist, k, largest = False)

    # hits[i, j] is True when the j-th nearest neighbour of i shares its label.
    hits = torch.gather(mask_pos, 1, indices)
    # Divide by k exactly once (the previous code divided twice).
    acc = torch.sum(hits, 1).float() / k
    return acc.detach().numpy().mean()

# test_y_pre = [[1, 2, 3, 4, 5], [2, 2, 3, 4, 4], [1, 2, 3, 4, 6]]
# test_y = [1, 2, 1]
# calculateClusterVAL_FAR_GPU(test_y_pre, test_y)
#calculateClusterACC_GPU(test_y_pre, test_y)

In [43]:
# Requires `y_pre` and `y` from the validation cell above; run that cell first
# (the recorded NameError came from executing this cell on a fresh kernel).
valList = []
farList = []
accList = []

# Sweep the distance threshold over 0.0 .. 2.0 and the neighbour count over 5 .. 25.
thresholds = [i / 10 for i in range(21)]
k_values = [i + 5 for i in range(21)]
for threshold, k in tqdm(list(zip(thresholds, k_values))):
    val, far = calculateClusterVAL_FAR_GPU(y_pre, y, threshold, threshold)
    acc = calculateClusterACC_GPU(y_pre, y, k)
    valList.append(val)
    farList.append(far)
    accList.append(acc)

# plt.subplots(1, 2) returns a 1-D array of two Axes, so index it with one
# subscript (axs[0, 0] raises IndexError).
fig, axs = plt.subplots(1, 2, figsize=(20, 18))
val_far_ax, acc_ax = axs

val_far_ax.set_title('VAL_FAR')
val_far_ax.set_xlabel('FAR')
val_far_ax.set_ylabel('VAL')
val_far_ax.scatter(farList, valList)
val_far_ax.plot(farList, valList)

acc_ax.set_title('acc(k)')
acc_ax.set_xlabel('k')
acc_ax.set_ylabel('acc')
# scatter needs both coordinates: k on the x axis, accuracy on the y axis.
acc_ax.scatter(k_values, accList)
plt.show()

  0%|          | 0/21 [00:00<?, ?it/s]


NameError: name 'y_pre' is not defined