# Face_Blur (Code by Jaechan Jo)
### - feature_inversion method
 - Blur the face while preserving the original face's feature map as much as possible

### - Structure (1st draft)
 - Face-detector + Blur-feature_inversion
 - yolo_v5_face + SqueezeNet

# Setup

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision
import torchvision.transforms as T
import PIL
import cv2
import argparse
import numpy as np
import os
from imageio import imread
from collections import namedtuple
import matplotlib.pyplot as plt
import matplotlib.image as img

from cs231n.image_utils import SQUEEZENET_MEAN, SQUEEZENET_STD
%matplotlib inline

In [8]:
# Pick the compute device (GPU when available) and the matching float tensor type.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dtype = torch.cuda.FloatTensor if device.type == 'cuda' else torch.FloatTensor

# Echo the selected device as the cell output.
device

device(type='cuda')

# Dataset

 - img_dir : Directory of image dataset
 - img_txt : Directory of the bounding-box coordinate .txt files
 - save_dir : Directory where the output images are saved

In [None]:
# Example locations. Note that save_dir is the same directory as img_dir here,
# so saved outputs land next to the input images.
img_dir, img_txt, save_dir = (
    '/home/face_mosaic/yolo_v5+feature_inversion/face_data/crop/',
    '/home/face_mosaic/yolo_v5+feature_inversion/face_data/result/crop/',
    '/home/face_mosaic/yolo_v5+feature_inversion/face_data/crop/',
)

# Face-Detection
### - Yolo_v5_face

In [None]:
# Run the face detector to extract bounding boxes (the result files are parsed later as `x1 y1 w h score` per line)

!python ./yolov5_face_detection/test_widerface.py \
--weight ./yolov5_face_detection/weights/face_l.pt \
--img-size 640 \
--dataset_folder ./face_data/crop/ \
--folder_pict ./face_data/dir/crop_dir.txt \
--save_folder ./face_data/result/

In [None]:
# Crop bbox & save

def _read_face_boxes(txt_path):
    """Parse one detection result file into a dict, or return None if it has no header.

    The file is read as: image name on the first line, object count on the
    second line, then one ``x1 y1 w h score`` line per box. Blank lines are
    ignored so a trailing newline does not break parsing.
    """
    with open(txt_path, mode='r') as f:
        lines = [line.strip() for line in f]
    lines = [line for line in lines if line]
    if len(lines) < 2:
        return None

    box_list = []
    for line in lines[2:]:
        x1, y1, w, h, score = line.split()
        box_list.append([int(x1), int(y1), int(w), int(h), float(score)])

    return {'img_name': lines[0], 'obj_num': int(lines[1]), 'box': box_list}


def crop_face(img_dir, img_txt, save_dir, score_threshold=0.5):
    """
    Crop every confidently detected face out of the images in a directory.

    Inputs:
    - img_dir: directory holding the input images.
    - img_txt: directory holding one detection result file per image
      (image name, object count, then ``x1 y1 w h score`` lines).
    - save_dir: directory the crops are written to as
      ``<image name>_crop<k>.jpg``, where k is the 1-based box index.
    - score_threshold: boxes scoring below this value are skipped.

    Returns:
    - None. The crops are written to disk; the parsed boxes and the image
      list are printed.
    """
    img_list = os.listdir(img_dir)
    boxes = []

    for txt in os.listdir(img_txt):
        txt_path = os.path.join(img_txt, txt)
        if not os.path.isfile(txt_path):
            continue
        obj = _read_face_boxes(txt_path)
        if obj is not None:
            boxes.append(obj)

    print('boxes : ', boxes)
    print('img_list : ', img_list)

    # First entry wins for a duplicated name, like a linear search would.
    boxes_by_name = {}
    for obj in boxes:
        boxes_by_name.setdefault(obj['img_name'], obj)

    for img in img_list:
        img_path = os.path.join(img_dir, img)
        if not os.path.isfile(img_path):
            continue

        # splitext copes with names that contain no dot or several dots.
        img_name = os.path.splitext(img)[0]

        # Files without a detection result (e.g. crops saved by an earlier
        # run into the same directory) are skipped instead of crashing.
        obj = boxes_by_name.get(img_name)
        if obj is None:
            continue

        with PIL.Image.open(img_path) as image:
            for k, (x1, y1, w, h, score) in enumerate(obj['box'], start=1):
                if score < score_threshold:
                    continue
                cropped_image = T.functional.crop(image, top=y1, left=x1, height=h, width=w)
                cropped_image.save(os.path.join(save_dir, f'{img_name}_crop{k}.jpg'), 'JPEG')

# Face-Blur
### - SqueezeNet_feature_inversion

In [None]:
def preprocess(img, size=512):
    """
    Turn an image into a normalized, batched tensor for the CNN.

    Inputs:
    - img: image accepted by T.Resize / T.ToTensor (callers in this file pass
      a PIL image).
    - size: value handed to the resize transform.

    Returns:
    - The resized, SqueezeNet-normalized tensor with a leading batch axis
      added by ``x[None]``.
    """
    transform = T.Compose([
        # T.Resize is the current name of the transform formerly called T.Scale.
        T.Resize(size),
        T.ToTensor(),
        # TODO: generalize this so the mean / standard deviation can be
        # computed per image instead of using the fixed SqueezeNet statistics.
        T.Normalize(mean=SQUEEZENET_MEAN.tolist(),
                    std=SQUEEZENET_STD.tolist()),
        T.Lambda(lambda x: x[None]),
    ])
    return transform(img)

def deprocess(img):
    """
    Convert a batched, normalized tensor back into a displayable PIL image.

    Inputs:
    - img: batched tensor; only its first element ``img[0]`` is used.

    Returns:
    - A PIL image obtained by undoing the SqueezeNet normalization and
      min-max rescaling the result.
    """
    inverse_std = [1.0 / s for s in SQUEEZENET_STD.tolist()]
    negated_mean = [-m for m in SQUEEZENET_MEAN.tolist()]

    steps = [
        # Drop the batch axis.
        T.Lambda(lambda x: x[0]),
        # Un-normalize in two passes: multiply by the standard deviation,
        # then add the mean back.
        T.Normalize(mean=[0, 0, 0], std=inverse_std),
        T.Normalize(mean=negated_mean, std=[1, 1, 1]),
        T.Lambda(rescale),
        T.ToPILImage(),
    ]
    return T.Compose(steps)(img)

def rescale(x):
    """
    Min-max normalize ``x`` so its values span [0, 1].

    Inputs:
    - x: array-like supporting ``min()``, ``max()`` and arithmetic
      (used here on torch tensors).

    Returns:
    - ``(x - min) / (max - min)``; when every value is equal the result is
      all zeros instead of NaN from a division by zero.
    """
    low, high = x.min(), x.max()
    span = high - low
    if span == 0:
        # Constant input: x - low is already all zeros with the right shape.
        return x - low
    return (x - low) / span

def rel_error(x, y):
    """Return the largest element-wise relative error between x and y.

    The denominator ``|x| + |y|`` is floored at 1e-8 so that two zeros do
    not divide by zero.
    """
    difference = np.abs(x - y)
    magnitude = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(difference / magnitude)

def features_from_img(imgpath, imgsize):
    """
    Load an image file and run it through the module-level ``cnn``.

    Inputs:
    - imgpath: path handed to PIL.Image.open.
    - imgsize: size forwarded to preprocess.

    Returns:
    - (features, img_var): the list produced by extract_features and the
      preprocessed input it was computed from.
    """
    pil_image = PIL.Image.open(imgpath)
    tensor = preprocess(pil_image, size=imgsize)
    img_var = Variable(tensor.type(dtype))
    feature_maps = extract_features(img_var, cnn)
    return feature_maps, img_var

In [None]:
# Pre-trained (ImageNet) SqueezeNet 1.1; only its `features` stack is kept,
# cast to the selected tensor type and moved to the selected device.
cnn = torchvision.models.squeezenet1_1(pretrained=True).features.type(dtype).to(device)

# The network is only used as a fixed feature extractor, so gradient tracking
# is switched off on every weight to avoid wasted computation.
# (Turn it back on before starting if you do want to train.)
for param in cnn.parameters():
    param.requires_grad_(False)

# We provide this helper code which takes an image, a model (cnn), and returns a list of
# feature maps, one per layer.
def extract_features(x, cnn):
    """
    Run x through the CNN one sub-module at a time and keep every output.

    Inputs:
    - x: input tensor for the first sub-module (a minibatch of images in
      this file); it is moved to the module-level ``device`` first.
    - cnn: PyTorch model whose direct sub-modules are applied in order.

    Returns:
    - features: list with one entry per sub-module; features[i] is the
      output of the i-th sub-module, so entries from different layers may
      differ in channel count and spatial size.
    """
    features = []
    activation = x.to(device)
    for layer in cnn._modules.values():
        activation = layer(activation)
        features.append(activation)
    return features

# Face_Loss
### - Feature_inversion equation

![Feature Inversion Equation](./exp_img/feature_inversion_eq.png "Feature Inversion Equation")

 - l번째 original face와 output(random) image feature map을 유사하게 합니다.
 - 파란색 원: blur의 정도를 조절하는 hyper parameters
     - layer(l) : 유사하게 할 feature map의 층, 깊을 수록 blur
     - blur_weight : 값이 클수록 유사하게 하는 학습이 약해진다, 클수록 blur

In [None]:
def face_loss(blur_weight, starting_img, target_face):
    """
    Compute the feature-inversion loss between two feature maps.

    Inputs:
    - blur_weight: de-identification strength; the summed squared error is
      divided by it, so a larger value weakens the matching and leaves the
      result more blurred.
    - starting_img: feature tensor of the image being optimized.
    - target_face: feature tensor of the original face, same shape as
      starting_img.

    Returns:
    - feature_loss: scalar, ``sum((starting_img - target_face) ** 2) / blur_weight``.
    - cosine_distance: scalar, one minus the cosine similarity of the two
      flattened tensors.
    """
    squared_error = (starting_img - target_face).pow(2).sum()
    feature_loss = (1/blur_weight) * squared_error

    similarity = torch.nn.CosineSimilarity(dim=0)(
        starting_img.flatten(), target_face.flatten()
    )
    cosine_distance = 1 - similarity
    return feature_loss, cosine_distance

In [None]:
def face_feature_inversion(original_face, target_layer, image_size,
                           blur_weight, init_random=False):
    """
    Blur a face by feature inversion.

    Starting from uniform random noise, the pixels of an image are optimized
    with Adam so that its feature map at ``target_layer`` matches the feature
    map of the original face. The original face, the starting image and a
    progress image every 1000 iterations are shown with matplotlib.

    Inputs:
    - original_face: path of the face image to blur (opened with PIL.Image.open).
    - target_layer: index into the list returned by extract_features that
      selects the feature map to match.
    - image_size: size forwarded to preprocess when loading the face.
    - blur_weight: divisor applied to the feature loss (see face_loss).
    - init_random: currently ignored; the starting image is always uniform
      random noise.

    Returns:
    - The final optimized image, converted with deprocess.
    """
    print(f"target_layer: {target_layer}/ blur_weight: {blur_weight}")

    # Feature map of the original face at the chosen layer: the optimization target.
    face_img = preprocess(PIL.Image.open(original_face), size=image_size)
    face_img = face_img.type(dtype)
    feats = extract_features(face_img, cnn)
    feature_target = feats[target_layer].clone()

    # Starting image: uniform random noise shaped like the face tensor.
    # (A black starting image is an untried alternative.)
    s_img = torch.Tensor(*face_img.size()).uniform_(0, 1).type(dtype)
    s_img.requires_grad_()

    # Optimization hyperparameters.
    initial_lr = 3.0
    decayed_lr = 0.1
    decay_lr_at = 180

    # The pixel values themselves are the parameters being optimized.
    optimizer = torch.optim.Adam([s_img], lr=initial_lr)

    f, axarr = plt.subplots(1, 2)
    axarr[0].axis('off')
    axarr[1].axis('off')
    axarr[0].set_title('Original Face.')
    axarr[1].set_title('Starting img.')
    axarr[0].imshow(deprocess(face_img.cpu()))
    axarr[1].imshow(deprocess(s_img.data.cpu()))
    plt.show()
    plt.figure()

    # Built once; only used for the progress report below.
    cos = torch.nn.CosineSimilarity(dim=0)

    for t in range(10000):
        # Keep the pixel values in a bounded range during the early iterations.
        if t < 190:
            s_img.data.clamp_(-1.5, 1.5)
        optimizer.zero_grad()

        feats = extract_features(s_img, cnn)

        # Only the feature loss drives the optimization; the cosine distance
        # returned alongside it is not used.
        feature_loss, _ = face_loss(blur_weight, feats[target_layer], feature_target)
        loss = feature_loss

        loss.backward()

        # At iteration `decay_lr_at` switch to a fresh Adam optimizer with the
        # decayed learning rate, then take the gradient step.
        if t == decay_lr_at:
            optimizer = torch.optim.Adam([s_img], lr=decayed_lr)
        optimizer.step()

        if t % 1000 == 0:
            print('Iteration {}'.format(t))
            plt.axis('off')
            plt.imshow(deprocess(s_img.data.cpu()))
            plt.show()
            # Pixel-space similarity between the current image and the original face.
            cosine_similarity = cos(torch.flatten(s_img), torch.flatten(face_img))
            print(f'원본 이미지와 유사도: {cosine_similarity}')

    print('Iteration {}'.format(t))
    result = deprocess(s_img.data.cpu())
    plt.axis('off')
    plt.imshow(result)
    plt.show()

    # Hand the result back so the caller can save or composite it.
    return result

# Synthesize
### - original image + cropped blur face

In [None]:
# Load the original image and the blurred face crop.
original_image = './face_data/img/face1.jpg'
cropped_blur_image = './face_data/result/result/face1_crop1_result10000.jpg'

# Read both files with OpenCV, then convert BGR -> RGB for matplotlib.
o_img = cv2.imread(original_image)
c_img = cv2.imread(cropped_blur_image)
ori_img = cv2.cvtColor(o_img, cv2.COLOR_BGR2RGB)
crop_img = cv2.cvtColor(c_img, cv2.COLOR_BGR2RGB)

# Show the two images side by side.
fig, axes = plt.subplots(1, 2, figsize=(6, 12))
ax = axes.flatten()
for panel, picture in zip(ax, (ori_img, crop_img)):
    panel.imshow(picture)
plt.show()

In [None]:
# Load the bounding-box coordinates for one image.
# File layout as parsed here: image name, object count, then one
# `x1 y1 w h score` line per box.
txt_path = './face_data/result/face_data/face1.txt'
boxes = []
with open(txt_path, mode='r') as f:
    lines = [line.strip('\n') for line in f.readlines()]

obj = {'img_name': lines[0], 'obj_num': int(lines[1])}

box_list = []
for line in lines[2:]:
    x1, y1, w, h, score = line.split(' ')
    box_list.append([int(x1), int(y1), int(w), int(h), float(score)])

obj['box'] = box_list
boxes.append(obj)

print('boxes : ', boxes)

In [None]:
# Replace each detected box in the original image with the blurred crop,
# resized to the box size.
new_ori = ori_img.copy()

save_dir = './face_data/result/synthesis/'

img_h, img_w = new_ori.shape[:2]

# The PIL version of the crop does not depend on the box, so build it once.
pil_c_img = PIL.Image.fromarray(crop_img)

obj = [item for item in boxes if item['img_name'] == 'face1']
for x1, y1, w, h, score in obj[0]['box']:
    # Skip low-confidence detections and degenerate boxes.
    if score < 0.5 or w <= 0 or h <= 0:
        continue
    x2 = x1 + w
    y2 = y1 + h

    # Resize the blurred crop to the full box size.
    rs_c_img = T.functional.resize(pil_c_img, size=[h, w])
    r_crop_img = np.array(rs_c_img)

    # A box may stick out past the image border; paste only the part that
    # lies inside the image so the two slices always have the same shape.
    vis_x1, vis_y1 = max(x1, 0), max(y1, 0)
    vis_x2, vis_y2 = min(x2, img_w), min(y2, img_h)
    if vis_x1 >= vis_x2 or vis_y1 >= vis_y2:
        continue

    # Overwrite the box region directly with the matching part of the crop.
    new_ori[vis_y1:vis_y2, vis_x1:vis_x2, :] = r_crop_img[
        vis_y1 - y1:vis_y2 - y1, vis_x1 - x1:vis_x2 - x1, :
    ]

plt.imshow(new_ori)
plt.axis('off')
plt.show()

# Save the composite; create the output directory first if it is missing.
os.makedirs(save_dir, exist_ok=True)
PIL.Image.fromarray(new_ori).save(os.path.join(save_dir, 'test.jpg'), 'JPEG')

# How similar are the original and the blurred image?
### - Cosine similarity (computed on the raw pixel values)

In [None]:
# Cosine similarity between the synthesized image and the original,
# computed on the flattened pixel arrays.
cos = torch.nn.CosineSimilarity(dim=0)
synthesized_vec = torch.flatten(torch.Tensor(new_ori))
original_vec = torch.flatten(torch.Tensor(ori_img))
cs = cos(synthesized_vec, original_vec)
print(cs)

# Show the final image next to the original.
fig, axes = plt.subplots(1, 2, figsize=(6, 12))
ax = axes.flatten()
for panel, picture in zip(ax, (new_ori, ori_img)):
    panel.imshow(picture)
    panel.axis('off')
plt.show()

![Feature Inversion Result](./exp_img/result.png "Feature Inversion Result")