# Misc

In [None]:
# changing theme
# !jt -t chesterish 
# !jt -t onedork -fs 100 -altp -tfs 14 -nfs 115 -T
# restore default theme
!jt -r

# Comparing fake and real faces

In [None]:
import sys 
sys.path.append(r'C:\Users\Jonas\PycharmProjects\IlkinThesis\ThesisExperiments')
from IPython.display import Image, Video

import cv2
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.cluster import AgglomerativeClustering

import config
import cleaning_with_face_recognition as rec

In [None]:
# Report every non-REAL video folder holding at least 30 entries, together
# with the name of the original video recorded for it in the metadata.
folders = glob.glob(config.FACE_IMAGES + '/*')
metadata = pd.read_json(config.METADATA_PATH).T

for folder in folders:
    crops = glob.glob(folder + '/*')
    # Small folders are dropped before the metadata is consulted at all.
    if len(crops) < 30:
        continue

    entry = metadata.loc[os.path.basename(folder)]
    if entry['label'] == 'REAL':
        continue

    print(folder, len(crops), entry['original'])
    # Reassigned on every hit, so only the last reported pair survives the loop.
    fake_path = folder
    real_path = os.path.join(config.FACE_IMAGES, entry['original'])
    


## Helper classes & functions

In [None]:
class Face:
    """A single image loaded from disk with OpenCV.

    The pixel array is stored in ``self.face`` exactly as ``cv2.imread``
    returned it; ``get`` treats that array as BGR when converting.
    """

    def __init__(self, path):
        """Load the image stored at ``path``.

        Raises:
            OSError: if ``cv2.imread`` cannot produce an image from ``path``.
        """
        self.face = cv2.imread(path)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here so the error names the offending file instead of
        # surfacing later as an AttributeError on ``None``.
        if self.face is None:
            raise OSError(f'cv2.imread could not load an image from {path!r}')

    def __sub__(self, other):
        """Return the element-wise absolute difference to ``other``.

        Both arrays are cast to ``int`` before subtracting, so the result is
        an integer array and not a ``Face``.
        """
        return abs(self.face.astype('int') - other.face.astype('int'))

    def get(self, type='rgb'):
        """Return the image in the requested colour layout.

        Args:
            type: ``'bgr'`` returns the stored array itself, ``'rgb'`` and
                ``'gray'`` return a converted copy. (The parameter name
                shadows the builtin but is kept for existing callers.)

        Raises:
            ValueError: if ``type`` is not one of the three supported names.
        """
        if type == 'bgr':
            return self.face
        if type == 'rgb':
            return cv2.cvtColor(self.face, cv2.COLOR_BGR2RGB)
        if type == 'gray':
            return cv2.cvtColor(self.face, cv2.COLOR_BGR2GRAY)
        # An unknown name used to fall through and return None silently.
        raise ValueError(
            f"Unsupported colour layout {type!r}; expected 'bgr', 'rgb' or 'gray'"
        )

In [None]:
class Loader:
    """Indexable, sized list of the entries directly inside a folder.

    The paths are sorted lexicographically. ``glob.glob`` gives no ordering
    guarantee, and the notebook pairs two loaders by position, so an
    unsorted listing could silently pair unrelated files.
    """

    def __init__(self, path):
        """Collect and sort every entry matching ``<path>/*``."""
        self.path = path
        self.names = sorted(glob.glob(os.path.join(path, '*')))

    def __getitem__(self, idx):
        """Return the path at position ``idx`` (also enables iteration)."""
        return self.names[idx]

    def __len__(self):
        """Return the number of collected paths."""
        return len(self.names)

In [None]:
class Metadata:
    """Lookup of per-video metadata loaded from a JSON file."""

    def __init__(self, path):
        """Load the JSON file at ``path``.

        The table is transposed after loading, so the top-level JSON keys
        become the row index used by ``__getitem__``.
        """
        # Read the file that was actually requested; the argument used to be
        # ignored in favour of config.METADATA_PATH.
        self.metadata = pd.read_json(path).T

    def __getitem__(self, name):
        """Return the ``label`` and ``original`` values for row ``name``.

        Raises:
            KeyError: if ``name`` is not present in the metadata index.
        """
        return self.metadata.loc[name][['label','original']].values

In [None]:
def display_images(images, cols = 5):
    """Draw ``images`` on a grid of subplots without axis ticks or labels.

    Args:
        images: sized sequence of arrays accepted by ``plt.imshow``.
        cols: number of grid columns.
    """
    plt.figure(figsize=(32,16))

    # Ceiling division: exactly as many rows as needed. The previous
    # ``len // cols + 1`` reserved an empty row whenever the image count was
    # a multiple of ``cols``, shrinking every image for no reason.
    rows = (len(images) + cols - 1) // cols
    for i, img in enumerate(images):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(img)

        # Hide ticks and tick labels on both axes; the frame stays visible.
        plt.tick_params(
            axis='both',       # apply to the x- and the y-axis
            which='both',      # both major and minor ticks are affected
            bottom=False,      # ticks along the bottom edge are off
            top=False,         # ticks along the top edge are off
            left=False,        # ticks along the left edge are off
            labelbottom=False, # labels along the bottom edge are off
            labelleft=False)   # labels along the left edge are off

## Initializing  

In [None]:
# Quick check of the metadata lookup on a single video.
metadata = Metadata(config.METADATA_PATH)
label, original = metadata['zrobwltwxr.mp4']
print(label, original)

# Fake/real pair examined in the rest of the notebook:
# zlukluvcyp.mp4 37 sccqbzyikm.mp4
fake_path, real_path = (
    os.path.join(config.FACE_IMAGES, video)
    for video in ('zlukluvcyp.mp4', 'sccqbzyikm.mp4')
)
fakes, reals = Loader(fake_path), Loader(real_path)

# Compare the entries found at the same position in both folders.
idx = 6
f1, f2 = Face(fakes[idx]), Face(reals[idx])
pixel_diff = f1 - f2
print(f'FAKE\tREAL\tDIFF, {np.sum(pixel_diff)}')
display_images([f1.get('rgb'), f2.get('rgb'), pixel_diff.astype('uint8')], 3)

## Clustering + face diff

**Steps**
1. Extract encoding for real and fake
2. Cluster real and fake separately
3. Relate clusters
4. Diff clusters

In [None]:
# One encoding per real image, then average-linkage agglomerative clustering
# that is cut by distance (0.55) instead of by a fixed cluster count.
real_encodings = list(map(rec.get_encoding, reals))
clusterer = AgglomerativeClustering(
    linkage='average',
    n_clusters=None,
    distance_threshold=0.55,
)
real_clusters = clusterer.fit(real_encodings)

In [None]:
# Bucket the real images by the cluster label assigned to them.
clustered_images = {}
for cluster_id, image_path in zip(real_clusters.labels_, reals):
    clustered_images.setdefault(cluster_id, []).append(Face(image_path))

# The fake images reuse the labels of the real images, matched by position.
clustered_fake_images = {}
for cluster_id, image_path in zip(real_clusters.labels_, fakes):
    clustered_fake_images.setdefault(cluster_id, []).append(Face(image_path))

In [None]:
fake_name = os.path.basename(fake_path)
real_name = os.path.basename(real_path)

# One line per cluster: summed absolute pixel difference of each real/fake pair.
plt.figure(figsize=(16, 8))
for cluster, real_faces in clustered_images.items():
    fake_faces = clustered_fake_images[cluster]
    difs = [np.sum(real - fake) for real, fake in zip(real_faces, fake_faces)]
    plt.plot(difs, label=cluster, marker='o')

plt.legend()
plt.title(f'Difference between cluster images fake: {fake_name} real: {real_name}')
plt.savefig(f'outputs/img_dif_{fake_name}-{real_name}.jpg')
print(fake_name, real_name)

In [None]:
# Print how many real images landed in each cluster.
for cluster in clustered_images:
    members = clustered_images[cluster]
    print(f'{cluster}: {len(members)}')

In [None]:
# For each cluster, show its real images first and then the fake counterparts.
for cluster in clustered_images:
    print(cluster)
    real_rgb = [face.get('rgb') for face in clustered_images[cluster]]
    display_images(real_rgb, 18)
    fake_rgb = [face.get('rgb') for face in clustered_fake_images[cluster]]
    display_images(fake_rgb, 18)

In [None]:
# Embed a source video in the notebook output for visual inspection.
# Other clips noted while exploring:
# adrcjenxlz.mp4 - bad example, keep to display
# xifuovpydw.mp4

# print(fake_name)
Video(f'D:\\DFDC\\dfdc_train_part_48/sccqbzyikm.mp4', embed=True, width=640)

## Plotting face difference between fake and real

In [None]:
diff_reals, diff_fakes, diff_other = [], [], []
# The part of the file name after the last '_' selects the series;
# anything other than '0.jpg' / '1.jpg' goes to diff_other.
series_by_suffix = {'0.jpg': diff_reals, '1.jpg': diff_fakes}

for f, r in zip(fakes, reals):
    # Pairing is positional, so both names must agree after the 'mp4_' prefix.
    assert f.split('mp4_')[-1] == r.split('mp4_')[-1], f'Fake: {f}, real: {r}'

    # Summed absolute pixel difference between the paired images.
    diff = np.sum(Face(f) - Face(r))
    series_by_suffix.get(f.split('_')[-1], diff_other).append(diff)

plt.figure(figsize=(16,8))
for series, name in ((diff_reals, '0'), (diff_fakes, '1'), (diff_other, 'other')):
    plt.plot(series, label=name)
plt.legend()

## Analyzing face position data

In [None]:
# Show the 'label' and 'original' metadata values stored for this video.
metadata['sccqbzyikm.mp4']

In [None]:
# Collect the midpoint of the first entry (presumably a bounding box given as
# x1, y1, x2, y2 - confirm against the extractor) for every usable frame.
face_coordinates = pd.read_json(config.FACE_COORDINATES_PATH).T

x, y = [], []
for frame in face_coordinates.loc['sccqbzyikm.mp4'].values:
    try:
        # Needs at least two entries in the frame; anything else is
        # reported and skipped.
        first_box, _ = frame[:2]
    except Exception as e:
        print(e, frame)
    else:
        x1, y1, x2, y2 = first_box
        x.append((x1 + x2) / 2)
        y.append((y1 + y2) / 2)

In [None]:
# Dump the raw per-frame coordinate entries of the video, one per line.
video_frames = face_coordinates.loc['sccqbzyikm.mp4'].values
for frame in video_frames:
    print(frame)

In [None]:
# Plot the collected midpoints; note that y is drawn on the horizontal axis
# and x on the vertical one.
plt.plot(y, x, marker='x')

## Distance between face encodings

In [None]:
# Write every fake/real pair side by side into test/, naming each file after
# its position and the distance between the two face encodings.
for i, (f, r) in enumerate(zip(fakes, reals)):
    fake_encoding = rec.get_encoding(f)
    real_encoding = rec.get_encoding(r)
    distance = np.linalg.norm(fake_encoding - real_encoding)

    img1 = cv2.imread(f)
    # The right half must come from the real image; reading ``f`` twice
    # produced two copies of the fake and made the comparison meaningless.
    img2 = cv2.imread(r)
    cv2.imwrite(f'test/{i}_{distance}.jpg', np.hstack((img1, img2)))

In [None]:
import matplotlib.pyplot as plt

# Overlay the encoding values of the first fake and the first real image,
# plotted against their component index.
plt.figure(figsize=(24,8))
fake_encoding = rec.get_encoding(fakes[0])
real_encoding = rec.get_encoding(reals[0])

for encoding in (fake_encoding, real_encoding):
    plt.scatter(np.arange(len(encoding)), encoding)

# Possible cases


1. Single cluster
     1. single face - keep all
     2. multiple faces 
     3. single face & non-face
    
    
2. Multiple clusters
    1. Single face divided
    2. Single face cluster majority + artifact - keep majority
    3. Single face cluster + equally big artifact 
    4. Same length face clusters
    5. 2 same length face clusters + smaller artifact cluster
