# Preliminary operations

We import libraries and datasets.

In [7]:
# Utility
from google.colab import drive
from shutil import copyfile
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import subprocess
import tkinter as tk
import random

# Image processing
import cv2
from PIL import Image

# KDTree
from sklearn.neighbors import KDTree
import joblib

# Keras
from tensorflow import keras
from tensorflow.keras.preprocessing import image as kimage
from tensorflow.keras.applications import mobilenet_v2

In [2]:
# Mount Google Drive so that the dataset and model paths used below resolve.
mount_point = '/content/gdrive'
drive.mount(mount_point)

Mounted at /content/gdrive


Loading the tree.

In [4]:
# Deserialize the previously saved tree that is queried in the Query section.
# NOTE(review): joblib.load unpickles the file, so only load archives you trust.
tree_path = '/content/gdrive/MyDrive/Digital Signal/Models/Image Retrieval/mob_tree.joblib'
tree = joblib.load(tree_path)

Loading the MobileNetV2.

In [None]:
# MobileNetV2 without its classification head, used as a feature extractor:
# ImageNet weights, 224x224 RGB input and global max pooling on the output.
mobilenet_options = dict(
  input_shape = (224, 224, 3),
  weights = 'imagenet',
  include_top = False,
  pooling = 'max',
)
mobilenet = keras.applications.MobileNetV2(**mobilenet_options)

Loading the dataframes for computing accuracy.

In [None]:
# Attribute tables of the two datasets; they are compared column by column
# in the Evaluation section.
attr_csv_paths = {
  'actors': '/content/gdrive/MyDrive/Digital Signal/Dataset/list_attr_actors.csv',
  'celeba': '/content/gdrive/MyDrive/Digital Signal/Dataset/list_attr_celeba.csv',
}
df_actors = pd.read_csv(attr_csv_paths['actors'])
df_celeba = pd.read_csv(attr_csv_paths['celeba'])

# Acquisition

In [None]:
# Small Tk dialog asking which clip (by index) should be analysed.
# After this cell `example` holds the chosen index as an int.
# NOTE(review): Tk needs a display; presumably this cannot run on a hosted
# Colab runtime - confirm the intended execution environment.

# Largest index accepted by the dialog; matches the range shown in the prompt label.
MAX_CLIP_INDEX = 6

# Stays None if the window is closed without a valid selection.
example = None

root = tk.Tk()

canvas1 = tk.Canvas(root, width=400, height=300, relief='raised')
canvas1.pack()

label1 = tk.Label(root, text='Select clip to analize')
label1.config(font=('helvetica', 16))
canvas1.create_window(200, 25, window=label1)

label2 = tk.Label(root, text='Number from 0 to 6:')
label2.config(font=('helvetica', 11))
canvas1.create_window(200, 100, window=label2)

# The entry widget has its own name so that `example` is only ever None or an int.
# It is placed through the canvas only (the canvas manages its geometry).
example_entry = tk.Entry(root)
canvas1.create_window(200, 140, window=example_entry)


def display_text():
  """Validate the typed clip index, store it in the global `example` and close the dialog.

  If the text is not an integer in [0, MAX_CLIP_INDEX] the dialog stays open,
  the prompt label turns red and `example` is left untouched.
  """
  global example
  try:
    value = int(example_entry.get())
  except ValueError:
    value = None
  if value is None or not 0 <= value <= MAX_CLIP_INDEX:
    label2.config(text='Please enter a number from 0 to 6:', fg='red')
    return
  example = value
  # The original referenced `root.destroy` without calling it (a no-op).
  root.destroy()


button1 = tk.Button(root, text='Select', command=display_text, font=('helvetica', 12, 'bold'))
canvas1.create_window(200, 180, window=button1)

root.mainloop()

# Fail here, with a clear message, rather than later when `example` is used as an index.
if example is None:
  raise ValueError('No clip was selected.')

In [None]:
actor_dir = '/content/gdrive/MyDrive/Digital Signal/Dataset/Actor_faces/'
# os.listdir returns the entries in arbitrary order; sorting makes a given
# index always select the same file.
fn = sorted(os.listdir(actor_dir))
# Reject out-of-range (and negative, which would silently wrap around) indices.
if not 0 <= example < len(fn):
  raise IndexError('Clip index ' + str(example) + ' is out of range: ' + str(len(fn)) + ' files found in ' + actor_dir)
actor_path = os.path.join(actor_dir, fn[example])

In [None]:
# player = subprocess.call([<path to the image viewer executable>, actor_path, '--play-and-exit'])

# Processing

In order to crop the background as much as possible, we use a Haar cascade classifier to detect and crop faces in both datasets.

In [None]:
# Load the cascade classifier used by crop_face to detect frontal faces
cascade_path = '/content/gdrive/MyDrive/Digital Signal/Dataset/haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
# A cascade that failed to load is reported by empty(); check it here so the
# problem surfaces at load time instead of inside detectMultiScale.
if face_cascade.empty():
  raise IOError('Could not load the Haar cascade from ' + cascade_path)

def _crop_with_margin(im, box):
  """Crop the rectangle `box` = (x, y, w, h) out of `im`, padded where the padding fits.

  Margins: 30 px above, 10 px below and 5 px on both sides (the side margin is
  applied only if it fits on the left and on the right). The limits come from
  the image itself; the original code hard-coded 218 x 178 (presumably the
  aligned CelebA size), which gave wrong margins for images of any other size.
  """
  x, y, w, h = box
  height, width = im.shape[:2]
  top = 30 if y - 30 >= 0 else 0
  bottom = 10 if y + h + 10 < height else 0
  side = 5 if (x - 5 >= 0 and x + w + 5 < width) else 0
  return im[y - top:y + h + bottom, x - side:x + w + side]


def crop_face(path, scaleFactor, minNeighbors):
  """Detect the single face in the image at `path` and return it cropped.

  The module-level `face_cascade` is run once; if it finds no face it is run
  again with `scaleFactor - 0.1`, if it finds several faces it is run again
  with `scaleFactor + 0.1`.

  Args:
    path: path of the image file to read.
    scaleFactor: scale factor passed to `detectMultiScale`.
    minNeighbors: minimum number of neighbours passed to `detectMultiScale`.

  Returns:
    A tuple `(status, face)`:
      (0, 0)    - no face found, even after the retry;
      (1, 0)    - more than one face found, even after the retry;
      (2, face) - exactly one face; `face` is the cropped image array as read by OpenCV.

  Raises:
    FileNotFoundError: if the image cannot be read.
  """
  # Read the input image; imread gives None instead of raising on failure
  im = cv2.imread(path)
  if im is None:
    raise FileNotFoundError('Could not read image: ' + str(path))
  # Convert the image to grayscale
  gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
  # Detect faces in the image
  faces = face_cascade.detectMultiScale(gray, scaleFactor, minNeighbors)
  if len(faces) == 0:
    # Nothing found: retry with a smaller scaleFactor
    faces = face_cascade.detectMultiScale(gray, scaleFactor - 0.1, minNeighbors)
  elif len(faces) > 1:
    # Several candidates: retry with a larger scaleFactor
    faces = face_cascade.detectMultiScale(gray, scaleFactor + 0.1, minNeighbors)
  if len(faces) == 0:
    return (0, 0)
  if len(faces) > 1:
    return (1, 0)
  # Exactly one detection: crop it with its margins
  return (2, _crop_with_margin(im, faces[0]))

We crop the actor's face by means of the function `crop_face`.

In [None]:
# crop_face returns (2, face) only when exactly one face was detected.
res = crop_face(actor_path, 1.12, 9)
if res[0] != 2:
  # Stop here: the following cells need `im`, which would otherwise be
  # undefined (or left over from a previous run).
  # NOTE(review): the [:-12] slice drops the last 12 characters of the path,
  # presumably a fixed file-name suffix - confirm.
  raise RuntimeError('Didn\'t found a unique face for ' + actor_path[:-12])
im = res[1]

Now, we can add a custom background to our query image (i.e. the actor), in order to make it more realistic and less biased.

In [None]:
# First of all we make the background transparent.
# The crop comes from OpenCV as a BGR numpy array, which has no PIL methods:
# convert it to a PIL image with RGB channel order first. The isinstance check
# keeps the cell re-runnable once `im` is already a PIL image.
if isinstance(im, np.ndarray):
  im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
im = im.convert("RGBA")
# Set alpha = 0 if the pixel is white (or almost white)
pixels = np.array(im)
near_white = (pixels[..., :3] >= 240).all(axis = -1)
pixels[near_white] = (255, 255, 255, 0)
im = Image.fromarray(pixels)
# Now we paste the actor into the background (one of the files 00.jpg ... 23.jpg, picked at random)
r = str(random.randint(0, 23)).zfill(2)
# Force three channels so the composite can be fed to the network
bg = Image.open('/content/gdrive/MyDrive/Digital Signal/Dataset/Backgrounds/' + r + '.jpg').convert('RGB')
# Resize the background image to the dimensions of the actor image
bg = bg.resize(im.size)
# The actor image is also its own mask: transparent pixels let the background show
bg.paste(im, (0, 0), im)

# Query

We query the tree that we previously loaded. Specifically, we retrieve the 2 nearest neighbours of the selected actor.

In [None]:
def features_func(im):
  """Return the flattened MobileNetV2 features of a single image.

  `im` is converted to an array, preprocessed for MobileNetV2, given a leading
  batch axis and passed through the module-level `mobilenet` model.
  """
  # Image -> array, then the preprocessing MobileNetV2 expects
  pixels = keras.applications.mobilenet_v2.preprocess_input(kimage.img_to_array(im))
  # The model works on batches: add a leading axis of length one
  batch = np.expand_dims(pixels, axis = 0)
  # Run the network and flatten its output into a 1-D vector
  return mobilenet.predict(batch, verbose = False).flatten()

In [None]:
# `bg` has the size of the face crop, while the network was built with
# input_shape = (224, 224, 3): resize the query image before extracting features.
query_image = bg.resize((224, 224))
features_actors = np.asarray(features_func(query_image))

In [None]:
# KDTree.query expects a 2-D array of shape (n_queries, n_features): turn the
# single feature vector into a one-row matrix. dist and ind then have shape (1, 2).
dist, ind = tree.query(np.asarray(features_actors).reshape(1, -1), k = 2)

# Evaluation

In [None]:
# We don't consider the first column because it is the id of the image
cols = df_actors.columns[1:]

# Row of the attribute table that describes the selected actor.
# NOTE(review): assumes the 'image_id' values compare equal to the integer
# `example` - confirm against the CSV.
i = df_actors.index[df_actors['image_id'] == example].tolist()
if len(i) != 1:
  raise ValueError('Expected exactly one row of df_actors with image_id == ' + repr(example) + ', found ' + str(len(i)))
# Use the scalar label i[0]: indexing with the list `i` yields a Series, and a
# Series cannot be used as the condition of an `if`.
actor_attrs = df_actors.loc[i[0], cols].to_numpy()

# Save distances between actor and celebrity for both images
dist_1 = round(dist[0][0], 2)
dist_2 = round(dist[0][1], 2)
# Save indices related to the actor for both images
ind_1 = ind[0][0]
ind_2 = ind[0][1]

# Count the attributes on which the actor agrees with each retrieved celebrity
count_1 = int((actor_attrs == df_celeba.loc[ind_1, cols].to_numpy()).sum())
count_2 = int((actor_attrs == df_celeba.loc[ind_2, cols].to_numpy()).sum())

# Compute accuracy for the first image
acc_1 = round((count_1/len(cols))*100, 2)
print('Accuracy for first image: ' + str(acc_1) + '%')
print('Distance for first image: ' + str(dist_1))

# Compute accuracy for the second image
acc_2 = round((count_2/len(cols))*100, 2)
print('Accuracy for second image: ' + str(acc_2) + '%')
print('Distance for second image: ' + str(dist_2) + '\n')

Displaying results.

In [None]:
# Directory holding the CelebA images.
celeba_dir = '/content/gdrive/MyDrive/Digital Signal/Dataset/celeba/img_align_celeba'

# The evaluation treats ind[0][k] as a row of df_celeba, so the picture to show
# is the one named in that row. The original built the name from the 0-based
# index and concatenated it to the directory without a path separator.
# NOTE(review): assumes df_celeba has an 'image_id' column holding the file
# name including its extension (e.g. '000001.jpg') - confirm against the CSV.
celeb_path_1 = os.path.join(celeba_dir, str(df_celeba['image_id'][ind[0][0]]))
celeb_path_2 = os.path.join(celeba_dir, str(df_celeba['image_id'][ind[0][1]]))
im_celeb_1 = kimage.load_img(celeb_path_1, target_size = (224, 224))
im_celeb_2 = kimage.load_img(celeb_path_2, target_size = (224, 224))

# One panel per image: the query on top, then the two retrieved celebrities
# captioned with their attribute accuracy.
fig, axes = plt.subplots(3, 1, figsize = (10, 7))
panels = [
  (im, example),
  (im_celeb_1, str(acc_1) + '%'),
  (im_celeb_2, str(acc_2) + '%'),
]
for ax, (picture, caption) in zip(axes, panels):
  ax.imshow(picture)
  ax.axis('off')
  ax.set_title(caption)

In [None]:
# Show the retrieval results in a small Tk window
root = tk.Tk()

# Root window title and dimension
root.title("Face retrieval")

canvas1 = tk.Canvas(root, width=400, height=300, relief='raised')
canvas1.pack()

# Textual summary of the two matches computed in the Evaluation section.
# TODO: also display the images with the accuracy as caption, like in the cell above.
result_lines = [
  'First match accuracy:\t' + str(acc_1) + '%',
  'First match distance:\t' + str(dist_1),
  'Second match accuracy:\t' + str(acc_2) + '%',
  'Second match distance:\t' + str(dist_2),
]
for row, text in enumerate(result_lines):
  result_label = tk.Label(root, text=text)
  result_label.config(font=('helvetica', 14))
  # One line every 25 px, left-aligned at x = 20
  canvas1.create_window(20, 25 + 25 * row, window=result_label, anchor='w')

button1 = tk.Button(root, text='Close', command=root.destroy, font=('helvetica', 12, 'bold'))
canvas1.create_window(200, 200, window=button1)

root.mainloop()