In [1]:

import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from imageio import imread
from skimage.transform import resize
from scipy.spatial import distance
from keras.models import load_model

%matplotlib inline


Using TensorFlow backend.


In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive so the dataset
# and cascade file paths used below resolve. The call is interactive: it asks
# for an authorization code (see the cell output).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Side length, in pixels, of the square that every cropped face is resized to.
image_size = 160

# Haar cascade definition file (on the mounted Drive) used for face detection.
cascade_path = '/content/drive/My Drive/DCGAN/facial_recognition/haarcascade_frontalface_alt2.xml'


In [0]:
def prewhiten(x):
    """Standardise an image (3-D) or a batch of images (4-D).

    The mean is subtracted and the result is divided by the standard
    deviation. For a 4-D input the statistics are computed separately for
    each entry along axis 0; for a 3-D input they are computed over the
    whole array. The divisor is never allowed to drop below
    ``1 / sqrt(number of elements per image)``, which keeps constant images
    from causing a division by zero.

    Raises:
        ValueError: if ``x`` is neither 3- nor 4-dimensional.
    """
    if x.ndim not in (3, 4):
        raise ValueError('Dimension should be 3 or 4')

    if x.ndim == 4:
        reduce_axes = (1, 2, 3)
        n_elements = x[0].size
    else:
        reduce_axes = (0, 1, 2)
        n_elements = x.size

    centered = x - np.mean(x, axis=reduce_axes, keepdims=True)
    # Lower bound on the divisor, derived from the element count of one image.
    std_floor = 1.0 / np.sqrt(n_elements)
    spread = np.maximum(np.std(x, axis=reduce_axes, keepdims=True), std_floor)
    return centered / spread

def l2_normalize(x, axis=-1, epsilon=1e-10):
    """Scale ``x`` to unit L2 norm along ``axis``.

    The squared norm is clamped from below by ``epsilon`` before the square
    root is taken, so all-zero vectors are returned as zeros instead of
    triggering a division by zero.
    """
    squared_norm = np.sum(np.square(x), axis=axis, keepdims=True)
    norm = np.sqrt(np.maximum(squared_norm, epsilon))
    return x / norm

In [0]:
def load_and_align_images(filepaths, margin):
    """Crop the first detected face of every image and resize it.

    Each file is read, a Haar cascade loaded from ``cascade_path`` looks for
    faces, and the first detection is cropped with ``margin // 2`` extra
    pixels on every side before being resized to
    ``image_size`` x ``image_size``.

    Args:
        filepaths: iterable of image file paths.
        margin: total number of extra pixels to keep around the detected
            box (half on each side).

    Returns:
        ``np.array`` of the resized crops, in the order of ``filepaths``.

    Raises:
        IndexError: if no face is detected in one of the images. The message
            names the offending file.
    """
    # The constructor loads the cascade file directly; no separate load() needed.
    cascade = cv2.CascadeClassifier(cascade_path)

    print(cascade_path)
    half_margin = margin // 2
    aligned_images = []
    for filepath in filepaths:
        img = imread(filepath)
        faces = cascade.detectMultiScale(img,
                                         scaleFactor=1.1,
                                         minNeighbors=3)
        # len() works whether the detector hands back an empty tuple or an array.
        if len(faces) == 0:
            raise IndexError('No face detected in ' + str(filepath))
        (x, y, w, h) = faces[0]
        # Clamp the top/left bounds at 0: a negative start would be read by
        # NumPy as "count from the end" and produce an empty or wrong crop
        # when the face sits closer to the border than the margin.
        top = max(y - half_margin, 0)
        left = max(x - half_margin, 0)
        # Bottom/right bounds past the image edge are truncated by slicing.
        cropped = img[top:y + h + half_margin,
                      left:x + w + half_margin, :]
        aligned = resize(cropped, (image_size, image_size), mode='reflect')
        aligned_images.append(aligned)

    return np.array(aligned_images)

In [0]:

def calc_embs(filepaths, margin=0, batch_size=1):
    """Return L2-normalised embeddings for the faces found in ``filepaths``.

    The images are loaded and cropped with ``load_and_align_images``,
    standardised with ``prewhiten``, pushed through ``model`` in slices of
    ``batch_size`` and the concatenated outputs are L2-normalised.

    NOTE(review): ``model`` is a module-level name that is not defined in the
    cells shown here; presumably it is created with ``load_model`` before
    this function is called - confirm.
    """
    whitened = prewhiten(load_and_align_images(filepaths, margin))
    batch_starts = range(0, len(whitened), batch_size)
    batch_outputs = [
        model.predict_on_batch(whitened[lo:lo + batch_size])
        for lo in batch_starts
    ]
    return l2_normalize(np.concatenate(batch_outputs))

In [0]:

def calc_dist(img_name0, img_name1):
    """Euclidean distance between the 'emb' entries stored in ``data`` for two images."""
    emb0 = data[img_name0]['emb']
    emb1 = data[img_name1]['emb']
    return distance.euclidean(emb0, emb1)

def calc_dist_plot(img_name0, img_name1):
    """Print the embedding distance of two images and show them side by side."""
    print(calc_dist(img_name0, img_name1))
    # One row, two columns: first image on the left, second on the right.
    for panel, img_name in enumerate((img_name0, img_name1), start=1):
        plt.subplot(1, 2, panel)
        plt.imshow(imread(data[img_name]['image_filepath']))

In [0]:
# Name of the dataset folder under '/content/drive/My Drive/DCGAN/' that the
# following cells filter, convert and split into batches.
target_dir = 'database2'

# Delete images with faces not detected


In [0]:
# Gather every regular file from the listed dataset folders.
names = [target_dir]
image_dir_basepath = '/content/drive/My Drive/DCGAN/'
# Initialised once, outside the loop, so files from every folder in `names`
# are kept rather than only those of the last one.
image_filepaths = list()
for name in names:
    image_dirpath = image_dir_basepath + name
    for f in os.listdir(image_dirpath):
        filepath = os.path.join(image_dirpath, f)
        # Skip macOS metadata and sub-directories (e.g. batch folders), which
        # cannot be read as images.
        if f != '.DS_Store' and os.path.isfile(filepath):
            image_filepaths.append(filepath)

# The constructor loads the cascade file directly.
cascade = cv2.CascadeClassifier(cascade_path)

# WARNING: destructive step - files in which no face is detected are
# permanently removed from the dataset folder.
count = 0
for filepath in image_filepaths:
    img = imread(filepath)
    faces = cascade.detectMultiScale(img,
                                     scaleFactor=1.1,
                                     minNeighbors=3)
    count = count + 1
    print(count)
    # Use len() instead of `faces == ()`: when faces are found the result is an
    # array, and comparing an array with an empty tuple is a shape-mismatched
    # elementwise comparison (deprecated, and an error on recent NumPy).
    if len(faces) == 0:
        os.remove(filepath)

# Convert Images to 3 Channels (RGB)

In [0]:
from PIL import Image
image_dir_basepath = '/content/drive/My Drive/DCGAN/'+target_dir+'/'
# Walk the dataset folder recursively so the conversion works both before the
# images are split into numbered batch folders and after.
for image_dirpath, _subdirs, filenames in os.walk(image_dir_basepath):
    for f in filenames:
        if f == '.DS_Store':
            continue
        filepath = os.path.join(image_dirpath, f)
        # Inspect the stored colour mode with PIL. cv2.imread's default flag
        # always decodes to 3 channels, so a shape check on its result cannot
        # reveal grayscale, palette or RGBA files.
        with Image.open(filepath) as img:
            if img.mode == 'RGB':
                continue
            # convert() reads the pixel data, so the file can be closed before
            # it is overwritten below.
            rgb_img = img.convert('RGB')
        # Overwrite the original file in place; the image size is not changed.
        rgb_img.save(filepath)
        print(filepath)

# Separate Dataset into Batches

In [0]:
dir_base = '/content/drive/My Drive/DCGAN/'+target_dir+'/'
# Maximum number of files placed in one numbered batch folder.
max_size = 1000

# Only loose files directly inside dir_base need batching; existing batch
# folders and macOS metadata are left alone. Sorting makes the assignment of
# files to batches reproducible (os.listdir order is arbitrary).
image_paths = sorted(
    entry for entry in os.listdir(dir_base)
    if entry != '.DS_Store' and os.path.isfile(os.path.join(dir_base, entry))
)

dir_count = -1
dir_current = None
# Start in the "full" state so the first file triggers creation of a batch
# folder; when there is nothing to move, no empty folder is left behind.
file_count = max_size
for image_path in image_paths:
    if file_count == max_size:
        # Advance to the next unused number. Searching every time (not only
        # once up front) avoids a FileExistsError when higher-numbered folders
        # already exist.
        dir_count = dir_count + 1
        dir_current = os.path.join(dir_base, str(dir_count))
        while os.path.exists(dir_current):
            dir_count = dir_count + 1
            dir_current = os.path.join(dir_base, str(dir_count))
        os.mkdir(dir_current)
        file_count = 0
    # Move the file into the current batch folder.
    os.rename(os.path.join(dir_base, image_path),
              os.path.join(dir_current, image_path))
    file_count = file_count + 1
    print(image_path)