In [15]:
# Disable warnings
import warnings
warnings.filterwarnings("ignore")

# Import pyplot from matplotlib
import matplotlib.pyplot as plt

# Import TensorFlow image preprocessing 
from tensorflow.keras.preprocessing import image as image_preprocessing
import tensorflow.keras.backend as tf_backend 
tf_backend.set_image_data_format('channels_last')

# Importing fetch_lfw_people dataset from the sklearn.datasets package
from sklearn.datasets import fetch_lfw_people

# Import all other needed packages
import os
import csv
import keras
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
import face_recognition
import glob
from PIL import Image

# Internal helper imports
from helpers import graphics as help_graph
from helpers import binary_converter as help_binary
from helpers import accuracy_calculator as help_calc
from helpers import shaper as help_shaper
from helpers import vector_extractor as help_extract
from helpers import segmenter as help_segmenter

%matplotlib inline

### Step 2. Load Labels

Load the set of image paths and labels. For example, the pair $\texttt{(000345.png, 25)}$ corresponds to the image with filename $\texttt{000345.png}$ and a person ID of $\texttt{25}$.

In [33]:
# Locations of the image directory, the identity (label) file and the CSV file
# that the extracted feature vectors are written to.
celeba_images_path = './images/celeb_faces/img_align_celeba'
celeba_labels_path = './images/celeb_faces/identity_CelebA.txt'
csv_path = './csv_datasets/face_recognition_celeba.csv'

people_images, people_labels = [], []

# Read the label file; each non-empty line is "<image filename> <person id>".
with open(celeba_labels_path) as labels_file:
    labels_file_lines = labels_file.read().splitlines()

# Split every line only once and skip blank lines (e.g. a trailing empty line),
# which would otherwise raise an IndexError when the person id is looked up.
# split() without arguments also tolerates repeated spaces or tabs.
label_entries = [line.split() for line in labels_file_lines if line.strip()]

# Parallel lists: image_paths[i] is the filename whose person id is labels[i].
# Labels are kept as strings, exactly as they appear in the file.
image_paths = [entry[0] for entry in label_entries]
labels = [entry[1] for entry in label_entries]

### Step 3. Load Images to the CSV file

Each row in the table holds the entry index, the file path, the person's ID and the components of the feature vector. One can also set a precision $p$, in which case each vector in the table is stored in the form
$$
\mathbf{f}' = \frac{1}{10^p}\left\lfloor 10^{p} \cdot \mathbf{f}\right\rfloor
$$

In [31]:
# Number of decimal places kept for each feature-vector component;
# None means the vector is written unchanged.
precision = None
def form_row_csv(entry_id, path, person_id, feature_vector):
    """
    Returns a list that corresponds to a single row in the csv file.

    The row is laid out as [entry_id, path, person_id, f_0, f_1, ...], where
    f_0, f_1, ... are the components of feature_vector.

    If the module-level `precision` is not None, every component f is first
    replaced by floor(10**precision * f) / 10**precision, i.e. it is rounded
    down to `precision` decimal places.

    Parameters:
        entry_id: identifier written to the first column.
        path: image path written to the second column.
        person_id: person label written to the third column.
        feature_vector: iterable of numbers (e.g. a numpy array or a list).
    """

    if precision is not None:
        scale = 10 ** precision
        # np.floor (rather than an int cast, which truncates toward zero) keeps
        # negative components consistent with the floor-based definition.
        feature_vector = np.floor(np.asarray(feature_vector, dtype=float) * scale) / scale

    return [entry_id, path, person_id, *feature_vector]

In [37]:
def load_csv(verbose=True):
    """
    Computes a face encoding for every labelled image and appends it to the csv file.

    For each index i of the module-level `labels` list, the image
    celeba_images_path + '/' + image_paths[i] is read, converted from BGR to
    RGB and passed to face_recognition.face_encodings. When exactly one
    encoding is returned, the row built by form_row_csv is appended to
    csv_path. Images that cannot be read, or that do not yield exactly one
    encoding, are skipped.

    The file is opened in append mode, so calling this function again adds
    rows to the existing file instead of replacing it.

    Parameters:
        verbose: when True, prints a progress line every 1000 written rows
            and a notice for every image that could not be read.
    """
    logging_frequency = 1000
    number_of_processed_images = 0

    # newline='' is required by the csv module so that the writer controls the
    # line endings itself (otherwise blank rows can appear on some platforms).
    with open(csv_path, 'a', newline='') as csv_file:
        writer = csv.writer(csv_file)

        for i in range(len(labels)):
            path = celeba_images_path + '/' + image_paths[i]

            # cv2.imread returns None instead of raising when the file is
            # missing or cannot be decoded; skip it rather than crash the run.
            bgr_image = cv2.imread(path)
            if bgr_image is None:
                if verbose:
                    print('Could not read image {}'.format(path))
                continue

            image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
            encodings = face_recognition.face_encodings(image.astype('uint8'))

            # Keep only images with exactly one encoding, so that each row
            # holds a single feature vector for its label.
            if len(encodings) != 1:
                continue

            # Logged before the increment, so the first line reports 0 images.
            if verbose and number_of_processed_images % logging_frequency == 0:
                print('Successfully processed {} images'.format(number_of_processed_images))

            number_of_processed_images += 1
            writer.writerow(form_row_csv(i, path, labels[i], encodings[0]))

load_csv(verbose=True)

Successfully processed 0 images
Successfully processed 1000 images
Successfully processed 2000 images
Successfully processed 3000 images
Successfully processed 4000 images
Successfully processed 5000 images
Successfully processed 6000 images
Successfully processed 7000 images
Successfully processed 8000 images
Successfully processed 9000 images
Successfully processed 10000 images
Successfully processed 11000 images
Successfully processed 12000 images
Successfully processed 13000 images
Successfully processed 14000 images
Successfully processed 15000 images
Successfully processed 16000 images
Successfully processed 17000 images
Successfully processed 18000 images
Successfully processed 19000 images
Successfully processed 20000 images
Successfully processed 21000 images
Successfully processed 22000 images
Successfully processed 23000 images
Successfully processed 24000 images
Successfully processed 25000 images
Successfully processed 26000 images
Successfully processed 27000 images
Succe