# Hijab Prediction

In [2]:
import glob
import os
import sys

import cv2
import dlib, face_recognition_models, face_recognition, openface
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.model_selection
import tensorflow as tf
from keras.models import load_model
from keras.preprocessing.image import img_to_array, load_img
from PIL import Image

%matplotlib inline

## Load in the Data & Models

### Data

In [None]:
# Directory that holds the labeled hijab face images (one PNG per image name)
hijab_faces_dir = "/home/gridsan/groups/irancovid/data/iran_instagram_p1and2_images_s0/hijab-faces"

# Read in the scraped Instagram post data
data = pd.read_csv("/home/gridsan/groups/irancovid/data/iran_instagram_p1and2_pre-processed.csv")
# Parse out the image name from the Instagram post link (the last "/"-separated segment)
# NOTE(review): a link ending in "/" would yield an empty name here — confirm the
# pre-processed links never carry a trailing slash
data["image_name"] = data.post_link.apply(lambda x: x.split("/")[-1])

# Gather all of the image names from the labeled Hijab images
face_paths = glob.glob(f"{hijab_faces_dir}/*.png")
# File name without directory and ".png" extension; a set gives fast membership checks
labeled_image_names = {os.path.splitext(os.path.basename(path))[0] for path in face_paths}

# Subset the data to the labeled images.
# .copy() makes the subset an independent frame, so the column assignment below
# does not write into a slice of the original (avoids SettingWithCopyWarning).
data = data[data.image_name.isin(labeled_image_names)].copy()

# Add the path to the extracted hijab image to the dataframe
data["image_path"] = data.image_name.apply(lambda x: f"{hijab_faces_dir}/{x}.png")

### Models
This section loads pre-trained third-party face detection and alignment models, together with our in-house gender and hijab detection networks.

In [None]:
# Face detection: dlib's frontal face detector
face_detector = dlib.get_frontal_face_detector()

# Face alignment: an openface aligner built on the pose predictor model
# whose location is provided by face_recognition_models
predictor_68_point_model = face_recognition_models.pose_predictor_model_location()
face_aligner = openface.AlignDlib(predictor_68_point_model)

# Locations of the two pre-trained neural networks
# NOTE(review): the hijab model lives under a different directory tree
# ("akapl/" rather than "groups/") than the gender model — confirm this is intended
gender_model_dir = "/home/gridsan/groups/irancovid/models/gender_detection"
hijab_model_dir = "/home/gridsan/akapl/irancovid/models/hijab_detection"

# Load both networks, compiled
gender_detection = load_model(gender_model_dir, compile=True)
hijab_detection = load_model(hijab_model_dir, compile=True)

## Prediction Functions

### Helper Functions
Two helper functions that use the pre-trained models to predict the gender of each face and, for faces predicted to be female, whether a hijab is worn.

In [29]:
# Helper function to predict the gender of a face using the gender_detection network
def predict_gender(faces_array):
    """Predict a gender label for every face in ``faces_array``.

    Parameters
    ----------
    faces_array : array-like
        Batch of face images, handed as-is to ``gender_detection.predict``.
        (Presumably shaped (n_faces, height, width, channels) — confirm against the model.)

    Returns
    -------
    list of str
        One label per face: "Male" when the highest-scoring class index is 0,
        "Female" when it is 1. An empty list when ``faces_array`` is empty.
    """
    # Nothing to classify: skip the model call entirely
    if len(faces_array) == 0:
        return []

    # Predict on the argument itself rather than on a notebook-level variable,
    # so the function does not depend on hidden kernel state
    gender_probabilities = gender_detection.predict(faces_array)

    # Map the most probable class index of each face to its label
    labels = {0: "Male", 1: "Female"}
    return [labels[g] for g in np.argmax(gender_probabilities, axis=1)]

# Helper function to predict whether or not the person in the picture is wearing a hijab
def predict_hijab(faces_array, gender_predictions):
    """Predict a hijab label for every face, running the model only on non-male faces.

    Parameters
    ----------
    faces_array : array-like
        Batch of face images, one per entry of ``gender_predictions``.
    gender_predictions : sequence of str
        Gender label per face, as returned by ``predict_gender``.

    Returns
    -------
    list of int
        One entry per face. Faces labeled "Male" get 0; every other face gets the
        highest-scoring class index from ``hijab_detection.predict``.
        (Presumably class 1 means "hijab" — confirm against the model's training labels.)
    """
    # Pre-fill the result so it can be indexed: 0 for "Male", -1 (unknown) otherwise
    hijab_predictions = [0 if gender == "Male" else -1 for gender in gender_predictions]
    # Positions of the faces that still need a model prediction
    female_faces = [i for i, gender in enumerate(gender_predictions) if gender != "Male"]

    # No female faces: nothing to send to the model
    if not female_faces:
        return hijab_predictions

    # For each of the predicted female faces, predict whether they are wearing a hijab
    # (np.asarray allows a plain list of faces to be indexed by a list of positions)
    hijab_probabilities = hijab_detection.predict(np.asarray(faces_array)[female_faces])
    hijab_predictions_subset = np.argmax(hijab_probabilities, axis=1)

    # Write the subset predictions back to their original positions
    for face_index, prediction in zip(female_faces, hijab_predictions_subset):
        hijab_predictions[face_index] = int(prediction)
    return hijab_predictions

### Key Function
The main function of this notebook. It takes an image path and returns, for every face detected in the image, the face's bounding rectangle and the predicted hijab label.

In [None]:

# The main function, which given an image predicts whether or not the person is wearing a hijab
def process_image(image_source_path, scaling_factor=0.1, size=200):
    """Detect every face in an image and predict whether each one wears a hijab.

    Parameters
    ----------
    image_source_path : str
        Path of the image file to load.
    scaling_factor : float, default 0.1
        Fraction by which each detected face box is enlarged before alignment.
    size : int, default 200
        Size passed to the face aligner for the aligned face image.

    Returns
    -------
    tuple of four lists, or None
        ``(image_names, face_numbers, face_rectangles, hijab_predictions)`` with one
        entry per detected face: the image name (file name without directory and
        extension), the 1-based face number, the detector's rectangle, and the
        hijab prediction. All four lists are empty when no face is detected.
        ``None`` is returned when processing raised an exception; the error is
        reported on stderr.
    """
    try:
        # Load the file
        image = face_recognition.load_image_file(image_source_path)
        # Image name = file name without its directory and extension
        image_name = os.path.splitext(os.path.basename(image_source_path))[0]

        # Aligned face crops and the detector rectangles they came from
        scaled_faces = []
        face_rectangles = []
        # For each detected face in the image (the detector is called with 1 as its second argument)
        for face_rectangle in face_detector(image, 1):
            # Enlarge the face box by scaling_factor to get a slightly less zoomed-in face.
            # NOTE(review): each edge moves in proportion to its own coordinate value,
            # not to the box width/height, so the padding depends on where the face sits
            # in the image. Left as is because the models may have been trained on crops
            # produced this way — confirm before changing.
            l = int(face_rectangle.left() - scaling_factor * face_rectangle.left())
            r = int(face_rectangle.right() + scaling_factor * face_rectangle.right())
            t = int(face_rectangle.top() - scaling_factor * face_rectangle.top())
            b = int(face_rectangle.bottom() + scaling_factor * face_rectangle.bottom())
            face_rectangles.append(face_rectangle)
            # NOTE: Landmark indices are different from non-scaled face aligning
            scaled_face_aligned = face_aligner.align(
                size, image, dlib.rectangle(l, t, r, b),
                landmarkIndices=openface.AlignDlib.INNER_EYES_AND_BOTTOM_LIP)
            # Append the scaled image to the array
            scaled_faces.append(
                tf.keras.preprocessing.image.img_to_array(Image.fromarray(scaled_face_aligned)))

        # No faces found: return empty results without calling the models
        if not face_rectangles:
            return ([], [], [], [])

        # Now predict the gender of the found faces
        scaled_faces = np.asarray(scaled_faces)
        gender_predictions = predict_gender(scaled_faces)
        # And then whether they are wearing a Hijab
        hijab_predictions = predict_hijab(scaled_faces, gender_predictions)

        # One copy of the image name per face (a list of names, not of characters)
        face_count = len(face_rectangles)
        return ([image_name] * face_count,
                list(range(1, face_count + 1)),
                face_rectangles,
                hijab_predictions)
    except Exception as error:
        # Catch Exception only, so that KeyboardInterrupt can still stop a long run
        print(f"Error with image: {image_source_path}", file=sys.stderr)
        print(f"Message: {type(error).__name__}: {error}", file=sys.stderr)
        return None

## Example

In [None]:
# Column that holds the path of each face image (the "image_path" column added in the data-loading cell)
image_path_column_name = "image_path"
# Destination CSV for the per-face predictions
# TODO(review): no output path is defined in the visible notebook; point this at the intended location
spreadsheet_output_path = "hijab_predictions.csv"


def _with_swifter(pandas_object):
    """Return the object's swifter accessor (with a progress bar) when swifter is registered, else the object itself."""
    if hasattr(pandas_object, "swifter"):
        return pandas_object.swifter.progress_bar(True)
    return pandas_object


# Predict hijab wearing for all the images in our data set
# (through swifter when it has been imported, otherwise with a plain pandas apply)
data["output"] = _with_swifter(data[image_path_column_name]).apply(process_image)

# Index the per-image results by image path without modifying `data` (keeps the cell re-runnable),
# and leave out images whose processing failed (process_image returned None)
image_outputs = data.set_index(image_path_column_name)["output"].dropna()

# And create an output dataset, which contains the image name, face number, rectangle and prediction
output_columns = ["image_name", "face_number", "face_rectangle", "hijab_prediction"]
output = pd.DataFrame(image_outputs.tolist(), index=image_outputs.index, columns=output_columns)
# One row per face: explode every list-valued column in parallel
output = _with_swifter(output).apply(pd.Series.explode)
# Drop any NAs (e.g. images in which no face was found) and reset the index
output = output.dropna().reset_index()

# Save the output
output.to_csv(spreadsheet_output_path, index=False)