In [1]:
import pandas as pd
import os
import shutil
import cv2, glob, random, math, numpy as np, dlib, itertools

# read and write all data
cwd = os.getcwd()
df = pd.read_csv(os.path.join(cwd, "data", "legend.csv"))

# Snapshot the legend exactly as read, before any label is rewritten.
# to_csv does not create missing directories, so make sure the folder exists.
os.makedirs(os.path.join(cwd, "data_csv"), exist_ok=True)
df.to_csv(os.path.join(cwd, "data_csv", "all_data.csv"))

# Emotion labels may be spelled in lower case or upper case; both spellings of
# a label map to the same integer class id (the position in this list).
emotion_names = ["ANGER", "CONTEMPT", "DISGUST", "FEAR",
                 "HAPPINESS", "NEUTRAL", "SADNESS", "SURPRISE"]
emotion_codes = {}
for code, name in enumerate(emotion_names):
    emotion_codes[name] = code
    emotion_codes[name.lower()] = code

# Assign the result back to the column instead of calling
# replace(..., inplace=True) on the selection df["emotion"]: the in-place form
# operates on an intermediate object and does not update df when pandas
# Copy-on-Write is active. Labels not present in the mapping are left as-is.
# infer_objects() turns the column into an integer dtype when every label was
# mapped, independent of whether replace() itself downcasts.
df["emotion"] = df["emotion"].replace(emotion_codes).infer_objects()

In [2]:
# Inspect the legend after relabelling; the emotion column should now show integer codes.
df

Unnamed: 0,user_id,image,emotion
0,628,facial-expressions_2868588k.jpg,0
1,628,facial-expressions_2868585k.jpg,7
2,628,facial-expressions_2868584k.jpg,2
3,628,facial-expressions_2868582k.jpg,3
4,dwdii,Aaron_Eckhart_0001.jpg,5
...,...,...,...
13685,jhamski,SharmilaTagore_80.jpg,4
13686,jhamski,SharmilaTagore_81.jpg,4
13687,jhamski,SharmilaTagore_82.jpg,4
13688,jhamski,SharmilaTagore_83.jpg,4


In [3]:
# Count the rows per emotion code to see how the classes are distributed in the full legend.
df.groupby('emotion').count()

Unnamed: 0_level_0,user_id,image
emotion,Unnamed: 1_level_1,Unnamed: 2_level_1
0,252,252
1,9,9
2,208,208
3,21,21
4,5696,5696
5,6868,6868
6,268,268
7,368,368


In [4]:
# create folder (any previous output is wiped so the folder only holds this run's images)
dir_img = cwd + '/cleaned_images'
if os.path.exists(dir_img):
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

# [file name, emotion code] of every image that passes all filters
all_image = []
# file names listed in the legend that OpenCV could not load
unreadable_images = []

# filter thresholds
expected_size = 350   # images must be exactly 350x350 pixels
min_sharpness = 5     # minimum variance of the Laplacian; anything below counts as blurry

detector = dlib.get_frontal_face_detector()
# NOTE(review): predictor is not used anywhere in this cell; presumably a later
# cell relies on it - confirm before removing.
predictor = dlib.shape_predictor(cwd + "/predictor/shape_predictor_68_face_landmarks.dat")

# The cascades do not depend on the image, so build them once instead of
# re-reading the three XML files for every row.
face_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_frontalface_alt.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_eye_tree_eyeglasses.xml')
smile_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_smile.xml')

for idx, row in df.iterrows():

    imagePath = cwd + "/images/" + row.image

    # cv2.imread signals a missing or undecodable file by returning None
    # rather than raising, so check before touching the result.
    image = cv2.imread(imagePath)
    if image is None:
        unreadable_images.append(row.image)
        continue
    height, width = image.shape[:2]

    # check size image
    if width != expected_size or height != expected_size:
        continue

    # change color
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # check blurry
    fm = cv2.Laplacian(gray, cv2.CV_64F).var()
    if fm < min_sharpness:
        continue

    # detect face with haarcascade
    face = face_cascade.detectMultiScale(
        gray,
        scaleFactor = 1.1,
        minNeighbors = 4,
        minSize = (200, 200),
        flags = cv2.CASCADE_SCALE_IMAGE
    )

    # Exactly one face is required. Checking this before cropping guarantees
    # the crop below always belongs to the current image (never a leftover
    # from a previous iteration, and never undefined).
    if len(face) != 1:
        continue
    x, y, w, h = face[0]
    roi_gray = gray[y:y+h, x:x+w]

    # NOTE(review): requiring a smile detection rejects faces in which the
    # smile cascade finds nothing - confirm this is intended for every
    # emotion class.
    smile = smile_cascade.detectMultiScale(
        roi_gray,
        scaleFactor = 1.16,
        minNeighbors = 35,
        minSize = (25, 25),
        flags = cv2.CASCADE_SCALE_IMAGE
    )
    if len(smile) < 1:
        continue

    # at least two eye detections inside the face crop
    eyes = eye_cascade.detectMultiScale(roi_gray)
    if len(eyes) < 2:
        continue

    # detect face with dlib's frontal face detector
    rects = detector(image, 0)

    if len(rects) == 0:
        continue

    # collect preprocessed data
    all_image.append([row.image, row.emotion])

    # write preprocessed image (the grayscale version is what gets saved)
    cv2.imwrite(os.path.join(dir_img, row.image), gray)

# Make skipped files visible instead of dropping them silently.
if unreadable_images:
    print("Skipped %d unreadable image(s), e.g. %s" % (len(unreadable_images), unreadable_images[:5]))

In [5]:
# Number of images that passed every filter in the preprocessing loop.
len(all_image)

4814

In [9]:
# Export the preprocessed (image, emotion) pairs to preprocessing_data.csv.
# The row index is left out of the file.
preprocessed_csv_path = cwd + "/data_csv/preprocessing_data.csv"
new_df = pd.DataFrame(data=all_image, columns=["image", "emotion"])
new_df.to_csv(preprocessed_csv_path, index=False)

In [10]:
# Inspect the table of kept images and their emotion codes.
new_df

Unnamed: 0,image,emotion
0,Aaron_Peirsol_0003.jpg,4
1,Aaron_Sorkin_0001.jpg,4
2,Aaron_Sorkin_0002.jpg,4
3,Abbas_Kiarostami_0001.jpg,5
4,Abdel_Madi_Shabneh_0001.jpg,5
...,...,...
4809,Zoran_Djindjic_0001.jpg,5
4810,Zoran_Djindjic_0002.jpg,5
4811,Zorica_Radovic_0001.jpg,5
4812,Zulfiqar_Ahmed_0001.jpg,5


In [8]:
# Count the kept images per emotion code to see the class distribution after filtering.
new_df.groupby('emotion').count()

Unnamed: 0_level_0,image
emotion,Unnamed: 1_level_1
0,78
1,5
2,6
3,8
4,2359
5,2236
6,36
7,86
