In [1]:
import pandas as pd
import os
import shutil
import cv2, glob, random, math, numpy as np, dlib, itertools

# Load the table that lists every image file together with its emotion label.
cwd = os.getcwd()
df = pd.read_csv(cwd + "/data_csv/all_data.csv")

# Emotion class names; the position of a name in this list is its integer id.
emotion_names = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR',
                 'HAPPINESS', 'NEUTRAL', 'SADNESS', 'SURPRISE']

# The CSV spells labels both in lower case and in upper case, so map both
# spellings straight to the integer id in a single pass.
emotion_to_id = {
    spelling: class_id
    for class_id, name in enumerate(emotion_names)
    for spelling in (name.lower(), name)
}

# Assign the result back instead of calling replace(..., inplace=True) on
# df["emotion"]: the in-place form operates on a column selection (chained
# assignment), which emits a warning on recent pandas and does not modify df
# at all once Copy-on-Write is enabled.
# infer_objects() turns an all-integer object column into an integer dtype;
# values that are not in the mapping are left untouched by replace().
df["emotion"] = df["emotion"].replace(emotion_to_id).infer_objects()

In [2]:
# Display the loaded label table (long frames are truncated in the rendered output).
df

Unnamed: 0,image,emotion
0,facial-expressions_2868588k.jpg,0
1,facial-expressions_2868585k.jpg,7
2,facial-expressions_2868584k.jpg,2
3,facial-expressions_2868582k.jpg,3
4,Aaron_Eckhart_0001.jpg,5
...,...,...
89940,SURPRISE/surprise (997).png,7
89941,SURPRISE/surprise (998).jpg,7
89942,SURPRISE/surprise (998).png,7
89943,SURPRISE/surprise (999).jpg,7


In [3]:
# Count the rows for each emotion id in the full table.
df.groupby('emotion').count()

Unnamed: 0_level_0,image
emotion,Unnamed: 1_level_1
0,11072
1,2970
2,5492
3,9190
4,21428
5,17534
6,11571
7,10688


In [4]:
# Output root for the cleaned images and the emotion names used as sub-folders.
dir_img = cwd + '/cleaned_images'
dictionary = [
    'ANGER', 'CONTEMPT', 'DISGUST', 'FEAR',
    'HAPPINESS', 'NEUTRAL', 'SADNESS', 'SURPRISE',
]

# Always start from an empty output tree: drop whatever an earlier run left
# behind, then recreate the root folder.
if os.path.exists(dir_img):
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

# One sub-folder per emotion name.
for emo in dictionary:
    os.makedirs("/".join((dir_img, emo)))


# Collects [image file name, emotion id] for every image that passes all checks.
all_image = []

# dlib frontal face detector, plus the shape predictor loaded from the
# 68-landmark model file (the predictor is not used in the loop below).
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(cwd + "/predictor/shape_predictor_68_face_landmarks.dat")

# set size: every image is resized to the dimensions of this reference image
reference_path = cwd + "/images/Aaron_Eckhart_0001.jpg"
image = cv2.imread(reference_path)
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a clear message instead of an AttributeError on `.shape` below.
if image is None:
    raise FileNotFoundError("could not read reference image: " + reference_path)

scale_percent = 100  # 100 keeps the reference size unchanged
width = int(image.shape[1] * scale_percent / 100)
height = int(image.shape[0] * scale_percent / 100)
dim = (width, height)  # (width, height) order, as passed to cv2.resize

# The Haar cascades do not depend on the image, so load them once instead of
# re-parsing the XML files on every iteration.
face_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_frontalface_alt.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_eye_tree_eyeglasses.xml')
smile_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_smile.xml')

# Images whose variance of the Laplacian is below this value are skipped as blurry.
blur_threshold = 5

# Keep an image only if it is readable, sharp enough, contains exactly one
# Haar-detected face with at least one smile and two eyes inside it, and is
# also accepted by the dlib face detector.
for row in df.itertuples(index=False):
    imagePath = cwd + "/images/" + row.image

    image = cv2.imread(imagePath)
    # cv2.imread returns None (no exception) for a missing or undecodable
    # file; skip the row instead of crashing inside cv2.resize.
    if image is None:
        continue

    # resize to the common reference size
    image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)

    # change color
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # check blurry
    fm = cv2.Laplacian(gray, cv2.CV_64F).var()
    if fm < blur_threshold:
        continue

    # detect face with haarcascade
    face = face_cascade.detectMultiScale(
        gray,
        scaleFactor = 1.1,
        minNeighbors = 4,
        minSize = (200, 200),
        flags = cv2.CASCADE_SCALE_IMAGE
    )

    # Require exactly one face *before* cropping. Previously the crop was only
    # assigned inside a loop over the detections, so with zero detections the
    # smile/eye detectors ran on the previous image's crop (or raised a
    # NameError on the very first image).
    if len(face) != 1:
        continue

    x, y, w, h = face[0]
    roi_gray = gray[y:y+h, x:x+w]

    smile = smile_cascade.detectMultiScale(
        roi_gray,
        scaleFactor = 1.16,
        minNeighbors = 35,
        minSize = (25, 25),
        flags = cv2.CASCADE_SCALE_IMAGE
    )
    if len(smile) < 1:
        continue

    eyes = eye_cascade.detectMultiScale(roi_gray)
    if len(eyes) < 2:
        continue

    # detect face with the dlib detector as a second opinion
    rects = detector(image, 0)
    if len(rects) == 0:
        continue

    # write preprocessed image; row.image may contain a sub-folder, so make
    # sure the target directory exists because cv2.imwrite will not create it
    out_path = os.path.join(dir_img, row.image)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # cv2.imwrite reports failure through its return value; only record images
    # that were actually written so the exported table matches the files on disk
    if not cv2.imwrite(out_path, gray):
        continue

    # collect preprocessed data
    all_image.append([row.image, row.emotion])

In [5]:
# Number of [image, emotion] entries collected in all_image.
len(all_image)

33668

In [6]:
# Export the preprocessed data to preprocessing_data.csv:
# one row per collected entry, columns "image" and "emotion", no index column.
preprocessed_csv = cwd + "/data_csv/preprocessing_data.csv"
new_df = pd.DataFrame(data=all_image, columns=["image", "emotion"])
new_df.to_csv(preprocessed_csv, index=False)

In [7]:
# Display the table built from all_image (long frames are truncated in the rendered output).
new_df

Unnamed: 0,image,emotion
0,facial-expressions_2868585k.jpg,7
1,facial-expressions_2868582k.jpg,3
2,Aaron_Peirsol_0003.jpg,4
3,Aaron_Sorkin_0001.jpg,4
4,Aaron_Sorkin_0002.jpg,4
...,...,...
33663,SURPRISE/surprise (984).jpg,7
33664,SURPRISE/surprise (985).jpg,7
33665,SURPRISE/surprise (997).jpg,7
33666,SURPRISE/surprise (998).jpg,7


In [8]:
# Count the rows for each emotion id in the preprocessed table.
new_df.groupby('emotion').count()

Unnamed: 0_level_0,image
emotion,Unnamed: 1_level_1
0,3312
1,2099
2,2368
3,3434
4,8660
5,6654
6,3198
7,3943
