In [11]:
import pandas as pd
import os
import shutil
import cv2, glob, random, math, numpy as np, dlib, itertools

# Read the legend CSV and write it straight back out under data_csv/
# (pandas adds its index as an extra first column on write).
cwd = os.getcwd()
df = pd.read_csv(cwd + "/data/legend.csv")
df.to_csv(cwd + "/data_csv/all_data.csv")

# Map the lower-case emotion labels onto their upper-case spelling so both
# spellings end up under a single label.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on `df["emotion"]`: an in-place update of a column
# selection is not guaranteed to reach the parent frame (pandas Copy-on-Write).
emotion_labels = {
    "anger": "ANGER",
    "contempt": "CONTEMPT",
    "disgust": "DISGUST",
    "fear": "FEAR",
    "happiness": "HAPPINESS",
    "neutral": "NEUTRAL",
    "sadness": "SADNESS",
    "surprise": "SURPRISE",
}
df["emotion"] = df["emotion"].replace(emotion_labels)

# Drop the `user_id` column and every row labelled CONTEMPT, FEAR or DISGUST.
# `DataFrame.drop` returns a new frame unless `inplace=True` is passed, so the
# filtered result has to be bound back to `df` for the rows to actually go away.
excluded_emotions = ["CONTEMPT", "FEAR", "DISGUST"]
df = df.drop(columns="user_id")
df = df[~df["emotion"].isin(excluded_emotions)]

In [12]:
# Count the rows per emotion label; as the cell's last expression the table is displayed.
df.groupby('emotion').count()

Unnamed: 0_level_0,image
emotion,Unnamed: 1_level_1
ANGER,252
HAPPINESS,5696
NEUTRAL,6868
SADNESS,268
SURPRISE,368


In [13]:
# Start from an empty output directory: remove any existing one, then create it.
dir_img = cwd + '/cleaned_images'
output_already_exists = os.path.exists(dir_img)
if output_already_exists:
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

# Accumulates one [image file name, emotion] pair per image kept by the loop below.
all_image = list()

# dlib frontal face detector and the 68-point landmark shape predictor.
landmark_model_path = cwd + "/predictor/shape_predictor_68_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(landmark_model_path)

# Thresholds used by the filters in the loop below.
EXPECTED_SIZE = 350   # images must be exactly EXPECTED_SIZE x EXPECTED_SIZE pixels
MIN_SHARPNESS = 5     # minimum variance of the Laplacian; anything lower is skipped as blurry

# The Haar cascades do not depend on the image being processed, so load them once
# here instead of re-reading the XML files on every iteration.
face_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_frontalface_alt.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_eye_tree_eyeglasses.xml')
smile_cascade = cv2.CascadeClassifier('haarcascade/haarcascade_smile.xml')

# File names listed in `df` that could not be read from disk (kept for inspection).
unreadable_images = []

# Keep an image only if it passes every filter: readable, expected size, sharp enough,
# exactly one Haar face containing at least one smile and two eyes, and at least one
# face found by the dlib detector. Kept images are recorded in `all_image` and written
# to `dir_img` as grayscale.
for idx, row in df.iterrows():

    imagePath = cwd + "/images/" + row.image

    image = cv2.imread(imagePath)
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise crash on `.shape` below.
    if image is None:
        unreadable_images.append(row.image)
        continue
    height, width, channels = image.shape

    # check size image
    if width != EXPECTED_SIZE or height != EXPECTED_SIZE:
        continue

    # change color
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # check blurry: variance of the Laplacian as a sharpness score
    fm = cv2.Laplacian(gray, cv2.CV_64F).var()
    if fm < MIN_SHARPNESS:
        continue

    # detect face with haarcascade
    face = face_cascade.detectMultiScale(
        gray,
        scaleFactor = 1.1,
        minNeighbors = 4,
        minSize = (200, 200),
        flags = cv2.CASCADE_SCALE_IMAGE
    )

    # Require exactly one face BEFORE cropping. Checking this first guarantees that
    # `roi_gray` always comes from the current image; previously it was undefined
    # (first image without a face) or left over from an earlier image.
    if len(face) != 1:
        continue

    x, y, w, h = face[0]
    roi_gray = gray[y:y+h, x:x+w]

    # look for a smile and for eyes inside the face region only
    smile = smile_cascade.detectMultiScale(
        roi_gray,
        scaleFactor = 1.16,
        minNeighbors = 35,
        minSize = (25, 25),
        flags = cv2.CASCADE_SCALE_IMAGE
    )

    eyes = eye_cascade.detectMultiScale(roi_gray)

    if len(smile) < 1 or len(eyes) < 2:
        continue

    # detect face with the dlib frontal face detector as a second opinion
    rects = detector(image, 0)

    if len(rects) == 0:
        continue

    # collect preprocessed data
    all_image.append([row.image, row.emotion])

    # write preprocessed (grayscale) image
    cv2.imwrite(os.path.join(dir_img, row.image), gray)

In [14]:
# Number of [image, emotion] pairs collected in `all_image`.
len(all_image)

4795

In [15]:
# Build a frame from the collected [image, emotion] pairs and export it to
# preprocessing_data.csv.
preprocessed_csv_path = cwd + "/data_csv/preprocessing_data.csv"
new_df = pd.DataFrame(data=all_image, columns=["image", "emotion"])
new_df.to_csv(preprocessed_csv_path)

In [16]:
# Count the rows per emotion label in the exported frame.
new_df.groupby('emotion').count()

Unnamed: 0_level_0,image
emotion,Unnamed: 1_level_1
ANGER,78
HAPPINESS,2359
NEUTRAL,2236
SADNESS,36
SURPRISE,86
