# Compile datasets and pre-process the images

## 1. Combining Dataset
- Copy apex frames and combine micro-expression datasets (CASMEII, SAMM, SMIC) into 1 folder (for pre-proc)
- Copy apex frames and combine macro-expression datasets (CK+ and Oulu-Casia) into 1 folder (for pre-proc)
- Generate a csv file with filename and class label to be used in data_pre_processing.ipynb

## 2. Pre-process datasets
- Perform face alignment and cropping for all apex frames

### Note
- We keep samples from these 7 classes only (Happiness, Disgust, Surprise, Sadness, Fear, Anger, Contempt) and ignore the rest

## 1. Combining Micro-Expression datasets

In [1]:
import shutil, os, csv
import pandas as pd

# Labels that are dropped while the micro-expression datasets are combined.
ignore = ["other", "others"]

# Folder holding the per-dataset CSV indexes (samm.csv, casme.csv, smic.csv, ...).
csv_path = "C:/Users/User/Documents/UKM/Project/Models/EfficientNet/dataset-csv/"

# Destination of the copied apex frames, and the folder reserved for the
# pre-processed images.
dest = "C:/Users/User/Documents/UKM/Project/Dataset/Consolidated Dataset/original/"
dest_proc = "C:/Users/User/Documents/UKM/Project/Dataset/Consolidated Dataset/pre-proc/"

# Header and accumulated [filename, class] rows of the consolidated dataset;
# the copyApex* functions append to `dataset`.
dataset_hdr = ["filename", "class"]
dataset = []

In [8]:
# copied filenames format:
# <dataset>-<subj>_<sample>-<label>.jpg

def copyApexSamm():
    """Copy the apex frame of every SAMM sample listed in samm.csv into ``dest``.

    Reads ``csv_path + 'samm.csv'`` and, for each data row, uses:
      * ``row[0]`` - sample id; the text before its first ``_`` prefixes the
        frame file name,
      * ``row[1]`` - class label (lower-cased; rows whose label is in
        ``ignore`` are skipped),
      * ``row[2]`` - frame number of the image to copy (presumably the apex
        frame - confirm against the CSV),
      * ``row[3]`` - folder that contains one sub-folder per sample id.

    The frame file is looked up with zero, one and two leading ``0`` characters
    in front of ``row[2]``. Every copied file is appended to ``dataset`` as
    ``[filename, label]``. Rows that cannot be copied are reported with
    ``Error: <sample id>`` and skipped.
    """
    with open(csv_path + 'samm.csv', newline='') as file:
        rdr = csv.reader(file, delimiter=',')
        next(rdr, None)  # skip header row
        for row in rdr:
            if not row:
                continue  # blank line in the CSV
            if len(row) < 4:
                print('Error: ' + row[0])
                continue

            label = row[1].lower()
            if label in ignore:
                continue

            subj = row[0].split('_')[0]
            filename = 'samm-' + row[0] + '-' + label + '.jpg'
            sample_dir = row[3] + '/' + row[0] + '/'

            # The frame number on disk may be zero-padded; try each variant in turn.
            candidates = [
                sample_dir + subj + '_' + padding + row[2] + '.jpg'
                for padding in ('', '0', '00')
            ]
            inpPath = next((c for c in candidates if os.path.exists(c)), None)
            if inpPath is None:
                print('Error: ' + row[0])
                continue

            try:
                shutil.copy(inpPath, dest + filename)
            except OSError:
                # Only file-system failures are expected here; anything else
                # (including Ctrl-C) should propagate.
                print('Error: ' + row[0])
                continue
            dataset.append([filename, label])


copyApexSamm()

Error: 028_4_1
Error: 032_3_1


In [2]:
def copyApexCasme():
    """Copy the CASME II images listed in casme.csv into ``dest``.

    Reads ``csv_path + 'casme.csv'``; for each data row ``row[0]`` is the class
    label (lower-cased, skipped when in ``ignore``) and ``row[1]`` is the
    backslash-separated path of the image to copy.

    The destination name is ``casme2-<part 8>_<part 9>-<label>.jpg`` where the
    parts are the 9th and 10th backslash-separated components of ``row[1]``
    (presumably subject and sample name - this depends on the absolute path
    depth stored in the CSV). A file that already exists in ``dest`` is left
    untouched and is not added to ``dataset`` again. Rows that cannot be
    processed are reported with ``Error: <path>``.
    """
    with open(csv_path + 'casme.csv', newline='') as file:
        rdr = csv.reader(file, delimiter=',')
        next(rdr, None)  # skip header row
        for row in rdr:
            if not row:
                continue  # blank line in the CSV
            if len(row) < 2:
                print('Error: ' + ','.join(row))
                continue

            label = row[0].lower()
            if label in ignore:
                continue

            inpPath = row[1]
            try:
                parts = inpPath.split('\\')
                filename = 'casme2-' + parts[8] + '_' + parts[9] + '-' + label + '.jpg'
                if not os.path.exists(dest + filename):
                    shutil.copy(inpPath, dest + filename)
                    dataset.append([filename, label])
            except (IndexError, OSError):
                # IndexError: path shallower than expected; OSError: copy failed.
                print('Error: ' + inpPath)


copyApexCasme()

In [9]:
from pandas.core import base
import numpy as np
import pandas as pd
import os

# Following the work of Zhou et al. [55], the mid-position frame between the onset
# and offset is used as the approximated apex frame in the SMIC–HS dataset.
# L. Zhou, Q. Mao, L. Xue, Dual-inception network for cross-database microexpression recognition, in: 2019 14th IEEE International Conference on
# Automatic Face & Gesture Recognition (FG 2019), IEEE, 2019, pp. 1–5.
# http://staff.ustc.edu.cn/~tongxu/Papers/Sirui_NeuroC21.pdf
def get_apex(p):
    """Return the name of the middle entry of directory ``p``.

    The directory listing is ordered by name and the entry at index
    ``len(listing) // 2`` is returned (for an even count this is the first
    entry of the second half). An empty directory raises ``IndexError``.
    """
    ordered = sorted(os.listdir(p))
    return ordered[len(ordered) // 2]

#get from original folder in order to do data pre-proc
def preproc_smic_sub():
    """Build ``dataset-csv/smic.csv`` with one ``class, filepath`` row per SMIC sample.

    Every sub-folder of the ``5class`` classification folder is treated as a
    label, and every entry inside it as a sample named ``<2-char subject>?<rest>``
    (the first two characters are the subject, everything from the fourth
    character on is the sample name). The middle frame of the sample folder
    (see ``get_apex``) gives the file name, and the row's path points at
    ``<basePath>/<subject>/<sample>/image<frame name>`` in the original folder.
    """
    classifyPath = 'C:/Users/User/Documents/UKM/Project/Dataset/SMICdatabase/SMICdatabase/classify/5class'
    basePath = 'C:/Users/User/Documents/UKM/Project/Dataset/SMICdatabase/SMICdatabase/original/micro'
    records = []
    # The folder names for happy and sad were renamed manually to standardize.
    for label in os.listdir(classifyPath):
        labelPath = f'{classifyPath}/{label}'
        for sample in os.listdir(labelPath):
            subject, sample_s = sample[:2], sample[3:]
            apex = get_apex(f'{labelPath}/{sample}')
            # '/' separators are kept on purpose: the CSV consumer splits the path on '/'.
            records.append([label, f'{basePath}/{subject}/{sample_s}/image{apex}'])
    smic_df = pd.DataFrame.from_records(records, columns=['class', 'filepath'])
    smic_df.to_csv('dataset-csv/smic.csv', index=None)


preproc_smic_sub()

In [12]:
def copyApexSmic():
    """Copy the SMIC images listed in smic.csv into ``dest``.

    Reads ``csv_path + 'smic.csv'``; for each data row ``row[0]`` is the class
    label (lower-cased, skipped when in ``ignore``) and ``row[1]`` is the
    ``/``-separated path of the image to copy.

    The destination name is ``smic-<part 11>_<part 12>-<label>.bmp`` where the
    parts are the 12th and 13th ``/``-separated components of ``row[1]``
    (presumably subject and sample folder - this depends on the absolute path
    depth stored in the CSV). Every copied file is appended to ``dataset`` as
    ``[filename, label]``; rows that cannot be processed are reported with
    ``Error: <path>``.
    """
    with open(csv_path + 'smic.csv', newline='') as file:
        rdr = csv.reader(file, delimiter=',')
        next(rdr, None)  # skip header row
        for row in rdr:
            if not row:
                continue  # blank line in the CSV
            if len(row) < 2:
                print('Error: ' + ','.join(row))
                continue

            label = row[0].lower()
            if label in ignore:
                continue

            inpPath = row[1]
            try:
                parts = inpPath.split('/')
                filename = 'smic-' + parts[11] + '_' + parts[12] + '-' + label + '.bmp'
                shutil.copy(inpPath, dest + filename)
            except (IndexError, OSError):
                # IndexError: path shallower than expected; OSError: copy failed.
                print('Error: ' + inpPath)
                continue
            dataset.append([filename, label])


copyApexSmic()

In [3]:
#write the copied files filenames and label to a csv file

import csv
# Persist the header line followed by every collected [filename, label] pair.
with open(csv_path + 'cde.csv', 'w', newline='') as f:
    write = csv.writer(f)
    write.writerows([dataset_hdr, *dataset])

## 2. Pre-process images

In [4]:
# FACE ALIGNMENT - returns cropped and rotated colored images

from mtcnn import MTCNN
import numpy as np
import cv2

# Module-level MTCNN detector, created once and used by align_mtcnn for every image.
face_detector = MTCNN()

def eyesAlignment(img, left_eye, right_eye):
    """Rotate ``img`` about its centre by the tilt angle of the line joining the eyes.

    Args:
        img: image array; only ``img.shape[:2]`` (height, width) is read here
            before the array is handed to ``cv2.warpAffine``.
        left_eye: ``(x, y)`` coordinates of the left eye.
        right_eye: ``(x, y)`` coordinates of the right eye.

    Returns:
        The rotated image, with the same width and height as ``img``.
    """
    left_eye_x, left_eye_y = left_eye
    right_eye_x, right_eye_y = right_eye

    # Angle of the eye line relative to the horizontal, in degrees.
    delta_x = right_eye_x - left_eye_x
    delta_y = right_eye_y - left_eye_y
    # arctan2 gives the same result as arctan(delta_y / delta_x) whenever
    # delta_x > 0, but stays defined when both eyes share the same x coordinate
    # (delta_x == 0), where the division would raise ZeroDivisionError.
    angle = float(np.degrees(np.arctan2(delta_y, delta_x)))

    # Rotate around the image centre; integer division keeps whole pixels.
    h, w = img.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(img, M, (w, h))

def align_mtcnn(imgPath):
    """Detect a face, level the eyes, crop a square around the face and resize it.

    Args:
        imgPath: path of the image file, read with ``cv2.imread``.

    Returns:
        The cropped face as a BGR image that is 224 pixels wide; the height is
        scaled by the same factor as the width.

    Raises:
        ValueError: if the image cannot be read, no face is detected, or the
            face box lies entirely outside the image.
    """
    img = cv2.imread(imgPath)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise ValueError('Could not read image: ' + imgPath)

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # mtcnn expects RGB but OpenCV reads BGR
    detections = face_detector.detect_faces(img_rgb)
    if len(detections) == 0:
        raise ValueError('No face detected: ' + imgPath)

    # Only the first detection is used.
    detection = detections[0]
    x, y, w, h = detection["box"]

    keypoints = detection["keypoints"]
    left_eye = keypoints["left_eye"]
    right_eye = keypoints["right_eye"]
    rotated_face = eyesAlignment(img_rgb, left_eye, right_eye)
    rotated_face = cv2.cvtColor(rotated_face, cv2.COLOR_RGB2BGR)

    # Shift the left edge so the crop becomes a square of side h centred on the box.
    sqr_x = x - ((h - w) // 2)

    # Clamp the top-left corner to the image: a negative start index would be
    # interpreted by numpy as "count from the end" and yield a wrong/empty crop.
    top = max(y, 0)
    left = max(sqr_x, 0)
    cropped = rotated_face[top:y + h, left:sqr_x + h]
    if cropped.size == 0:
        raise ValueError('Face box lies outside the image: ' + imgPath)

    r = 224.0 / cropped.shape[1]
    dim = (224, int(cropped.shape[0] * r))
    resized = cv2.resize(cropped, dim, interpolation=cv2.INTER_AREA)
    return resized

In [21]:
import os
import cv2

cde_path = r"C:\Users\User\Documents\UKM\Project\Dataset\Consolidated Dataset\original"
cde_proc_path = r"C:\Users\User\Documents\UKM\Project\Dataset\Consolidated Dataset\pre-proc"

# Align and crop every image of the consolidated folder and save it as .jpg in
# the pre-proc folder. Each sample is handled on its own so that one failing
# image is reported and skipped instead of aborting the rest of the batch.
samples = os.listdir(cde_path)
failed_samples = []
for sample in samples:
    try:
        res = align_mtcnn(cde_path + '\\' + sample)
        # splitext drops only the extension, even if the name contains other dots.
        filename = cde_proc_path + '\\' + os.path.splitext(sample)[0] + '.jpg'
        if not cv2.imwrite(filename, res):
            raise IOError('cv2.imwrite failed for ' + filename)
    except Exception as err:
        print('{}: {}'.format(sample, err))
        failed_samples.append(sample)

In [5]:
import os
import cv2

cde_path = r"C:\Users\User\Documents\UKM\Project\Dataset\Consolidated Dataset\original"
cde_proc_path = r"C:\Users\User\Documents\UKM\Project\Dataset\Consolidated Dataset\pre-proc"

# Explicit list of CASME II "repression" samples to (re-)process.
samples = [
    "casme2-sub02_EP01_11f-repression.jpg",
    "casme2-sub02_EP02_04f-repression.jpg",
    "casme2-sub02_EP03_02f-repression.jpg",
    "casme2-sub02_EP06_01f-repression.jpg",
    "casme2-sub02_EP06_02f-repression.jpg",
    "casme2-sub08_EP13_01f-repression.jpg",
    "casme2-sub09_EP06_01f-repression.jpg",
    "casme2-sub09_EP09_04-repression.jpg",
    "casme2-sub09_EP09_05-repression.jpg",
    "casme2-sub09_EP13_01-repression.jpg",
    "casme2-sub09_EP17_08-repression.jpg",
    "casme2-sub16_EP01_08-repression.jpg",
    "casme2-sub17_EP02_18f-repression.jpg",
    "casme2-sub17_EP05_03f-repression.jpg",
    "casme2-sub17_EP05_04-repression.jpg",
    "casme2-sub17_EP05_09-repression.jpg",
    "casme2-sub17_EP05_10-repression.jpg",
    "casme2-sub17_EP06_08-repression.jpg",
    "casme2-sub17_EP10_06-repression.jpg",
    "casme2-sub17_EP12_03-repression.jpg",
    "casme2-sub17_EP15_04-repression.jpg",
    "casme2-sub21_EP05_02-repression.jpg",
    "casme2-sub22_EP01_12-repression.jpg",
    "casme2-sub22_EP13_08-repression.jpg",
    "casme2-sub23_EP04_03f-repression.jpg",
    "casme2-sub23_EP05_24f-repression.jpg",
    "casme2-sub23_EP13_04-repression.jpg",
]

# Each sample is handled on its own so that one failing image is reported and
# skipped instead of aborting the rest of the list.
failed_samples = []
for sample in samples:
    try:
        res = align_mtcnn(cde_path + '\\' + sample)
        # splitext drops only the extension, even if the name contains other dots.
        filename = cde_proc_path + '\\' + os.path.splitext(sample)[0] + '.jpg'
        if not cv2.imwrite(filename, res):
            raise IOError('cv2.imwrite failed for ' + filename)
    except Exception as err:
        print('{}: {}'.format(sample, err))
        failed_samples.append(sample)

## Save filenames and labels of our dataset in csv file

1. 7 labels
2. 3 labels

In [3]:
#7 labels

import csv

dest_proc = 'C:/Users/User/Documents/UKM/Project/Dataset/Consolidated Dataset/pre-proc'
samples = os.listdir(dest_proc)

# Encode labels as integers: each label maps to its position in label_array.
label_array = ["disgust", "anger", "fear", "sadness", "surprise", "happiness", "contempt"]
label_to_index = {name: index for index, name in enumerate(label_array)}

# Write one "<filepath>,<class index>" row per pre-processed image.
skipped = []
with open('dataset-csv/combinedDataset.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for sample in samples:
        # The label is the text after the last '-' without the file extension.
        # splitext removes exactly the extension, whereas rstrip('.jpg') strips
        # any trailing run of the characters '.', 'j', 'p', 'g'.
        label = os.path.splitext(sample.rsplit('-', 1)[-1])[0]
        if label not in label_to_index:
            # Files of other classes (e.g. 'repression') are left out instead
            # of aborting the export with a KeyError.
            skipped.append(sample)
            continue
        filepath = "{}/{}".format(dest_proc, sample)
        writer.writerow([filepath, label_to_index[label]])

if skipped:
    print('Skipped {} file(s) whose label is not in label_array'.format(len(skipped)))
    

In [6]:
#3 labels

import csv

dest_proc = 'C:/Users/User/Documents/UKM/Project/Dataset/Consolidated Dataset/pre-proc'
samples = os.listdir(dest_proc)

# Encode labels as integers: repression, anger, contempt, disgust, fear and
# sadness share class 0; happiness is class 1; surprise is class 2.
label_dict = {
    "repression": 0,
    "anger": 0,
    "contempt": 0,
    "disgust": 0,
    "fear": 0,
    "sadness": 0,
    "happiness": 1,
    "surprise": 2,
}

# Write one "<filepath>,<class index>" row per pre-processed image.
skipped = []
with open('dataset-csv/combinedDataset3class.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for sample in samples:
        # The label is the text after the last '-' without the file extension.
        # splitext removes exactly the extension, whereas rstrip('.jpg') strips
        # any trailing run of the characters '.', 'j', 'p', 'g'.
        label = os.path.splitext(sample.rsplit('-', 1)[-1])[0]
        if label not in label_dict:
            # Unmapped labels are left out instead of aborting with a KeyError.
            skipped.append(sample)
            continue
        filepath = "{}/{}".format(dest_proc, sample)
        writer.writerow([filepath, label_dict[label]])

if skipped:
    print('Skipped {} file(s) whose label is not in label_dict'.format(len(skipped)))
    

## 3. Combining Macro-expression datasets

In [None]:
import shutil, os, csv
import pandas as pd

# Folder holding the dataset CSV indexes.
csv_path = "C:/Users/User/Documents/UKM/Project/Models/EfficientNet/dataset-csv/"

# Destination folder for the combined macro-expression images.
macro_dest = "C:/Users/User/Documents/UKM/Project/Dataset/Consolidated Macro Dataset/"

# Header and (initially empty) rows of the macro-expression dataset listing.
dataset_hdr = ["filename", "class"]
macro_dataset = []

In [None]:
#complete dataset retrieved from : https://www.kaggle.com/shareef0612/ckdataset

def copyCK():
    """Print the name of every entry in the CK+ dataset folder.

    Each entry is treated as a class label; nothing is copied yet.
    """
    ckPath = 'C:/Users/User/Documents/UKM/Project/Dataset/macro-expression ds/ck+/'
    # The happy label was manually changed to happiness to standardize.
    for class_name in os.listdir(ckPath):
        print(class_name)

In [None]:
def copyOuluCasia():
    """Placeholder for the Oulu-CASIA import; it only names the dataset folder so far."""
    oulu_root = 'C:/Users/User/Documents/UKM/Project/Dataset/macro-expression ds/OuluCASIA/'
    return None

In [6]:
import csv
import numpy as np
import cv2

def fera2013():
    """Display the first sample of fer2013.csv as a 48x48 single-channel image.

    The second column of the first data row is split on spaces into pixel
    values and reshaped to ``(48, 48, 1)``. The first column holds the emotion
    code (0 anger, 1 disgust, 2 fear, 3 happiness, 4 sadness, 5 surprise,
    6 neutral); it is not used for the preview.

    Raises:
        ValueError: if the CSV contains no data row after the header.
    """
    path = r'C:\Users\User\Documents\UKM\Project\Dataset\fer2013\\'
    with open(path + 'fer2013.csv', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # header row
        # Read just the first data row instead of loading the whole file.
        testRow = next(reader, None)
    if testRow is None:
        raise ValueError('fer2013.csv contains no data rows')

    imData = np.array(testRow[1].split(' ')).reshape(48, 48, 1).astype('float32')
    # cv2.imshow scales floating-point images as if they were in [0, 1], so
    # 0-255 pixel values must be shown as 8-bit data to avoid a washed-out image.
    img = imData.astype('uint8')
    cv2.imshow('test', img)
    cv2.waitKey()


fera2013()
        
        
        