In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import h5py
from tqdm import tqdm
from mtcnn.mtcnn import MTCNN
%matplotlib inline

# Set up paths

In [None]:
# TODO: change this to your local dataset directory (a leading "~" is expanded)
dataset_root = os.path.expanduser("~/data/datasets/fer-plus/")

# Derived CSV paths inside the dataset directory -- no need to edit these.
fer_2013_csv, fer_plus_csv = (
    os.path.join(dataset_root, csv_name)
    for csv_name in ("fer2013.csv", "fer2013new.csv")
)

# Inspect

In [None]:
# Load both annotation tables; the notebook pairs their rows by position,
# so they must contain the same number of rows.
fer2013_df, ferplus_df = (
    pd.read_csv(csv_path) for csv_path in (fer_2013_csv, fer_plus_csv)
)
assert fer2013_df.shape[0] == ferplus_df.shape[0]

In [None]:
# TODO: change this if you want
rows, cols = 10, 30   # collage grid size, counted in images
inch_per_img = 0.7    # figure inches allotted to each thumbnail

n_tiles = rows * cols
if n_tiles > len(fer2013_df):
    raise ValueError(
        "collage needs %d images but the dataset only has %d rows"
        % (n_tiles, len(fer2013_df))
    )

# Random sample without replacement. It lives under its own name so that
# re-running this cell never overwrites the per-emotion `indices` dict that
# the filtering section builds and later cells rely on.
sample_idx = np.random.permutation(len(fer2013_df))[:n_tiles]

# Decode every sampled row into a 32x32 thumbnail, filling a preallocated grid
# instead of growing the collage by repeated concatenation.
tiles = np.empty((rows, cols, 32, 32), dtype=np.uint8)
for k, idx in enumerate(sample_idx):
    # 'pixels' holds whitespace-separated intensities for one 48x48 image
    img = np.array([int(p) for p in fer2013_df['pixels'].iloc[idx].split()], dtype=np.uint8)
    img = np.reshape(img, (48, 48))
    tiles[k // cols, k % cols] = cv2.resize(img, (32, 32))

# Join the thumbnails of each grid row side by side, then stack the rows.
collage = np.vstack([np.hstack(list(row_tiles)) for row_tiles in tiles])

plt.figure(figsize=(int(inch_per_img * cols), int(inch_per_img * rows)))
plt.axis('off')
plt.imshow(collage, 'gray');

# Filtering and saving

In [None]:
# Per-emotion cut-off: a row is kept for an emotion when that emotion's
# column in ferplus_df is strictly greater than this value.
thresh = {'neutral':8, 'happiness':9, 'surprise':6, 'sadness':5, 'anger':5}

# Positional row indices of the rows that pass the cut-off, per emotion.
indices = {
    emotion: np.where(ferplus_df[emotion] > cutoff)[0]
    for emotion, cutoff in thresh.items()
}

# Number of selected rows per emotion (shown as the cell output).
emotions = {emotion: selected.shape[0] for emotion, selected in indices.items()}
emotions

In [None]:
# One (n, 32, 32) uint8 buffer per emotion for the resized thumbnails.
images = {
    key: np.empty((selected.shape[0], 32, 32), dtype=np.uint8)
    for key, selected in indices.items()
}
# NOTE(review): allocated here but never filled in this cell -- confirm
# whether a later cell populates it or whether it is leftover.
confidence = {
    key: np.empty((selected.shape[0],), dtype=np.float64)
    for key, selected in indices.items()
}

for emotion, ind in indices.items():
    for slot, row_id in enumerate(tqdm(ind, desc=emotion, ascii=True)):
        # Parse the whitespace-separated pixel string into a 48x48 image,
        # then shrink it to 32x32 before storing.
        raw = fer2013_df['pixels'][row_id].split()
        face = np.array([int(p) for p in raw], dtype=np.uint8).reshape((48, 48))
        images[emotion][slot] = cv2.resize(face, (32, 32))

In [None]:
# (n_train, n_val) samples taken per emotion: the first n_train filtered
# images go to the training file, the next n_val to the validation file.
splits = {
    'neutral': (2800, 500),
    'happiness': (3000, 700),
    'surprise': (2500, 397),
    'sadness': (2600, 358),
    'anger': (1800, 296)
}
train_total = sum(n_train for n_train, _ in splits.values())
val_total = sum(n_val for _, n_val in splits.values())

# Fail early with a readable message if a class has fewer filtered images
# than its split asks for (otherwise the slice assignment below fails with
# an opaque shape error after the files have already been truncated).
for emo, (n_train, n_val) in splits.items():
    available = images[emo].shape[0]
    if available < n_train + n_val:
        raise ValueError(
            "not enough '%s' samples: need %d (train) + %d (val), have %d"
            % (emo, n_train, n_val, available)
        )

train_path = os.path.join(dataset_root, 'train-filtered.h5')
val_path = os.path.join(dataset_root, 'val-filtered.h5')

# Context managers guarantee both files are closed even if a write fails.
with h5py.File(train_path, 'w') as train_set, h5py.File(val_path, 'w') as val_set:
    x_train = train_set.create_dataset('images', (train_total, 32, 32, 1), dtype=np.uint8)
    y_train = train_set.create_dataset('classes', (train_total, 5), dtype=np.uint8)
    # Validation datasets are sized by val_total and live in val_set; the
    # previous version sized them with train_total and wrote into train_set.
    x_val = val_set.create_dataset('images', (val_total, 32, 32, 1), dtype=np.uint8)
    y_val = val_set.create_dataset('classes', (val_total, 5), dtype=np.uint8)

    head_train, head_val = 0, 0
    for emo, (n_train, n_val) in splits.items():
        train_rows = indices[emo][:n_train]
        val_rows = indices[emo][n_train:n_train + n_val]

        x_train[head_train:head_train + n_train, :, :, 0] = images[emo][:n_train]
        # Labels are the five columns from 'neutral' through 'anger' of ferplus_df.
        y_train[head_train:head_train + n_train] = ferplus_df.loc[
            train_rows, 'neutral':'anger'].values

        x_val[head_val:head_val + n_val, :, :, 0] = images[emo][n_train:n_train + n_val]
        y_val[head_val:head_val + n_val] = ferplus_df.loc[
            val_rows, 'neutral':'anger'].values

        head_train += n_train
        head_val += n_val

# One class name per line, in the same order as the keys of `emotions`.
with open(os.path.join(dataset_root, 'class_names.txt'), 'w') as f:
    for s in emotions:
        f.write(s + '\n')