In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import h5py
from tqdm import tqdm
from mtcnn.mtcnn import MTCNN
%matplotlib inline

# Set up paths

In [None]:
# TODO: change this to your local path
dataset_root = "~/data/datasets/fer-plus/"
fer_2013_csv = "fer2013.csv"
fer_plus_csv = "fer2013new.csv"

# DON'T TOUCH THIS!
# Expand "~" to the user's home directory, then turn both CSV file names
# into full paths inside the dataset directory.
dataset_root = os.path.expanduser(dataset_root)
fer_2013_csv, fer_plus_csv = (
    os.path.join(dataset_root, csv_name)
    for csv_name in (fer_2013_csv, fer_plus_csv)
)

# Inspect

In [None]:
# Read both CSV files. Later cells use row labels taken from `ferplus_df`
# to look up pixel strings in `fer2013_df`, so the two tables must at
# least have the same number of rows.
fer2013_df, ferplus_df = (
    pd.read_csv(csv_path) for csv_path in (fer_2013_csv, fer_plus_csv)
)
assert len(ferplus_df) == len(fer2013_df)

In [None]:
# TODO: change this if you want
rows, cols = 10, 30
inch_per_img = 0.7

# Shuffle the row positions so the preview grid shows a random selection.
indices = np.arange(len(fer2013_df))
np.random.shuffle(indices)

# Build the grid strip by strip: decode each space-separated pixel string
# into a 48x48 uint8 image, shrink it to 32x32, then join the tiles
# horizontally (one strip per grid row) and the strips vertically.
strips = []
for grid_row in range(rows):
    tiles = []
    for grid_col in range(cols):
        pixel_text = fer2013_df['pixels'][indices[grid_row * cols + grid_col]]
        face = np.array([int(p) for p in pixel_text.split()], dtype=np.uint8)
        tiles.append(cv2.resize(face.reshape(48, 48), (32, 32)))
    strips.append(np.concatenate(tiles, axis=1))
collage = np.concatenate(strips, axis=0)

# Figure size scales with the grid dimensions (truncated to whole inches).
fig_width = int(inch_per_img * cols)
fig_height = int(inch_per_img * rows)
plt.figure(figsize=(fig_width, fig_height))
plt.axis('off')
plt.imshow(collage, cmap='gray')

# Filtering and saving

In [None]:
# Classes to keep (iteration order of `emotions` defines the class order)
# and, per class, the value a row's column must strictly exceed for the
# row to be selected.
emotions = {'neutral':0, 'happiness':0, 'surprise':0, 'sadness':0, 'anger':0}
indices = {'neutral':None, 'happiness':None, 'surprise':None, 'sadness':None, 'anger':None}
thresh = {'neutral':8, 'happiness':9, 'surprise':6, 'sadness':5, 'anger':5}

# Gather the selected row labels class by class, then join them once.
train_parts = []
test_parts = []

for emotion in emotions:
    confident = ferplus_df[ferplus_df[emotion] > thresh[emotion]]
    usage = confident['Usage']

    # Only the 'Training' and 'PrivateTest' partitions are used here.
    train_samples = np.array(confident[usage == 'Training'].index)
    test_samples = np.array(confident[usage == 'PrivateTest'].index)

    train_parts.append(train_samples)
    test_parts.append(test_samples)

    print('%s: %d train, %d test' % (emotion, train_samples.shape[0], test_samples.shape[0]))

train_indices = np.concatenate(train_parts, axis=0)
test_indices = np.concatenate(test_parts, axis=0)

print('-------------------------------')
print('total:', train_indices.shape[0], 'train,', test_indices.shape[0], 'test')

In [None]:
def _decode_images(sample_indices, desc):
    """Decode and downscale the FER2013 images at the given row labels.

    Each entry of the 'pixels' column is a space-separated string of
    integers; it is parsed into a 48x48 uint8 image and resized to 32x32.

    Args:
        sample_indices: 1-D sequence of row labels into `fer2013_df`.
        desc: label shown on the progress bar.

    Returns:
        uint8 array of shape (len(sample_indices), 32, 32), in the same
        order as `sample_indices`.
    """
    images = np.empty((len(sample_indices), 32, 32), dtype=np.uint8)
    for pos, sample_idx in enumerate(tqdm(sample_indices, desc=desc, ascii=True)):
        pixel_text = fer2013_df['pixels'][sample_idx]
        face = np.array([int(p) for p in pixel_text.split()], dtype=np.uint8)
        images[pos] = cv2.resize(face.reshape(48, 48), (32, 32))
    return images


train_images = _decode_images(train_indices, 'train')
# The progress bar for this split used to be mislabelled 'train'.
test_images = _decode_images(test_indices, 'test')

In [None]:
def _normalized_labels(sample_indices):
    """Return the label rows for `sample_indices`, each scaled to sum to 1.

    Columns are selected by name, in the iteration order of `emotions`, so
    that column k of the result always corresponds to line k of
    class_names.txt (written below) regardless of the CSV's column order.

    Returns:
        float32 array of shape (len(sample_indices), len(emotions)).
    """
    scores = ferplus_df.loc[sample_indices, list(emotions)].values.astype(np.float32)
    # NOTE(review): assumes every selected row has a non-zero sum over
    # these columns (the thresholds used for selection are all positive).
    return scores / scores.sum(axis=1, keepdims=True)


def _write_split(filename, images, sample_indices):
    """Write one dataset split to `<dataset_root>/<filename>` as HDF5.

    The file gets two datasets:
      'X': uint8, shape (N, 32, 32, 1) - the images with a trailing
           single-channel axis added.
      'Y': float32, shape (N, len(emotions)) - the normalized labels.

    The file is opened in a `with` block so it is closed (and flushed)
    even if writing one of the datasets raises.
    """
    with h5py.File(os.path.join(dataset_root, filename), 'w') as split_file:
        split_file.create_dataset('X', data=images[:, :, :, np.newaxis], dtype=np.uint8)
        split_file.create_dataset('Y', data=_normalized_labels(sample_indices), dtype=np.float32)


_write_split('train.h5', train_images, train_indices)
_write_split('test.h5', test_images, test_indices)

# One class name per line, in the same order as the columns of 'Y'.
with open(os.path.join(dataset_root, 'class_names.txt'), 'w') as f:
    for s in emotions:
        f.write(s + '\n')