In [1]:
# imports
import pandas as pd
import os
import errno
import numpy as np
import scipy.misc
import cv2
import dlib

In [2]:
# constants
PATH_TO_DATASET = 'fer2013.csv'
DATASET_FOLDER = 'dataset'
IMAGE_FOLDER = 'images'
IMAGE_HEIGHT = 48
IMAGE_WIDTH = 48
PATH_TO_PREDICTOR = 'shape_predictor_68_face_landmarks.dat'

In [3]:
# globals
dict_images = {'Training':[], 'PublicTest':[], 'PrivateTest':[]}
dict_labels = {'Training':[], 'PublicTest':[], 'PrivateTest':[]}
dict_landmarks = {'Training':[], 'PublicTest':[], 'PrivateTest':[]}
predictor = dlib.shape_predictor(PATH_TO_PREDICTOR)

In [4]:
# read data from csv into memory
data = pd.read_csv(PATH_TO_DATASET)

In [5]:
# print dataset
print(data)


       emotion                                             pixels        Usage
0            0  70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...     Training
1            0  151 150 147 155 148 133 111 140 170 174 182 15...     Training
2            2  231 212 156 164 174 138 161 173 182 200 106 38...     Training
3            4  24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...     Training
4            6  4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...     Training
5            2  55 55 55 55 55 54 60 68 54 85 151 163 170 179 ...     Training
6            4  20 17 19 21 25 38 42 42 46 54 56 62 63 66 82 1...     Training
7            3  77 78 79 79 78 75 60 55 47 48 58 73 77 79 57 5...     Training
8            3  85 84 90 121 101 102 133 153 153 169 177 189 1...     Training
9            2  255 254 255 254 254 179 122 107 95 124 149 150...     Training
10           0  30 24 21 23 25 25 49 67 84 103 120 125 130 139...     Training
11           6  39 75 78 58 58 45 49 48 103 156 81 4

In [6]:
# create folder structure
# Usages
for usage in data['Usage'].unique():
    print("Current usage: " + usage)
    if not os.path.exists(DATASET_FOLDER + '/' + usage):
        try:
            os.makedirs(DATASET_FOLDER + '/' + usage)
        except OSError as exception:
            if exception.errno == errno.EEXIST:
                pass
            else:
                raise
                
# Images
for emotion in data['emotion'].unique():
    print("Current emotion: " + str(emotion))
    if not os.path.exists(DATASET_FOLDER + '/' + IMAGE_FOLDER + '/' + str(emotion)):
        try:
            os.makedirs(DATASET_FOLDER + '/' + IMAGE_FOLDER + '/' + str(emotion))
        except OSError as exception:
            if exception.errno == errno.EEXIST:
                pass
            else:
                raise

Current usage: Training
Current usage: PublicTest
Current usage: PrivateTest
Current emotion: 0
Current emotion: 2
Current emotion: 4
Current emotion: 6
Current emotion: 3
Current emotion: 5
Current emotion: 1


In [7]:
# one hot encoding; 7 = 7 emotions
# emotion 2 = [0, 0, 1, 0, 0, 0, 0]
# emotion 4 = [0, 0, 0, 0, 1, 0, 0]
def one_hot_encoding(emotion):
    label = list(np.zeros(7, 'uint8'))
    label[emotion] = 1
    return label

In [8]:
# extract landmarks from face, if not exactly one face skip
def get_landmarks(image, faces):
    if len(faces) > 1:
        print("FoundTooManyFaces")
        raise BaseException("FoundTooManyFaces")
    if len(faces) == 0:
        print("FoundNoFace")
        raise BaseException("FoundNoFace")
    
    return np.matrix([[landmark.x, landmark.y] for landmark in predictor(image, faces[0]).parts()])

In [9]:
# image counter
image_number = 0
# convert pixels from csv to image, save labels = emotions and landmarks
for index, row in data.iterrows():
    emotion, pixels, usage = row
    
    # pixels into numpy array and reshape
    image = np.fromstring(pixels, dtype=int, sep=" ").reshape((IMAGE_HEIGHT, IMAGE_WIDTH))
        
    # save image
    image_path = DATASET_FOLDER + '/' + IMAGE_FOLDER + '/' + str(emotion) + '/' + str(image_number) + '.jpg'
    image_number = image_number + 1
    scipy.misc.imsave(image_path, image)
    
    # emotion one hot encoding to label
    one_hot = one_hot_encoding(emotion)
    
    
    # extract landmarks from image
    image_jpg = cv2.imread(image_path)
    faces = [dlib.rectangle(left=1, top=1, right=47, bottom=47)]
    landmarks = get_landmarks(image_jpg, faces)
    
    # add everything to their arrays
    dict_images[usage].append(image)
    dict_labels[usage].append(one_hot)
    dict_landmarks[usage].append(landmarks)

`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
  del sys.path[0]


In [10]:
# save np arrays for learning
# training data
current_set = 'Training'
current_folder = DATASET_FOLDER + '/' + current_set + '/'
np.save(current_folder + 'images.npy', dict_images[current_set])
np.save(current_folder + 'labels.npy', dict_labels[current_set])
np.save(current_folder + 'landmarks.npy', dict_landmarks[current_set])
# public test data
current_set = 'PublicTest'
current_folder = DATASET_FOLDER + '/' + current_set + '/'
np.save(current_folder + 'images.npy', dict_images[current_set])
np.save(current_folder + 'labels.npy', dict_labels[current_set])
np.save(current_folder + 'landmarks.npy', dict_landmarks[current_set])
# private test data
current_set = 'PrivateTest'
current_folder = DATASET_FOLDER + '/' + current_set + '/'
np.save(current_folder + 'images.npy', dict_images[current_set])
np.save(current_folder + 'labels.npy', dict_labels[current_set])
np.save(current_folder + 'landmarks.npy', dict_landmarks[current_set])