In [1]:
from keras.utils.np_utils import to_categorical
import pandas as pd
import numpy as np
import random
import sys

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# fer2013 dataset:
# Training       28709
# PrivateTest     3589
# PublicTest      3589

In [3]:
# emotion labels from FER2013:
emotion = {'Angry': 0, 'Disgust': 1, 'Fear': 2, 'Happy': 3,
           'Sad': 4, 'Surprise': 5, 'Neutral': 6}
emo     = ['Angry', 'Fear', 'Happy',
           'Sad', 'Surprise', 'Neutral']

In [4]:
def reconstruct(pix_str, size=(48,48)):
    pix_arr = np.array(map(int, pix_str.split()))
    return pix_arr.reshape(size)

In [5]:
def emotion_count(y_train, classes, verbose=True):
    emo_classcount = {}
    print 'Disgust classified as Angry'
    y_train.loc[y_train == 1] = 0
    classes.remove('Disgust')
    for new_num, _class in enumerate(classes):
        y_train.loc[(y_train == emotion[_class])] = new_num
        class_count = sum(y_train == (new_num))
        if verbose:
            print '{}: {} with {} samples'.format(new_num, _class, class_count)
        emo_classcount[_class] = (new_num, class_count)
    return y_train.values, emo_classcount

In [6]:
def load_data(sample_split=0.3, usage='Training', to_cat=True, verbose=True,
              classes=['Angry','Happy'], filepath='fer2013.csv'):
    df = pd.read_csv(filepath)
    # print df.tail()
    # print df.Usage.value_counts()
    df = df[df.Usage == usage]
    frames = []
    classes.append('Disgust')
    for _class in classes:
        class_df = df[df['emotion'] == emotion[_class]]
        frames.append(class_df)
    data = pd.concat(frames, axis=0)
    rows = random.sample(data.index, int(len(data)*sample_split))
    data = data.ix[rows]
    print '{} set for {}: {}'.format(usage, classes, data.shape)
    data['pixels'] = data.pixels.apply(lambda x: reconstruct(x))
    x = np.array([mat for mat in data.pixels]) # (n_samples, img_width, img_height)
    X_train = x.reshape(-1, 1, x.shape[1], x.shape[2])
    y_train, new_dict = emotion_count(data.emotion, classes, verbose)
    print new_dict
    if to_cat:
        y_train = to_categorical(y_train)
    return X_train, y_train, new_dict

In [7]:
def save_data(X_train, y_train, fname='', folder=''):
    np.save(folder + 'X_train' + fname, X_train)
    np.save(folder + 'y_train' + fname, y_train)

In [8]:
if __name__ == '__main__':
    # makes the numpy arrays ready to use:
    print 'Making moves...'
    emo = ['Angry', 'Fear', 'Happy',
           'Sad', 'Surprise', 'Neutral']
    X_train, y_train, emo_dict = load_data(sample_split=1.0,
                                           classes=emo,
                                           verbose=True)
    print 'Saving...'
    save_data(X_train, y_train, fname='_privatetest6_100pct')
    print X_train.shape
    print y_train.shape
    print 'Done!'

Making moves...


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Training set for ['Angry', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral', 'Disgust']: (35887, 3)
Disgust classified as Angry
0: Angry with 5500 samples
1: Fear with 5121 samples
2: Happy with 8989 samples


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


3: Sad with 6077 samples
4: Surprise with 4002 samples
5: Neutral with 6198 samples
{'Angry': (0, 5500), 'Sad': (3, 6077), 'Neutral': (5, 6198), 'Surprise': (4, 4002), 'Fear': (1, 5121), 'Happy': (2, 8989)}
Saving...
(35887, 1, 48, 48)
(35887, 6)
Done!
