Combine the cleaned FER2013 and CMU Face Images datasets into a single CSV file

In [40]:
import pandas as pd

In [41]:
# Load the two previously cleaned datasets from the working directory
fer, cmu = (
    pd.read_csv(csv_name)
    for csv_name in ("FER2013.csv", "CMU Face Images.csv")
)

In [42]:
# Preview the first five rows of the FER2013 frame
fer.head(n=5)

Unnamed: 0,emotion,pixels
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


In [43]:
# Preview the first five rows of the CMU Face Images frame
cmu.head(n=5)

Unnamed: 0,person,direction,emotion,eyes,pixels
0,an2i,left,angry,open,34 3 2 0 3 29 96 105 92 75 75 77 80 83 82 82 9...
1,an2i,left,angry,sunglasses,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
2,an2i,left,happy,open,34 3 0 0 3 29 98 108 95 75 77 78 83 85 85 83 9...
3,an2i,left,happy,sunglasses,36 3 2 2 5 32 102 112 97 78 80 81 86 86 86 86 ...
4,an2i,left,neutral,open,49 110 118 108 103 100 98 100 101 103 108 116 ...


In [44]:
# Keep only the columns the CMU frame shares with FER2013 (emotion, pixels)
cmu = cmu.drop(columns=["person", "direction", "eyes"])

# Map emotions to ints to match the FER2013 dataset
cmu_emotions = { "angry":0, "happy":3, "neutral":6, "sad":4 }

# Fail early on any label the mapping does not cover; Series.map would
# otherwise silently turn it into NaN.
unmapped = set(cmu["emotion"].unique()) - set(cmu_emotions)
assert not unmapped, f"Unmapped CMU emotion labels: {unmapped}"

# Remap only the emotion column. A frame-wide replace() also scans every pixel
# string and, on recent pandas versions, emits a FutureWarning about implicit
# downcasting; mapping the single column and casting explicitly keeps the
# resulting dtype deterministic.
cmu["emotion"] = cmu["emotion"].map(cmu_emotions).astype("int64")

  cmu.replace(cmu_emotions, inplace=True)


In [45]:
# Add dimensions of the image as columns to the dataframes
# (assigning a scalar broadcasts the value to every row)

# Images from the FER dataset are all 48x48 px
fer['width'] = 48
fer['height'] = 48

# Images from the CMU dataset were all chosen to be 128x120 px,
# i.e. width x height: 128 columns by 120 rows.
# NOTE(review): width/height were previously stored swapped (120/128);
# confirm any downstream reshape uses (height, width).
cmu['width'] = 128
cmu['height'] = 120

In [48]:
# Stack the two datasets row-wise. ignore_index=True gives the result a fresh
# 0..n-1 index; without it each input keeps its own labels and the combined
# frame ends up with duplicate index values, making label-based lookups
# (e.g. df.loc[0]) ambiguous.
df = pd.concat([fer, cmu], ignore_index=True)

In [49]:
# The combined frame must hold exactly the rows of both inputs
expected_rows = len(fer) + len(cmu)
assert len(df) == expected_rows

In [50]:
# The combined labels must be exactly the seven integer codes 0 through 6
observed_emotions = set(df["emotion"].unique())
assert observed_emotions == set(range(7))

In [51]:
# Remove fully identical rows, keeping the first occurrence of each
df = df.drop_duplicates()

In [52]:
# Show every row that has a missing value in at least one column
# (an empty result means the data is complete)
has_missing = df.isnull().any(axis="columns")
df[has_missing]

Unnamed: 0,emotion,pixels,width,height


In [53]:
# Preview the first five rows of the combined frame
df.head(n=5)

Unnamed: 0,emotion,pixels,width,height
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,48,48
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,48,48
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,48,48
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,48,48
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,48,48


In [54]:
# Persist the combined dataset as CSV, leaving out the row index
output_csv = "Face Images.csv"
df.to_csv(output_csv, index=False)