In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the FER2013 CSV (path is relative to the notebook's working directory)
ds = pd.read_csv(filepath_or_buffer='fer2013/fer2013.csv')

In [3]:
# Drop the "Usage" column.
# Rebind rather than mutate in place, and tolerate the column already being
# absent, so that re-running this cell does not raise KeyError.
ds = ds.drop(columns="Usage", errors="ignore")

In [None]:
# Show the rows that exactly repeat an earlier row (first occurrences are not flagged)
ds.loc[ds.duplicated()]

In [5]:
# Remove exact duplicate rows, keeping only the first occurrence of each
ds.drop_duplicates(subset=None, keep="first", inplace=True)

In [None]:
# Show every row that has a missing value in at least one column
ds.loc[ds.isna().any(axis="columns")]

In [7]:
# Lookup table from the integer emotion code (0-6) to a readable label
expressions = dict(enumerate(
    ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
))

In [None]:
# Get frequency of different emotions in FER2013
# Count once (instead of recomputing value_counts() on every loop iteration) and
# reindex over every known emotion code so that a class absent from the data is
# reported as 0 rather than raising KeyError.
emotion_counts = ds['emotion'].value_counts().reindex(list(expressions), fill_value=0)

# Map readable label -> number of rows, in emotion-code order
emotion_freq = {expressions[code]: int(count) for code, count in emotion_counts.items()}

fig, ax = plt.subplots()
ax.bar(list(emotion_freq.keys()), list(emotion_freq.values()))
ax.set_title("Frequency of emotions in FER2013 dataset")
ax.set_xlabel("Emotion")
ax.set_ylabel("Number of images")

plt.show()

In [9]:
# Images that appear under more than one emotion label.
# keep=False flags every row whose pixel string occurs more than once, not just the repeats.
duplicates = ds.loc[ds["pixels"].duplicated(keep=False)]
# One entry per distinct pixel string among those multiply-labelled rows
duplicate_pixels = duplicates["pixels"].unique()

In [None]:
# Display an image which is classified as being more than 1 emotion
# Print every emotion the image was classified as

img = 56  # position within duplicate_pixels; must be smaller than len(duplicate_pixels)

pxs = duplicate_pixels[img]
# All rows that carry exactly this pixel string
rows = ds[ds.pixels == pxs]
emotions = rows.emotion
emotion_names = " ".join(expressions[emotion] for emotion in emotions)
print(emotion_names)

# Parse the space-separated pixel string into a flat integer array
im = np.fromstring(pxs, dtype=int, sep=" ")

# Render as a 48x48 grayscale image; an explicit gray colormap keeps this
# consistent with the single-image display cell instead of the default false-colour map.
fig, ax = plt.subplots()
ax.set_title(emotion_names)
ax.imshow(im.reshape(48, 48).astype('float32'), cmap="gist_gray")

Emotion Categories:
- Angry: 4740 images
- Disgust: 461 images
- Fear: 4835 images
- Happy: 8800 images
- Sad: 5934 images
- Surprise: 3267 images
- Neutral: 6057 images

Total Images: 34094

1793 exact duplicate rows were dropped from the initial 35887 images during preprocessing/cleaning

57 images were assigned more than one emotion

Display an image

In [11]:
# First column as an (n_rows, 1) integer array -- expected to be 'emotion' (0-6)
labels = ds.iloc[:, 0:1].to_numpy()
pixel_list = ds['pixels']

# Parse every space-separated pixel string into a flat integer vector (48*48 values
# expected per image) and collect the vectors into a single array
images = np.array([
    np.fromstring(pixel_string, dtype=int, sep=" ")
    for pixel_string in pixel_list
])

In [None]:
# Pick one sample by position and turn its flat pixel vector into a 48x48x1 float image
img_num = 15000
image = images[img_num].astype('float32').reshape(48, 48, 1)

# Render it in grayscale
plt.imshow(image, cmap="gist_gray")

Save the cleaned data

In [13]:
# Write the cleaned frame to CSV, leaving out the DataFrame index
ds.to_csv(path_or_buf="testing123.csv", index=False)

In [14]:
# Save features and one-hot labels as .npy files

width, height = 48, 48

datapoints = ds['pixels'].tolist()

# Features: each pixel string becomes a float32 (48, 48) grid
X = np.asarray([
    np.asarray([int(token) for token in sequence.split(' ')])
    .reshape(width, height)
    .astype('float32')
    for sequence in datapoints
])
# Append a trailing channel axis -> (n_images, 48, 48, 1)
X = X[..., np.newaxis]

# Labels: one column per emotion code present in the data
# NOTE(review): the dtype of this array follows the installed pandas version's
# get_dummies default -- confirm it matches what the training code expects.
y = pd.get_dummies(ds['emotion']).values

# Persist both arrays with numpy (np.save appends the .npy extension)
np.save('fdataX', X)
np.save('flabels', y)