# The Data

The data consists of 48x48 pixel grayscale images of faces. The faces have been automatically registered so that the face is more or less centred and occupies about the same amount of space in each image.

The task is to categorize each face based on the emotion shown in the facial expression into one of seven categories (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral). The training set consists of 28,709 examples and the public test set consists of 3,589 examples.
https://www.kaggle.com/competitions/challenges-in-representation-learning-facial-expression-recognition-challenge/data?select=icml_face_data.csv

## Preprocessing in one step

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

emotions = {0:'Angry', 1:'Disgust', 2:'Fear', 3:'Happy', 4:'Sad', 5:'Surprise', 6:'Neutral'}

path = '../../../project_data/fer2013/icml_face_data.csv'
df = pd.read_csv(path)

# creating a dataframe that only contains training data
train = df[df[' Usage']=='Training'].copy()

# dropping the usage column
train.drop([' Usage'], axis = 1, inplace = True)

# combining public and private tests into a one dataframe
public = df[df[' Usage']=='PublicTest'].copy()
private = df[df[' Usage']=='PrivateTest'].copy()

# concatenating into one test dataframe
test = pd.concat([public, private])

# dropping usage column
test.drop([' Usage'], axis = 1, inplace = True)

# reseting the index
test.reset_index(inplace = True, drop = True)

def get_pixels_and_scale(data):
    '''
    Function for extracting pixel values from the pixel column and scaling them between 0 and 1
    
    get values from the column
    split on the whitespace
    convert to float16 datatype
    save as as numpy array
    scale the values between 0 and 1
    '''
    
    img_array = np.array(data.split(' '), dtype = 'float16')
    img_array = img_array / 255.0
    
    return img_array

def reshape_images(data):
    '''
    reshape pixels column into desired format
    return a numpy array
    '''

    reshaped_array = np.reshape(data[' pixels'].to_list(),(data.shape[0],48,48,1))
    
    return reshaped_array

# def plot_random_images(dataset): 
#     if np.array_equal(dataset, train_images):
#         labels = train_labels
#     else:
#         labels = test_labels

#     # Checking 25 random images from the train set across all classes
#     plt.figure(figsize=(12,12))
#     for ind, img_num in enumerate(np.random.randint(1,len(dataset),size=25)):
#         plt.subplot(5,5,ind+1)
#         plt.xticks([])
#         plt.yticks([])
#         plt.grid(False)
#         plt.imshow(dataset[img_num], cmap='gray')
#         plt.xlabel(emotions[labels[img_num]])
#     plt.show()
    

# extracting pixel values from the pixel column and scaling them between 0 and 1    
train[' pixels'] = train[' pixels'].apply(get_pixels_and_scale)   
test[' pixels'] = test[' pixels'].apply(get_pixels_and_scale)

# reshaping pixels column into a desired format and getting a numpy array
train_images = reshape_images(train)
test_images = reshape_images(test)

# getting the labels
train_labels = train['emotion']
test_labels = test['emotion']

print(train_images.shape, train_labels.shape)
print(test_images.shape, test_labels.shape)

(28709, 48, 48, 1) (28709,)
(7178, 48, 48, 1) (7178,)


In [3]:
import tensorflow as tf
print(tf.__version__)

2.8.0


In [4]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(48, 48)), # shape of the input data
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(7, activation='softmax') # 7 output layers
])

model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=10)

2022-05-07 08:17:06.326486: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-07 08:17:06.326886: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1
Epoch 1/10


2022-05-07 08:17:06.810723: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
  return dispatch_target(*args, **kwargs)
2022-05-07 08:17:06.978035: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2992b6130>

In [5]:
#compare how the model performs on the test dataset:
test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

print('\nTest accuracy:', test_acc)

2022-05-07 08:17:47.783078: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


225/225 - 1s - loss: 1.6351 - accuracy: 0.3593 - 700ms/epoch - 3ms/step

Test accuracy: 0.35929229855537415
