<a href="https://colab.research.google.com/github/amilkh/cs230-fer/blob/class-weight/fer2013.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Colab line magic: request the TensorFlow 1.x runtime. It sits in its own
# cell, ahead of the cell that runs `import tensorflow`.
%tensorflow_version 1.x

In [0]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import *

%matplotlib inline

In [3]:
from google.colab import drive

# Where the FER2013 CSV lives once Google Drive is mounted.
FER2013_CSV = '/content/drive/My Drive/cs230 project/collab/fer2013/fer2013.csv'

# Mounting prompts for an authorization code the first time it runs.
drive.mount('/content/drive')
data = pd.read_csv(FER2013_CSV)

#print('Number of samples in the dataset: ', data.shape[0])

# Every 'pixels' entry is one whitespace-separated string of integers; parse it
# into a Python list of ints. TODO: use an array cast
data['pixels'] = data['pixels'].apply(lambda raw: list(map(int, raw.split())))

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [4]:
def _images_and_labels(frame):
    """Turn one usage split of the dataset into model-ready arrays.

    Args:
        frame: DataFrame with a 'pixels' column (one flat list of 48*48 pixel
            intensities per row) and an 'emotion' column holding the targets.

    Returns:
        A tuple ``(images, labels)``: ``images`` is a float32 array of shape
        (num_samples, 48, 48, 1) divided by 255, and ``labels`` is the
        'emotion' column as an array.
    """
    # Reshape images to 4D (num_samples, width, height, num_channels)
    images = np.array(frame['pixels'].tolist(), dtype='float32').reshape(-1, 48, 48, 1)
    # Normalize images with max (the maximum pixel intensity is 255)
    return images / 255.0, frame['emotion'].values


# The 'Usage' column tags each row with the split it belongs to.
data_train = data[data['Usage']=='Training']
data_dev = data[data['Usage']=='PublicTest']

print('Number samples in the training dataset: ', data_train.shape[0])
print('Number samples in the development dataset: ', data_dev.shape[0])

# Both splits go through the same conversion so they cannot drift apart.
X_train, y_train = _images_and_labels(data_train)
X_dev, y_dev = _images_and_labels(data_dev)

Number samples in the training dataset:  28709
Number samples in the development dataset:  3589


In [5]:
# Integer label -> human-readable emotion name.
emotion_cat = {
    0: 'Angry',
    1: 'Disgust',
    2: 'Fear',
    3: 'Happy',
    4: 'Sad',
    5: 'Surprise',
    6: 'Neutral',
}

# Number of training samples per emotion, most frequent first.
train_counts = (
    data_train['emotion']
    .value_counts()
    .rename_axis('emotion')
    .reset_index(name='number_samples')
)
# Swap the numeric label for its name so the table reads on its own.
train_counts['emotion'] = train_counts['emotion'].map(emotion_cat)
train_counts

# Optional raw-count checks, in label order:
# print(list(emotion_cat.values()))
# print(np.bincount(y_train))
# print(np.bincount(y_dev))

Unnamed: 0,emotion,number_samples
0,Happy,7215
1,Neutral,4965
2,Sad,4830
3,Fear,4097
4,Angry,3995
5,Surprise,3171
6,Disgust,436


In [6]:
# Baseline model: two pairs of 3x3 convolutions (32 then 64 filters) with
# dropout after every convolution and one 2x2 max-pool between the pairs,
# followed by a dense classifier over the 7 emotion classes.
model = tf.keras.models.Sequential([
    InputLayer(input_shape=(48,48,1),name="input"),
    Conv2D(filters=32,kernel_size=3,activation='relu',padding='same',name="conv1"),
    Dropout(0.25),
    Conv2D(filters=32,kernel_size=3,activation='relu',padding='same',name="conv2"),
    Dropout(0.25),
    MaxPool2D(pool_size=(2,2),name="maxpool1"),
    Conv2D(filters=64,kernel_size=3,activation='relu',padding='same',name="conv3"),
    Dropout(0.25),
    Conv2D(filters=64,kernel_size=3,activation='relu',padding='same',name="conv4"),
    Dropout(0.25),
    Flatten(),
    # No `input_shape` on the dense layers: Sequential only honours it on the
    # first layer, so their input size is inferred from the preceding layer
    # (Flatten yields 24*24*64 features here).
    Dense(1024,activation='relu',name='fc1'),
    Dense(7,activation='softmax',name='fc-softmax')
])

# The sparse loss takes integer class ids as targets (no one-hot encoding).
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [7]:
# Baseline training run: 5 epochs, evaluated on the dev split after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_dev, y_dev),
)

Train on 28709 samples, validate on 3589 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f7b7ff066d8>

In [24]:
# Confusion matrix
from sklearn.metrics import confusion_matrix

# Predicted class = index of the largest softmax output for each dev image.
y_pred = np.argmax(model.predict(X_dev), axis=1)

# Pin the label order explicitly. By default confusion_matrix infers the labels
# from the data, so a class missing from both y_dev and y_pred would shrink the
# matrix and no longer match the 7 emotion names used for the index/columns.
emotion_ids = sorted(emotion_cat)
emotion_names = [emotion_cat[i] for i in emotion_ids]

# normalize='true' divides each row by its total: rows are the true classes and
# each cell is the fraction of that class predicted as the column's class.
print(pd.DataFrame(
  confusion_matrix(y_dev, y_pred, labels=emotion_ids, normalize='true'),
  index=emotion_names,
  columns=emotion_names
  ))

[['41.11%' '0.86%' '8.14%' '12.85%' '19.06%' '4.07%' '13.92%']
 ['10.71%' '44.64%' '12.50%' '14.29%' '12.50%' '0.00%' '5.36%']
 ['10.08%' '1.01%' '35.08%' '9.88%' '21.37%' '9.07%' '13.51%']
 ['3.80%' '0.45%' '2.01%' '75.87%' '6.82%' '1.90%' '9.16%']
 ['14.09%' '0.61%' '9.95%' '11.79%' '44.26%' '1.84%' '17.46%']
 ['4.82%' '0.00%' '7.71%' '7.95%' '5.54%' '68.19%' '5.78%']
 ['11.04%' '0.66%' '6.59%' '12.69%' '20.26%' '1.81%' '46.95%']]


In [9]:
# Balance class weights
from sklearn.utils import class_weight

# 'balanced' gives each class the weight n_samples / (n_classes * class_count),
# so rare classes get proportionally larger weights.
# Arguments are passed by keyword: newer scikit-learn releases make them
# keyword-only, and older releases accept the same names.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train,
)
# Weights are ordered like np.unique(y_train), i.e. by ascending label id,
# which matches the order of the names printed here.
print(list(emotion_cat.values()))
print(class_weights)

['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
[1.02660468 9.40661861 1.00104606 0.56843877 0.84912748 1.29337298
 0.82603942]


In [10]:
# Retrain with balanced class weights
# Keras documents `class_weight` as a dict mapping class index -> weight, so
# convert the weight array (ordered like np.unique(y_train)) into that form
# with plain Python ints/floats as keys and values.
class_weight_by_label = {
    int(label): float(weight)
    for label, weight in zip(np.unique(y_train), class_weights)
}

# NOTE: `model` still holds the parameters learned by the earlier unweighted
# fit, so this call continues training from there rather than from a fresh
# initialization. Rebuild and recompile the model first for a clean comparison.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_dev, y_dev),
    class_weight=class_weight_by_label,
)

Train on 28709 samples, validate on 3589 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f7a667dfe48>