# Load dataset

In [2]:
import numpy as np

In [3]:
xy = np.load("openface_and_labels_by_frame.npz")
X, y = xy["x"], xy["y"]

In [4]:
print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (604901, 713)
y shape: (604901, 7)


In [5]:
print(X[1].shape)

(713,)


# Exploration of data

In [108]:
X_sample = X[np.random.choice(len(X))]
y_sample = y[np.random.choice(len(y))]
print("random X min:", np.min(X_sample))
print("random X max:", np.max(X_sample))
print("random y min:", np.min(y_sample))
print("random y max:", np.max(y_sample))

random X min: -116.95789
random X max: 422.15787
random y min: 0.0
random y max: 0.6666667


In [7]:
print("x min:", np.min(X))
print("x max:", np.max(X))

x min: -34065332.0
x max: 48234530.0


In [10]:
def summarize_y(y):
    for i in range(y.shape[1]):
        print("min y, index", i, ":", np.min(y[:,i]))
        print("max y, index", i, ":", np.max(y[:,i]), "\n")

    print("y min:", np.min(y[:,4]))
    print("y max:", np.max(y))
    print("y mean:", np.mean(y))
    print("y std:", np.std(y), "\n")

In [11]:
summarize_y(y)

min y, index 0 : -3.0
max y, index 0 : 3.0 

min y, index 1 : 0.0
max y, index 1 : 3.0 

min y, index 2 : 0.0
max y, index 2 : 3.0 

min y, index 3 : 0.0
max y, index 3 : 3.0 

min y, index 4 : 0.0
max y, index 4 : 3.0 

min y, index 5 : 0.0
max y, index 5 : 3.0 

min y, index 6 : 0.0
max y, index 6 : 1.6666666 

y min: 0.0
y max: 3.0
y mean: 0.17889453
y std: 0.56088483 



Note: per the paper, the integral [-3, 3] for the target values, as implied above (min/max) represents sentiment, with +3 being highly positive, -3 highly negative, 0 neutral. the mean value is also consistent with the distribution shown in the paper (pp 2240 fig 2), which shows that most annotations are neutral or weakly positive (0-1). do we need to balance the classes at some point? **note that the first index (i=0) of any given target array represents this integral.

The paper mentions a second integral [0,3] which measures the presence of emotion (0 = no presence, 3 = highest presence). It looks like indices 1-5 of the target arrays are of this integral.

In [12]:
np.unique(y[:,1])

array([0.        , 0.16666667, 0.33333334, 0.5       , 0.6666667 ,
       0.8333333 , 1.        , 1.1666666 , 1.3333334 , 1.6666666 ,
       2.        , 2.1666667 , 2.3333333 , 2.6666667 , 3.        ],
      dtype=float32)

Note: it looks like the values are specifically sixths (1/6) between the intervals I specified above for each of the 7 labels. once I was sure of this, I went ahead and converted them to integer representations.

In [102]:
y_encoded = y * 6
np.unique(y_encoded)

array([-18., -16., -14., -12., -10.,  -8.,  -6.,  -4.,  -3.,  -2.,  -1.,
         0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,
        11.,  12.,  13.,  14.,  15.,  16.,  18.], dtype=float32)

In [103]:
y_encoded = np.round(y_encoded).astype(np.int64)

In [104]:
summarize_y(y_encoded)

min y, index 0 : -18
max y, index 0 : 18 

min y, index 1 : 0
max y, index 1 : 18 

min y, index 2 : 0
max y, index 2 : 18 

min y, index 3 : 0
max y, index 3 : 18 

min y, index 4 : 0
max y, index 4 : 18 

min y, index 5 : 0
max y, index 5 : 18 

min y, index 6 : 0
max y, index 6 : 10 

y min: 0
y max: 18
y mean: 1.0733673774716854
y std: 3.365308890115125 



# Split data

We can decide later exactly how much of the data ought to be used, but for the sake of testing things for now, I've truncated the dataset to something more manageable from a time perspective (sample a number (n_samples * m_percent) of random indices and extract those indices from the features/targets). I can perform a larger-in-scale training session over break on my GPU desktop (hopefully without interruption due to hardware limitations) once we're satisfied with the model parameterization

In [34]:
def truncate_data(X, y, n_samples):
    random_i = np.random.choice(len(X), size = n_samples)
    X_trunc = X[random_i]
    y_trunc = y[random_i]
    return X_trunc, y_trunc

In [94]:
X_truncated, y_truncated_emotion = truncate_data(X, y_encoded[:,1:], int(X.shape[0] * 0.5))

In [93]:
y_truncated_sentiment = truncate_data(X, y_encoded[:,0], int(X.shape[0] * 0.5))[1]

In [101]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_truncated, y_truncated_emotion, test_size = 0.2)

print("Train:", X_train.shape[0])
print("Test:", X_test.shape[0])
print("y shape:", y_train.shape)

Train: 241960
Test: 60490
y shape: (241960, 6)


# Model

## Predicting presence of emotions

In [84]:
import tensorflow as tf

emotion_model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(X_train.shape[1],)),
  tf.keras.layers.Dense(712, activation='relu'),
  tf.keras.layers.Dense(712, activation='relu'),
  tf.keras.layers.Dense(356, activation='relu'),
  tf.keras.layers.Dense(178, activation='relu'),
  tf.keras.layers.Dense(6, activation='softmax')
])

In [85]:
mse_loss = tf.keras.losses.MeanSquaredError()

In [86]:
emotion_model.compile(optimizer='adam',
  loss=mse_loss,
  metrics=tf.keras.metrics.Precision())

In [87]:
emotion_model.fit(X_train, y_train, epochs=10, validation_split=0.2, shuffle=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x28427033850>

In [110]:
emotion_model.save_weights("em1/em1_checkpoint")

In [89]:
test_loss, test_acc = emotion_model.evaluate(X_test, y_test)
print(test_loss)
print(test_acc)

Loss: 5.924108505249023
Accuracy 0.5599437952041626


## Sentiment prediction

In [100]:
X_train, X_test, y_train, y_test = train_test_split(X_truncated, y_truncated_sentiment, test_size = 0.2)

print("Train:", X_train.shape[0])
print("Test:", X_test.shape[0])
print("y shape:", y_train.shape)

Train: 241960
Test: 60490
y shape: (241960,)
