In [205]:
import numpy as np
import pandas as pd
from scipy.io.arff import loadarff
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential, load_model
from sklearn.model_selection import train_test_split

In [206]:
# util functions
def normalize(df):
    result = df.copy()
    for feature_name in df.columns:
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result


def pattern_accuracy(y_true, y_pred):
    return np.sum(np.all(y_true == y_pred, axis=1)) / y_true.shape[0]


binary_accuracy = tf.keras.metrics.BinaryAccuracy(
    name="binary_accuracy", dtype=None, threshold=0.5
)

In [207]:
# import data
raw_data = loadarff("emotions_train.arff")
df = pd.DataFrame(raw_data[0])
df.head()

Unnamed: 0,Mean_Acc1298_Mean_Mem40_Centroid,Mean_Acc1298_Mean_Mem40_Rolloff,Mean_Acc1298_Mean_Mem40_Flux,Mean_Acc1298_Mean_Mem40_MFCC_0,Mean_Acc1298_Mean_Mem40_MFCC_1,Mean_Acc1298_Mean_Mem40_MFCC_2,Mean_Acc1298_Mean_Mem40_MFCC_3,Mean_Acc1298_Mean_Mem40_MFCC_4,Mean_Acc1298_Mean_Mem40_MFCC_5,Mean_Acc1298_Mean_Mem40_MFCC_6,...,BH_HighLowRatio,BHSUM1,BHSUM2,BHSUM3,amazed-suprised,happy-pleased,relaxing-calm,quiet-still,sad-lonely,angry-aggresive
0,0.034741,0.089665,0.091225,-73.302422,6.215179,0.615074,2.03716,0.804065,1.301409,0.558576,...,2.0,0.245457,0.105065,0.405399,b'0',b'1',b'1',b'0',b'0',b'0'
1,0.081374,0.272747,0.085733,-62.584437,3.183163,-0.218145,0.163038,0.620251,0.458514,0.041426,...,2.0,0.343547,0.276366,0.710924,b'1',b'0',b'0',b'0',b'0',b'1'
2,0.110545,0.273567,0.08441,-65.235325,2.794964,0.639047,1.281297,0.757896,0.489412,0.627636,...,3.0,0.188693,0.045941,0.457372,b'0',b'1',b'0',b'0',b'0',b'1'
3,0.042481,0.199281,0.093447,-80.305152,5.824409,0.648848,1.75487,1.495532,0.739909,0.809644,...,2.0,0.102839,0.241934,0.351009,b'0',b'0',b'1',b'0',b'0',b'0'
4,0.07455,0.14088,0.079789,-93.697749,5.543229,1.064262,0.899152,0.890336,0.702328,0.490685,...,2.0,0.195196,0.310801,0.683817,b'0',b'0',b'0',b'1',b'0',b'0'


In [208]:
# preprocess data, normalize input to 0-1 scale
X = df.iloc[:,:-6]
X = normalize(X)

y = df.iloc[:, 72:]
for col in y.columns:
    y[col] = y[col].apply(lambda x: int(x.decode('utf-8')))

In [209]:
# split data into train/val
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [210]:
# build model
model = Sequential()
model.add(Dense(512, input_dim=X_train.shape[1], kernel_initializer='uniform', activation='sigmoid'))
model.add(Dense(y_train.shape[1], kernel_initializer='uniform', activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])
model.summary()

# define callback
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_42 (Dense)            (None, 512)               37376     
                                                                 
 dense_43 (Dense)            (None, 6)                 3078      
                                                                 
Total params: 40,454
Trainable params: 40,454
Non-trainable params: 0
_________________________________________________________________


In [211]:
# train
history = model.fit(X_train, y_train,
                    batch_size=8,
                    epochs=100,
                    verbose=1,
                    validation_split=.2,
                    callbacks=[callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


In [212]:
# save or load model
model.save('output/emotions-model.h5')
model = load_model('output/emotions-model.h5')

In [213]:
# predict
y_pred = model.predict(X_test)
y_pred = np.where(y_pred > 0.5, 1, 0)
random_model = np.random.randint(2, size=(y_pred.shape[0], y_pred.shape[1]))

In [214]:
# evaluate random model
print('Random Model')
print("Binary accuracy: ", binary_accuracy(y_test, random_model))
print("Pattern accuracy: ",pattern_accuracy(y_test, random_model))

# evaluate NN
print("\nNN")
print("Binary accuracy: ", binary_accuracy(y_test, y_pred))
print("Pattern accuracy: ",pattern_accuracy(y_test, y_pred))

Random Model
Binary accuracy:  tf.Tensor(0.6166667, shape=(), dtype=float32)
Pattern accuracy:  0.075

NN
Binary accuracy:  tf.Tensor(0.7041667, shape=(), dtype=float32)
Pattern accuracy:  0.25
