In [51]:
import numpy as np
import pandas as pd
from scipy.io.arff import loadarff
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential, load_model
from sklearn.model_selection import train_test_split

In [52]:
# util functions
def normalize(df):
    result = df.copy()
    for feature_name in df.columns:
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result


def pattern_accuracy(y_true, y_pred):
    return np.sum(np.all(y_true == y_pred, axis=1)) / y_true.shape[0]


tf.keras.metrics.BinaryAccuracy(
    name="binary_accuracy", dtype=None, threshold=0.5
)
binary_accuracy = tf.keras.metrics.BinaryAccuracy()

In [53]:
# import data
raw_data = loadarff("emotions_train.arff")
df = pd.DataFrame(raw_data[0])

In [54]:
# preprocess data, normalize input to 0-1 scale
X = df.iloc[:,:-6]
norm_X = normalize(X)

y = df.iloc[:, 72:]
for col in y.columns:
    y[col] = y[col].apply(lambda x: int(x.decode('utf-8')))

In [55]:
# split data into train/val
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [56]:
# build model
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], kernel_initializer='uniform', activation='sigmoid'))
model.add(Dense(y_train.shape[1], kernel_initializer='uniform', activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 128)               9344      
                                                                 
 dense_11 (Dense)            (None, 6)                 774       
                                                                 
Total params: 10,118
Trainable params: 10,118
Non-trainable params: 0
_________________________________________________________________


In [57]:
# train
history = model.fit(X_train, y_train,
                    batch_size=4,
                    epochs=100,
                    verbose=1,
                    validation_split=.2,
                    callbacks=[callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


In [58]:
# save or load model
model.save('output/emotions-model.h5')
model = load_model('output/emotions-model.h5')

In [59]:
# predict
y_pred = model.predict(X_test)
y_pred = np.where(y_pred > 0.5, 1, 0)



In [60]:
# evaluate
print("Binary accuracy: ", binary_accuracy(y_test, y_pred))
print("Pattern accuracy: ",pattern_accuracy(y_test, y_pred))

Binary accuracy:  tf.Tensor(0.7625, shape=(), dtype=float32)
Pattern accuracy:  0.225
