In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
from tqdm import tqdm
from sklearn.model_selection import train_test_split, KFold

# Only let ERROR-level (and more severe) records through TensorFlow's Python logger
tf_logger = tf.get_logger()
tf_logger.setLevel('ERROR')

In [None]:
# Fix the NumPy and TensorFlow global random seeds to the same value
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Data

In [None]:
# Read just the `text` and `emotion` columns of the labeled CSV
labeled_csv_path = './data/labeled/youtube_labeled.csv'
wanted_columns = ['text', 'emotion']
df = pd.read_csv(labeled_csv_path, usecols=wanted_columns)

df

In [None]:
# Model inputs (`text` column) and raw targets (`emotion` column)
x, y = df['text'], df['emotion']

In [None]:
# Distinct values found in the `emotion` column
EMOTIONS = pd.unique(df['emotion'])
# Class count; used as the width of the classifier's output layer
N_EMOTIONS = len(EMOTIONS)
N_EMOTIONS

## One-hot encode the emotion labels

In [None]:
from sklearn.preprocessing import LabelEncoder
# Use the Keras bundled with TensorFlow (as the rest of this notebook does) via its
# public path; the private `keras.utils.np_utils` module is gone from recent releases.
from tensorflow.keras.utils import to_categorical

# Encode straight from the raw `df['emotion']` column rather than from `y`, so that
# re-running this cell never feeds the already one-hot `y` back into the encoder.
encoder = LabelEncoder()
label_ids = encoder.fit_transform(df['emotion'])

# One row per sample, one column per class known to the encoder.
y = to_categorical(label_ids, num_classes=len(encoder.classes_))

In [None]:
# Show the labels learned by the fitted encoder; position i is the label for class index i
encoder.classes_

In [None]:
# Class index -> emotion label.
# Built from the fitted encoder instead of being typed out by hand, so the mapping
# always matches the label order that was actually used to produce `y`, even if the
# dataset gains, loses or renames a class.
decode_map = {index: str(label) for index, label in enumerate(encoder.classes_)}

# Model and Training

In [None]:
# TensorFlow Hub handle of the multilingual Universal Sentence Encoder (version 3)
model_hub_path = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3"


# Wrap the hub model as a Keras layer that takes scalar strings; its weights stay
# frozen (trainable=False), so only the layers stacked on top of it are trained.
hub_layer = hub.KerasLayer(
    model_hub_path,
    input_shape=[],
    dtype=tf.string,
    trainable=False,
)

In [None]:
tf.keras.backend.clear_session()

N_SPLITS = 5
CV_SEED = 42
SEPARATOR = '------------------------------------------------------------------------'


def build_emotion_classifier():
    """Return a freshly initialised, compiled emotion classifier.

    The frozen `hub_layer` sentence encoder feeds two dropout-regularised dense
    layers and an output layer with `N_EMOTIONS` units.

    The targets in `y` are one-hot vectors with exactly one active class, so the
    output layer uses softmax and the loss is categorical cross-entropy on
    probabilities. (The previous sigmoid output combined with
    `BinaryCrossentropy(from_logits=True)` applied a sigmoid twice and treated the
    classes as independent binary problems.)
    """
    classifier = tf.keras.models.Sequential([
        hub_layer,
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(N_EMOTIONS, activation='softmax')
    ])
    classifier.compile(
        # The model already outputs probabilities, so from_logits stays at its default (False)
        loss=tf.keras.losses.CategoricalCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(0.001),
        metrics=['accuracy']
    )
    return classifier


acc_per_fold = []
loss_per_fold = []

# Shuffle before splitting so the folds do not simply follow the row order of the
# CSV; the fixed random_state keeps the split reproducible across runs.
kfold = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CV_SEED)

# Folds are numbered from 1 both here and in the summary below.
for fold_n, (train_index, test_index) in enumerate(kfold.split(x), start=1):

    # A new model per fold, so no trained dense weights carry over between folds
    model = build_emotion_classifier()

    print(SEPARATOR)
    print(f'Training for fold {fold_n} ...')

    # KFold yields positional indices: use .iloc on the Series `x` so the lookup is
    # positional even if the frame's index is not 0..n-1. `y` is a NumPy array.
    model.fit(
        x.iloc[train_index],
        y[train_index],
        epochs=100,
        batch_size=256,
        verbose=0
    )
    scores = model.evaluate(x.iloc[test_index], y[test_index], verbose=0)
    print(f'Score for fold {fold_n}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

# NOTE: after the loop `model` is the classifier trained in the last fold only;
# that is the object the later save/predict cells operate on.

print(SEPARATOR)
print('Score per fold')
for fold_id, (fold_loss, fold_acc) in enumerate(zip(loss_per_fold, acc_per_fold), start=1):
    print(SEPARATOR)
    print(f'> Fold {fold_id} - Loss: {fold_loss} - Accuracy: {fold_acc}%')
print(SEPARATOR)
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print(SEPARATOR)

## Save Model

In [None]:
# Export `model`, i.e. the classifier left over from the final cross-validation fold.
# NOTE(review): tf.saved_model.save exports in SavedModel format, so despite the
# `.h5` suffix this path is presumably a directory, not an HDF5 file — confirm
# before pointing an HDF5 loader at it.
export_path = './models/emotion_crossval.h5'
tf.saved_model.save(model, export_path)

# Test Model with New Predictions

In [None]:
# Smoke test: classify a single unseen comment and display its decoded label
sentence = """“He’s a famous YouTube bounty hunter, who also fakes his bounty hunts” lmao"""

# The model is fed a one-element batch; argmax picks the highest-scoring class index
class_scores = model.predict(np.array([sentence]))
prediction = np.argmax(class_scores)

decode_map[prediction]