In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
import tqdm
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Get Universal Sentence Encoder from TensorFlow Hub

In [None]:
# TF-Hub handle of the multilingual (large, v3) Universal Sentence Encoder.
USE_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3"

# Fetch the encoder from TF-Hub; it is called directly on text to get embeddings.
USE = hub.load(USE_MODEL_URL)

# Get Dataset

In [None]:
# Training data; later cells read its 'Sentence' and 'Emotion' columns.
TRAIN_CSV = './data/edited/train.csv'
df = pd.read_csv(TRAIN_CSV)

df

In [None]:
# Class labels, in *sorted* order.
# OneHotEncoder lays out its one-hot columns by sorted category, so the index
# returned by argmax over a model prediction refers to the sorted label list.
# A bare `.unique()` keeps first-appearance order instead, which would make
# `EMOTIONS[prediction]` name the wrong emotion whenever the CSV is not already
# ordered alphabetically by label.
EMOTIONS = np.sort(df['Emotion'].unique())
N_EMOTIONS = len(EMOTIONS)
N_EMOTIONS

# Process Dataset

## One-hot encode the emotion labels

In [None]:
# One-hot encode the emotion label of every row into a dense 2-D array
# (one row per sample, one column per distinct emotion).
# The `sparse=` keyword was renamed to `sparse_output=` in scikit-learn 1.2 and
# removed in 1.4; using the default (sparse) encoder and densifying with
# `.toarray()` gives the same result on every scikit-learn version.
one_hot = OneHotEncoder().fit_transform(
    df['Emotion'].to_numpy().reshape(-1, 1)
).toarray()

## Train and Test Split

In [None]:
# Hold out 10% of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs.
train, test, y_train, y_test = train_test_split(
    df['Sentence'], one_hot, random_state=42, test_size=0.1
)

### Convert Sentence to Embedding Vector

In [None]:
def embed_sentences(sentences, batch_size=64):
    """Encode sentences with the Universal Sentence Encoder (`USE`).

    The sentences are sent to the encoder in batches rather than one call per
    sentence, which removes most of the per-call overhead while keeping memory
    use bounded.

    Args:
        sentences: Iterable of strings (e.g. a pandas Series of sentences).
        batch_size: Number of sentences passed to the encoder per call.

    Returns:
        A 2-D NumPy array with one embedding row per input sentence, in input
        order. An empty input yields an empty array.
    """
    texts = list(sentences)
    if not texts:
        return np.array([])

    batch_starts = range(0, len(texts), batch_size)
    batches = [
        USE(texts[start:start + batch_size]).numpy()
        for start in tqdm.tqdm(batch_starts, desc='Embedding')
    ]
    return np.concatenate(batches, axis=0)


# Same encoding for both partitions, so train and test features are comparable.
X_train = embed_sentences(train)
X_test = embed_sentences(test)

# Build the Model

In [None]:
# Seed TensorFlow's global RNG before any layer is created so that weight
# initialisation, dropout masks and fit-time shuffling are repeatable across
# "Restart & Run All" (same seed as the train/test split). Some GPU kernels may
# still introduce small run-to-run differences.
tf.random.set_seed(42)

# Feed-forward classifier on top of the sentence embeddings:
# two ReLU hidden layers, each followed by 50% dropout, then a softmax over
# the emotion classes.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, input_shape=(X_train.shape[1],), activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(N_EMOTIONS, activation='softmax')
])

# Targets are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()

### Train

In [None]:
# Train for 20 epochs in mini-batches of 32, reshuffling every epoch;
# 10% of the training data is set aside by Keras for validation.
results = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    shuffle=True,
    validation_split=0.1,
)

### Evaluate

In [None]:
# Loss and accuracy on the held-out test split.
model.evaluate(x=X_test, y=y_test)

### New Predictions

In [None]:
# Classify a single, previously unseen sentence.
sentence = 'You are great'

# Embed it and shape the result as a batch of one row for the classifier.
emb = USE(sentence)
sentence_emb = tf.reshape(emb, [1, -1]).numpy()

# Index of the most probable class, then its label.
class_probabilities = model.predict(sentence_emb)
prediction = np.argmax(class_probabilities)
EMOTIONS[prediction]