In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Get Universal Sentence Encoder from TensorFlow Hub

In [None]:
# TF Hub handle of the multilingual Universal Sentence Encoder (large variant, version 3).
USE_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3"

# Load the encoder once; the returned object is called later to turn text into embeddings.
USE = hub.load(USE_MODEL_URL)

# Get Dataset

In [None]:
# Location of the training CSV, relative to the notebook's working directory.
TRAIN_CSV_PATH = './data/edited/train.csv'

# Read the whole training set into a DataFrame and show it as the cell output.
df = pd.read_csv(TRAIN_CSV_PATH)

df

### Split features and labels

In [None]:
# Features are the raw sentences; targets are the emotion labels of the same rows.
x, y = df['Sentence'], df['Emotion']

### Number of unique labels to classify

In [None]:
# Distinct emotion labels found in the dataset; their count sizes the
# classifier's softmax output layer further down.
EMOTIONS = pd.unique(df['Emotion'])
N_EMOTIONS = len(EMOTIONS)
N_EMOTIONS

# Process Dataset

## Convert Sentence to Embedding Vector

In [None]:
# Embed the sentences in mini-batches instead of issuing one model call per
# sentence. The encoder accepts a list of strings and returns one embedding row
# per string, so batching produces the same (n_sentences, embedding_dim) matrix
# (up to floating-point noise) with far fewer calls.
EMBED_BATCH_SIZE = 32  # sentences per encoder call; lower it if memory is tight

sentences = x.tolist()
emb_batches = []

for start in tqdm(range(0, len(sentences), EMBED_BATCH_SIZE)):
    batch = sentences[start:start + EMBED_BATCH_SIZE]
    emb_batches.append(USE(batch).numpy())

# Stack the per-batch blocks into a single 2-D array, one row per sentence.
x_emb = np.vstack(emb_batches)

## One-hot encode the labels

In [None]:
from sklearn.preprocessing import LabelEncoder
# Use the public tf.keras path: `keras.utils.np_utils` is a private module that is
# not available in newer Keras releases, and the rest of the notebook already
# builds the model with tf.keras.
from tensorflow.keras.utils import to_categorical

In [None]:
# Map each emotion string to an integer id, then expand the ids into one-hot rows.
# The labels are read from `df` rather than from `y`, so re-running this cell does
# not try to label-encode an already one-hot `y`.
encoder = LabelEncoder()
y_ids = encoder.fit_transform(df['Emotion'])

# Fix the one-hot width to the number of classes the encoder learned.
y = to_categorical(y_ids, num_classes=len(encoder.classes_))

## Train and Test Split

In [None]:
# Hold out 20% of the embedded sentences for the final evaluation.
# A fixed random_state makes the split (and therefore the reported test
# metrics) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    x_emb,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the Model

In [None]:
# Feed-forward classifier on top of the fixed sentence embeddings:
# two wide ReLU layers with dropout, a narrow ReLU layer, then a softmax
# over the emotion classes.
# NOTE(review): Keras Dropout takes the fraction of units to DROP, so 0.8
# discards 80% of activations during training — confirm this is intentional.
layer_stack = [
    tf.keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.8),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.8),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(N_EMOTIONS, activation='softmax'),
]
model = tf.keras.models.Sequential(layer_stack)

# Categorical cross-entropy matches the one-hot encoded targets.
adam = tf.keras.optimizers.Adam(0.003)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

### Train

In [None]:
# Training settings gathered in one place. validation_split carves 20% out of
# the training data for per-epoch validation, separate from the held-out test set.
fit_options = dict(
    epochs=30,
    validation_split=0.2,
    shuffle=True,
    batch_size=32,
)

# `results` keeps the History object returned by fit.
results = model.fit(X_train, y_train, **fit_options)

### Evaluate

In [None]:
# Score the trained model on the held-out test split; the returned
# loss and accuracy are shown as the cell output.
model.evaluate(x=X_test, y=y_test)

### New Predictions

In [None]:
# Classify a single new sentence with the trained model.
sentence = 'You are great'

# Embed the sentence and shape it as a batch of one row for model.predict.
emb = USE(sentence)
sentence_emb = tf.reshape(emb, [-1]).numpy().reshape(1, -1)

# Take the index of the highest class probability directly, instead of
# round-tripping it through a one-hot vector and reading back its length.
probabilities = model.predict(sentence_emb)
predicted_id = int(np.argmax(probabilities, axis=-1)[0])

# Translate the class id back to its original emotion label.
prediction = encoder.inverse_transform([predicted_id])
prediction[0]