In [106]:
import pandas as pd

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Embedding, CuDNNLSTM, SpatialDropout1D
import tensorflow as tf
from keras import regularizers

In [107]:
# Report how many GPU devices TensorFlow can see before any training starts.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [108]:
import pickle

# Two CSV tables: the emoji sentiment data and the sentiment table.
emojis = pd.read_csv('data/Emoji_Sentiment_Data_v1.0.csv')
sentiment = pd.read_csv('data/sentiment.csv')

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load this file if it was produced by a trusted source.
with open('data/sentiment_texts.pickle', 'rb') as f:
    texts = pickle.load(f)

# Model inputs (message text) and targets (sentiment score) used by later cells.
x = texts.MessageText
y = texts.SentimentScore

In [109]:
from sklearn.model_selection import train_test_split

# Vocabulary size kept by the tokenizer; also the Embedding input dimension below.
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')

# Read the raw messages from `texts` instead of from `x`: this cell rebinds `x`
# to the padded integer matrix, so reading `x` here would break on a re-run.
# NOTE(review): the tokenizer is fitted on all messages, including the rows that
# end up in the held-out split — confirm this is acceptable.
tokenizer.fit_on_texts(texts.MessageText)
x = pad_sequences(tokenizer.texts_to_sequences(texts.MessageText))

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.05, random_state=42)

# One-hot encode both splits against the label set of the full target column.
# Encoding each split on its own drops the column of any class that happens to
# be absent from that split (likely for the 5% hold-out), which would no longer
# match the fixed-width softmax output of the models below.
label_classes = pd.Categorical(y).categories
y_train = pd.get_dummies(pd.Categorical(y_train, categories=label_classes)).values
y_val = pd.get_dummies(pd.Categorical(y_test, categories=label_classes)).values

In [110]:
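# Disabled earlier model: Embedding -> SpatialDropout1D -> CuDNNLSTM -> Dense(6, softmax).
# Everything in this cell is commented out and kept for reference only.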
# embed_dim = 128
# lstm_out = 32
# 
# model = Sequential()
# model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1]))
# model.add(SpatialDropout1D(0.2))
# model.add(CuDNNLSTM(lstm_out, kernel_regularizer=regularizers.L1L2(l1=1e-4, l2=1e-2)))
# model.add(Dense(6, activation='softmax'))
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.build(X_train.shape[1])
# print(model.summary())

In [111]:
# Re-check the number of GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [112]:
from matplotlib import pyplot

# batch_size = 32
# print(X_train.shape, y_train.shape)
# history = model.fit(X_train, y_train, epochs=10, batch_size=batch_size, verbose=1, validation_data=(X_test, y_val))
# 
# pyplot.plot(history.history['loss'])
# pyplot.plot(history.history['val_loss'])
# pyplot.title('model train vs validation loss')
# pyplot.ylabel('loss')
# pyplot.xlabel('epoch')
# pyplot.legend(['train', 'validation'], loc='upper right')
# pyplot.show()

In [113]:
from sklearn.metrics import accuracy_score, f1_score


def final_score(y_test, y_pred):
    """Return the arithmetic mean of weighted F1 and accuracy.

    Args:
        y_test: True labels, passed unchanged to both scikit-learn metrics.
        y_pred: Predicted labels, passed unchanged to both scikit-learn metrics.

    Returns:
        ``(weighted_f1 + accuracy) / 2``.
    """
    metrics = (
        f1_score(y_test, y_pred, average='weighted'),
        accuracy_score(y_test, y_pred),
    )
    return sum(metrics) / len(metrics)

# y_pred = model.predict(X_test).argmax(axis=1)
# print(final_score(y_test, y_pred))

In [114]:
from keras.layers import Bidirectional, Conv1D, MaxPooling1D
from keras.optimizers import RMSprop, Adam
import optuna
from optuna.integration import TFKerasPruningCallback


def objective(trial):
    """Train one Conv1D + bidirectional-LSTM classifier for an Optuna trial.

    Only Adam's two moment-decay rates (``beta1``, ``beta2``) are sampled from
    the trial. Every other hyperparameter is pinned to a fixed value; the
    commented-out ``suggest_*`` lines record ranges that are currently disabled.

    The function reads the notebook-level ``max_features``, ``X_train``,
    ``y_train``, ``X_test`` and ``y_val``.

    Args:
        trial: Optuna trial used to sample the hyperparameters and, through
            ``TFKerasPruningCallback``, to monitor ``val_accuracy`` for pruning.

    Returns:
        The ``val_accuracy`` value of the last epoch that was run.
    """
    #embed_dim = trial.suggest_int('embed_dim', 100, 110, step=2)
    embed_dim = 105
    #lstm1_out = trial.suggest_int('lstm1_out', 45, 55, step=2)
    lstm1_out = 47
    #lstm2_out = trial.suggest_int('lstm2_out', 20, 100, step=10)

    #spat_dropout = trial.suggest_float('dropout', 0, 0.15)
    spat_dropout = 0.1  # only used by the disabled SpatialDropout1D layer below

    #l1 = trial.suggest_float('l1', 0.003, 0.007)
    l1 = 0.05

    #l2 = trial.suggest_float('l2', 0.007, 0.012)
    l2 = 0.01

    # Each sampled hyperparameter needs its own name. Registering beta2 under
    # 'beta1' made Optuna hand back the beta1 value again, so beta2 was never
    # tuned independently and never showed up in the study results.
    # The upper bound stays strictly below 1: at exactly 1.0 Adam's bias
    # correction (1 - beta**t) is zero and the update becomes NaN.
    beta1 = trial.suggest_float('beta1', 0.9, 0.999)
    beta2 = trial.suggest_float('beta2', 0.9, 0.999)

    #learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.5, log=True)
    learning_rate = 0.005
    batch_size = 256

    model = Sequential()
    model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1]))
    #model.add(SpatialDropout1D(spat_dropout))
    model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Bidirectional(CuDNNLSTM(lstm1_out, kernel_regularizer=regularizers.L1L2(l1=l1, l2=l2))))
    #model.add(CuDNNLSTM(lstm2_out, kernel_regularizer=regularizers.L1L2(l1=l1, l2=l2)))
    model.add(Dense(6, activation='softmax'))
    # model.compile(loss='categorical_crossentropy', optimizer=RMSprop(learning_rate=learning_rate), metrics=['accuracy'])
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=learning_rate, beta_1=beta1, beta_2=beta2),
        metrics=['accuracy'],
    )
    # build() expects a full input shape (batch, sequence length), not a bare int.
    model.build((None, X_train.shape[1]))
    #y_pred = model.predict(X_test).argmax(axis=1)

    callbacks = [
        #tf.keras.callbacks.EarlyStopping("val_accuracy", patience=3, min_delta=0.003),
        TFKerasPruningCallback(trial, "val_accuracy"),
    ]
    # NOTE(review): the held-out split (X_test / y_val) doubles as the validation
    # set that drives pruning and the returned score, so it is not an unbiased
    # test set for the tuned model.
    history = model.fit(X_train, y_train, epochs=20, batch_size=batch_size, verbose=1, validation_data=(X_test, y_val),
                        callbacks=callbacks, )
    return history.history["val_accuracy"][-1]

In [None]:
# Maximise the value returned by `objective` over 30 trials, using a median pruner.
median_pruner = optuna.pruners.MedianPruner(n_startup_trials=2, n_min_trials=3, n_warmup_steps=3)
study = optuna.create_study(direction="maximize", pruner=median_pruner)
study.optimize(objective, n_trials=30)

# Persist the per-trial results, then show the best parameter set as the cell output.
df = study.trials_dataframe()
df.to_csv('data/study_two_layers_lr.csv')
study.best_params

[I 2024-04-14 07:49:49,476] A new study created in memory with name: no-name-dcf76ddf-e96f-4670-aca0-e67bc3137adb


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:50:22,739] Trial 0 finished with value: 0.5591397881507874 and parameters: {'beta1': 0.9443623585865634, 'learning_rate': 0.06799169380948811}. Best is trial 0 with value: 0.5591397881507874.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:50:56,212] Trial 1 finished with value: 0.602150559425354 and parameters: {'beta1': 0.9878168256338388, 'learning_rate': 0.0052763532332524956}. Best is trial 1 with value: 0.602150559425354.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:51:29,001] Trial 2 finished with value: 0.6172043085098267 and parameters: {'beta1': 0.9245230677938587, 'learning_rate': 0.0019836882466623953}. Best is trial 2 with value: 0.6172043085098267.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:51:41,268] Trial 3 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:52:15,395] Trial 4 finished with value: 0.6215053796768188 and parameters: {'beta1': 0.9491555369395157, 'learning_rate': 0.006097295904944688}. Best is trial 4 with value: 0.6215053796768188.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:52:27,594] Trial 5 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:52:39,596] Trial 6 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:52:51,198] Trial 7 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:53:24,500] Trial 8 finished with value: 0.6236559152603149 and parameters: {'beta1': 0.9231624838316728, 'learning_rate': 0.002306599683777887}. Best is trial 8 with value: 0.6236559152603149.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:53:35,809] Trial 9 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:54:09,473] Trial 10 finished with value: 0.6150537729263306 and parameters: {'beta1': 0.9000137392477332, 'learning_rate': 0.008010148166888902}. Best is trial 8 with value: 0.6236559152603149.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:54:21,379] Trial 11 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20

[I 2024-04-14 07:54:43,906] Trial 12 pruned. Trial was pruned at epoch 12.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:54:55,738] Trial 13 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20

[I 2024-04-14 07:55:18,147] Trial 14 pruned. Trial was pruned at epoch 12.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:55:29,566] Trial 15 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:55:41,968] Trial 16 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:55:53,806] Trial 17 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

[I 2024-04-14 07:56:05,262] Trial 18 pruned. Trial was pruned at epoch 5.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:56:38,448] Trial 19 finished with value: 0.5978494882583618 and parameters: {'beta1': 0.9141047582362605, 'learning_rate': 0.0033303820029678033}. Best is trial 8 with value: 0.6236559152603149.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20

[I 2024-04-14 07:56:53,047] Trial 20 pruned. Trial was pruned at epoch 7.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2024-04-14 07:57:26,870] Trial 21 finished with value: 0.5935483574867249 and parameters: {'beta1': 0.9292815195760659, 'learning_rate': 0.0029114117571686403}. Best is trial 8 with value: 0.6236559152603149.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20