In [2]:
import os

import keras_nlp
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from utils import *
import numpy as np

policy = keras.mixed_precision.Policy("mixed_float16")
keras.mixed_precision.set_global_policy(policy)

--ip=127.0.0.1
The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.


In [3]:
train_labels, dev_labels, _ = load_labels()

In [4]:
label_replacement = {
    'NOT': 0,
    'OFF': 1,
}

In [5]:
# Preprocessing params.
BATCH_SIZE = 32
SEQ_LENGTH = 128

# Model params.
LSTM_UNITS = 64
MODEL_DIM = 256
INTERMEDIATE_DIM = 512
NUM_HEADS = 4
DROPOUT = 0.1
NORM_EPSILON = 1e-5

# Training params.
LEARNING_RATE = 5e-5
EPOCHS = 3

## Finetuning

In [6]:
df_train = pd.read_csv("../Data/PreprocessedData/train_preprocessed.csv")
df_test = pd.read_csv("../Data/PreprocessedData/test_preprocessed.csv")
df_val = pd.read_csv("../Data/PreprocessedData/val_preprocessed.csv")

df_train = df_train[['preprocessed_text', 'label']]
df_test = df_test[['preprocessed_text', 'label']]
df_val = df_val[['preprocessed_text', 'label']]

In [7]:
# Convert DataFrame to tf.data.Dataset
train_ds = tf.data.Dataset.from_tensor_slices((df_train['preprocessed_text'].values, df_train['label'].values))
test_ds = tf.data.Dataset.from_tensor_slices((df_test['preprocessed_text'].values, df_test['label'].values))
val_ds = tf.data.Dataset.from_tensor_slices((df_val['preprocessed_text'].values, df_val['label'].values))

In [8]:
# Batch and shuffle the dataset
train_ds = train_ds.batch(BATCH_SIZE).shuffle(10000)
test_ds = test_ds.batch(BATCH_SIZE)
val_ds = val_ds.batch(BATCH_SIZE).shuffle(10000)

In [None]:
# Take as input the tokenized input.
inputs = keras.Input(shape=(SEQ_LENGTH,), dtype=tf.int32)

# Add an embedding layer
embedding_layer = keras.layers.Embedding()
embedding_outputs = embedding_layer(inputs)

# Add 2 LSTM layer
lstm_layer = keras.layers.LSTM(LSTM_UNITS, activation = "relu", return_sequences=True)(embedding_outputs)
lstm_layer = keras.layers.LSTM(LSTM_UNITS, activation = "relu")(lstm_layer)

# Predict an output label.
outputs = keras.layers.Dense(1, activation="sigmoid")(lstm_layer)

# Define and compile our finetuning model.
finetuning_model = keras.Model(inputs, outputs)
finetuning_model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    metrics=["accuracy"],
)

# Finetune the model for the SST-2 task.
finetuning_model.fit(
    train_ds, validation_data=val_ds, epochs=EPOCHS,
)

In [None]:
# Add our tokenization into our final model.
inputs = keras.Input(shape=(), dtype=tf.string)
tokens = tokenizer(inputs)
outputs = finetuning_model(tokens)
final_model = keras.Model(inputs, outputs)
final_model.save("final_model")

# This model can predict directly on raw text.
restored_model = keras.models.load_model("final_model", compile=False)
inference_data = tf.constant(["Terrible, no good, trash.", "So great; I loved it!"])
print(restored_model(inference_data))

In [None]:
# Predictions
train_pred = final_model.predict(finetune_train_ds)
test_pred = final_model.predict(finetune_test_ds)
val_pred = final_model.predict(finetune_val_ds)

# Convert predictions to labels
train_pred = np.where(train_pred > 0.5, 1, 0)
test_pred = np.where(test_pred > 0.5, 1, 0)
val_pred = np.where(val_pred > 0.5, 1, 0)

# Convert labels to numpy arrays
computeAllScores(train_pred, val_pred, test_pred)