### Check available GPUs

In [1]:
import os
os.environ['TF_USE_LEGACY_KERAS'] = '1'

In [None]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

### Import necessary libraries

In [3]:
import s3fs
import h5py
import tensorflow_hub as hub
import tensorflow_text
import keras    

### BERT Config

In [17]:
class BertConfig:
    BERT_PREPROCESSOR="https://kaggle.com/models/tensorflow/bert/TensorFlow2/en-uncased-preprocess/3"
    BERT_MODEL="https://www.kaggle.com/models/tensorflow/bert/TensorFlow2/bert-en-uncased-l-12-h-768-a-12/2"
    SEQUENCE_LENGTH=13
    BATCH_SIZE=16
    EPOCHS=4
    LR=5e-5
    NUM_CLASSES=28
    SHUFFLE=42

### Utils function

In [None]:
def load_ds(dataset_type: str, key: str, secret: str, endpoint_url: str):
    s3 = s3fs.S3FileSystem(
        anon=False, 
        key=key, 
        secret=secret, 
        endpoint_url=endpoint_url
    )

    with s3.open(f's3://emotiai/goemotion/{dataset_type}.h5', 'rb') as f:
        h5_file = h5py.File(f, 'r')

        # Stack all tensors into a single tensor (if they have the same shape)
        features = h5_file["features"]
        tensored_features = tf.convert_to_tensor(features)

        labels = h5_file['labels']
        tensored_labels = tf.convert_to_tensor(labels[:], dtype=tf.float32)  
        
    return tf.data.Dataset.from_tensor_slices((tensored_features, tensored_labels)).shuffle(BertConfig.SHUFFLE).batch(BertConfig.BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


### Load preprocessed data

In [19]:
ACCESS_KEY="minio_access_key"
SECRET_KEY="minio_secret_key"
ENDPOINT_URL="http://localhost:9000"

In [28]:
train_ds = load_ds("train", key=ACCESS_KEY, secret=SECRET_KEY, endpoint_url=ENDPOINT_URL)
dev_ds = load_ds("dev", key=ACCESS_KEY, secret=SECRET_KEY, endpoint_url=ENDPOINT_URL)
test_ds = load_ds("test", key=ACCESS_KEY, secret=SECRET_KEY, endpoint_url=ENDPOINT_URL)

ValueError: I/O operation on closed file.

### Embed model

In [8]:
def build_bert_preprocessor():
    preprocessor = hub.load(BertConfig.BERT_PREPROCESSOR)
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
    tokenize = hub.KerasLayer(preprocessor.tokenize)
    tokenized_input = tokenize(text_input)
    packer = hub.KerasLayer(
        preprocessor.bert_pack_inputs,
        arguments=dict(seq_length=BertConfig.SEQUENCE_LENGTH)
    )
    encoder_inputs = packer([tokenized_input])

    return tf.keras.Model(text_input, encoder_inputs)

### Build model

In [9]:
def build_bert_model(bert_preprocessor, bert_model):
    inputs = tf.keras.layers.Input(shape=(), dtype="string")
    encoder_inputs = bert_preprocessor(inputs)
    bert_outputs = bert_model(encoder_inputs)
    outputs = tf.keras.layers.Dense(BertConfig.NUM_CLASSES, activation="sigmoid")(bert_outputs["pooled_output"])
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    return model

### Train the model

In [15]:
def classification_metrics(average: str = None):
    f1_name = f'_{average}'
    if average == None:
        f1_name = ''

    return [tf.keras.metrics.F1Score(
        name=f'f1_{f1_name}',
        average=average,
    ), tf.keras.metrics.BinaryAccuracy("binary_accuracy"), tf.keras.metrics.Precision(name="precision"), tf.keras.metrics.Recall(name="recall")]

In [24]:
tf.debugging.set_log_device_placement(True)

metrics = classification_metrics("macro")
bert_preprocessor = build_bert_preprocessor()
bert_model = hub.KerasLayer(BertConfig.BERT_MODEL, trainable=False)

model = build_bert_model(bert_preprocessor, bert_model)

model.compile(
    loss="binary_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=BertConfig.LR),
    metrics=metrics
)
model.fit(train_ds, epochs=BertConfig.EPOCHS, validation_data=dev_ds)



Epoch 1/4


ValueError: in user code:

    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1398, in train_function  *
        return step_function(self, iterator)
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1381, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1370, in run_step  **
        outputs = model.train_step(data)
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1152, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1246, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/engine/compile_utils.py", line 620, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/utils/metrics_utils.py", line 77, in decorated
        result = update_state_fn(*args, **kwargs)
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/metrics/base_metric.py", line 140, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/Users/haiduong/miniforge3/envs/emotiai/lib/python3.11/site-packages/tf_keras/src/metrics/f_score_metrics.py", line 176, in update_state  **
        y_true = tf.convert_to_tensor(y_true, dtype=self.dtype)

    ValueError: Tensor conversion requested dtype float32 for Tensor with dtype int64: <tf.Tensor 'IteratorGetNext:1' shape=(None, 28) dtype=int64>
