# Argument Mining - Optuna

Hyperparameter optimization with Optuna for Argument Mining (AM) task 1: binary YES/NO sentence classification on the `sent_label1` column.

In [None]:
!python --version

Python 3.10.12


## Create a BERT-based pre-trained model

In [None]:
!pip install optuna



In [None]:
# NOTE(review): TF_USE_LEGACY_KERAS is exported in a later cell, i.e. after
# this import has already run — confirm the flag is still honoured when it is
# set post-import, or move that cell above this one.
import tensorflow as tf
# Set AutoGraph's logging verbosity to 0.
tf.autograph.set_verbosity(0)
# Seed TensorFlow's global random generator. Only TensorFlow is seeded here;
# other libraries' generators are not touched by this cell.
tf.random.set_seed(2)

In [None]:
!pip install tf_keras
%env TF_USE_LEGACY_KERAS=1

env: TF_USE_LEGACY_KERAS=1


In [None]:
!pip install transformers
import transformers



In [None]:
import keras

# Report the installed version of each core library, one per line
# (TensorFlow, Keras, Transformers — in that order).
for library in (tf, keras, transformers):
  print(library.__version__)

2.15.0
2.15.0
4.38.2


In [None]:
from transformers import TFBertForSequenceClassification, BertTokenizer

In [None]:
# Hugging Face Hub identifier of the checkpoint used throughout the notebook.
hf_model_name = "dccuchile/bert-base-spanish-wwm-cased"
# Load the checkpoint with a 2-label sequence-classification head. The
# classifier layer is not part of the checkpoint and is newly initialized
# (see the warning printed below), so the model needs fine-tuning before use.
model = TFBertForSequenceClassification.from_pretrained(hf_model_name, num_labels=2)
# Tokenizer loaded from the same checkpoint; later cells use this global to
# encode the sentences.
tokenizer = BertTokenizer.from_pretrained(hf_model_name)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier', 'bert/pooler/dense/kernel:0', 'bert/pooler/dense/bias:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training configuration for the baseline model: the loss is computed on raw
# logits with integer class labels, and accuracy is tracked under the name
# 'accuracy'.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')

# Adam with gradient-norm clipping.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=3e-5,
    epsilon=1e-08,
    clipnorm=1.0,
)

model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=[metric],
)

## Dataset

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper) so that the dataset
# CSV stored on Drive can be read by the cells below.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

def load_sst_data(path: str):
  """Read the CSV file at `path` and return its contents as a pandas DataFrame.

  Args:
    path: Location of the CSV file, passed straight to `pd.read_csv`.

  Returns:
    The DataFrame produced by `pd.read_csv` with its default options.
  """
  return pd.read_csv(path)

# Location of the annotated corpus inside the mounted Drive. The path is
# relative, so it resolves against the notebook's current working directory.
drive_path = "drive/My Drive/Datasets/dm-2019/"
dataset_home = f"{drive_path}data/dm-2019-annotated.csv"

# Load the corpus and display it as the cell's output.
dataset = load_sst_data(path=dataset_home)
dataset

Unnamed: 0,sent_id,sent_text,sent_label1,sent_label2,sent_label3
0,109-0-0,Limpieza de graffitis y remodelación de aluche,YES,CLAIM,NONE
1,109-0-0,Los vecinos de Aluche vemos día tras día como ...,YES,CLAIM,NONE
2,109-0-0,Los vecinos de Aluche vemos día tras día como ...,YES,CLAIM,NONE
3,109-0-1,"Graffitis y basura por todas partes, aceras ro...",YES,CLAIM,NONE
4,109-17276-0,No a los ruidos.,YES,CLAIM,NONE
...,...,...,...,...,...
3249,89-78258-1,Os recuerdo que España es el país de la OCDE d...,YES,PREMISE,EXPLANATION
3250,89-78258-2,La desigualdad en España avanza a un ritmo que...,YES,PREMISE,EXPLANATION
3251,89-78258-3,La pobreza y la exclusión en España han aument...,NO,SPAM,NONE
3252,89-78258-4,"Por otra lado, estamos a la espera de que nos ...",NO,SPAM,NONE


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

def split_data(df, target_column, test_size=0.2, validation_size=0.1, random_state=None):
  """Stratified three-way split of `df` into train, validation and test frames.

  Args:
    df: DataFrame to split.
    target_column: Name of the column whose values drive the stratification.
    test_size: Fraction of `df` reserved for the test set.
    validation_size: Fraction of `df` reserved for the validation set.
    random_state: Seed forwarded to both `train_test_split` calls.

  Returns:
    A `(train, validation, test)` tuple of DataFrames.
  """
  holdout_fraction = test_size + validation_size

  # First cut: carve the combined validation+test portion off the training data.
  train_part, holdout_part = train_test_split(
      df,
      test_size=holdout_fraction,
      stratify=df[target_column],
      random_state=random_state,
  )

  # Second cut: divide the held-out portion. The second frame returned by
  # train_test_split has the requested `test_size`, so it is the validation set.
  test_part, validation_part = train_test_split(
      holdout_part,
      test_size=validation_size / holdout_fraction,
      stratify=holdout_part[target_column],
      random_state=random_state,
  )

  return train_part, validation_part, test_part

In [None]:
# Task 1 target column and the mapping from its string labels to class ids.
label_column = "sent_label1"
t1_label_dict = {"NO": 0, "YES": 1}
t1_num_labels = len(t1_label_dict)

# Keep only the sentence text and the task-1 label, encoding the label as an
# integer class id in the same step.
t1_data = dataset[["sent_text", label_column]].replace({label_column: t1_label_dict})

# 80% train / 10% validation / 10% test, stratified on the label.
training_set, validation_set, test_set = split_data(
    t1_data,
    target_column=label_column,
    test_size=0.1,
    validation_size=0.1,
    random_state=42,
)

In [None]:
# Shuffle each split with a fixed seed. Without `random_state`, sklearn's
# shuffle draws from NumPy's unseeded global generator, so the row order (and
# therefore the order in which training examples are fed to the model) would
# change on every run even though the split itself is seeded.
training_set = shuffle(training_set, random_state=42)
validation_set = shuffle(validation_set, random_state=42)
test_set = shuffle(test_set, random_state=42)

# Obtain text and label vectors
train_texts = training_set["sent_text"]
train_labels = training_set[label_column]

validation_texts = validation_set["sent_text"]
validation_labels = validation_set[label_column]

test_texts = test_set["sent_text"]
test_labels = test_set[label_column]

# Sanity check: report how many rows ended up in each split.
print("Training size: {}".format(len(training_set)))
print("Validation size: {}".format(len(validation_set)))
print("Test size: {}".format(len(test_set)))

Training size: 2603
Validation size: 326
Test size: 325


In [None]:
from transformers import InputFeatures

def convert_examples_to_features(texts, labels, max_length=128):
  """Tokenize `texts` and pair every encoding with its label.

  All texts are encoded in a single batch with the notebook-level `tokenizer`,
  padded to the longest sequence in the batch and truncated to `max_length`
  tokens.

  Args:
    texts: Iterable of sentences (e.g. a list or a pandas Series).
    labels: Iterable of integer class ids, aligned with `texts`.
    max_length: Maximum number of tokens kept per sentence.

  Returns:
    A list with one `InputFeatures` per sentence, in input order.
  """
  # Materialize both inputs so they can be indexed by position regardless of
  # the container type (a shuffled pandas Series has a non-positional index).
  texts = list(texts)
  labels = list(labels)

  # `truncation=True` is required for `max_length` to be enforced; without it
  # over-long sentences are encoded at full length and the limit is ignored.
  batch_encoding = tokenizer.batch_encode_plus(
      texts,
      max_length=max_length,
      padding='longest',
      truncation=True,
  )

  # Slice every field of the batch encoding at position `idx` to rebuild the
  # per-sentence inputs.
  return [
      InputFeatures(
          **{key: batch_encoding[key][idx] for key in batch_encoding},
          label=labels[idx],
      )
      for idx in range(len(texts))
  ]

def convert_features_to_tf_dataset(features):
  """Wrap a sequence of feature objects in an unbatched `tf.data.Dataset`.

  Each dataset element is an `(inputs, label)` pair, where `inputs` is a dict
  holding the `input_ids`, `attention_mask` and `token_type_ids` attributes of
  one feature object and `label` is its `label` attribute.

  Args:
    features: Iterable of objects exposing the four attributes named above.

  Returns:
    A `tf.data.Dataset` built from a generator over `features`.
  """
  input_keys = ("input_ids", "attention_mask", "token_type_ids")

  def gen():
    for ex in features:
      model_inputs = {key: getattr(ex, key) for key in input_keys}
      yield (model_inputs, ex.label)

  # Every input is a variable-length int32 vector; the label is an int32 scalar.
  output_types = ({key: tf.int32 for key in input_keys}, tf.int32)
  output_shapes = (
      {key: tf.TensorShape([None]) for key in input_keys},
      tf.TensorShape([]),
  )

  return tf.data.Dataset.from_generator(gen, output_types, output_shapes)

In [None]:
# Number of examples per batch, applied to both the training and the
# validation tf.data pipelines.
batch_size = 32

In [None]:
# Tokenize each split once.
train_features = convert_examples_to_features(train_texts, train_labels)
validation_features = convert_examples_to_features(validation_texts, validation_labels)

# Training pipeline: shuffle through a 100-element buffer, then batch.
train_dataset = (
    convert_features_to_tf_dataset(train_features)
    .shuffle(100)
    .batch(batch_size)
)

# Validation pipeline: batch only, order is kept.
validation_dataset = convert_features_to_tf_dataset(validation_features).batch(batch_size)

## Introduce Optuna

In [None]:
import optuna

In [None]:
def objective(trial):
  """Optuna objective: fine-tune a fresh classifier and score it on validation data.

  Samples the learning rate, the Adam epsilon and the number of epochs from
  `trial`, trains on the notebook-level `train_dataset` and evaluates on
  `validation_dataset`.

  Args:
    trial: Optuna trial used to sample the hyperparameters.

  Returns:
    Validation accuracy measured after the last training epoch; higher is
    better.
  """
  # Reload the pre-trained checkpoint so every trial starts from the same weights.
  model = TFBertForSequenceClassification.from_pretrained(hf_model_name, num_labels=t1_num_labels)

  # Adjustable hyperparameters (learning rate and epsilon are sampled on a log scale)
  hp_learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True)
  hp_epsilon = trial.suggest_float("epsilon", 1e-9, 1e-7, log=True)
  hp_epochs = trial.suggest_int("epochs", 2, 7, step=1)
  optimizer = tf.keras.optimizers.Adam(learning_rate=hp_learning_rate, epsilon=hp_epsilon, clipnorm=1.0)

  # Fixed hyperparameters
  loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  accuracy = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

  model.compile(optimizer=optimizer, loss=loss, metrics=[accuracy])

  # Train and evaluate using tf.keras.Model.fit()
  history = model.fit(train_dataset, validation_data=validation_dataset, epochs=hp_epochs)

  # The metric is registered as 'accuracy', so Keras logs it on the validation
  # data as 'val_accuracy'; report the value from the final epoch.
  return history.history['val_accuracy'][-1]

In [None]:
# The objective returns validation accuracy, so the study has to maximize it.
# With direction="minimize" Optuna reports the trial with the LOWEST accuracy
# as the best one and steers the search towards worse hyperparameters.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)

[I 2024-03-17 16:26:09,652] A new study created in memory with name: no-name-fc29903e-6610-4130-8c0e-bc4c1c9158c4
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier', 'bert/pooler/dense/kernel:0', 'bert/pooler/dense/bias:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


Cause: for/else statement not yet supported


Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


[I 2024-03-17 16:42:21,808] Trial 0 finished with value: 0.7024539709091187 and parameters: {'learning_rate': 2.4133739026974336e-05, 'epsilon': 9.2636554389763e-08, 'epochs': 7}. Best is trial 0 with value: 0.7024539709091187.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier', 'bert/pooler/dense/kernel:0', 'bert/pooler/dense/bias:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


[I 2024-03-17 16:57:20,883] Trial 1 finished with value: 0.745398759841919 and parameters: {'learning_rate': 3.922101163703746e-06, 'epsilon': 6.029811608576017e-08, 'epochs': 7}. Best is trial 0 with value: 0.7024539709091187.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier', 'bert/pooler/dense/kernel:0', 'bert/pooler/dense/bias:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


[I 2024-03-17 17:09:56,749] Trial 2 finished with value: 0.7147239446640015 and parameters: {'learning_rate': 1.5823731963770544e-05, 'epsilon': 7.777887920274661e-08, 'epochs': 6}. Best is trial 0 with value: 0.7024539709091187.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier', 'bert/pooler/dense/kernel:0', 'bert/pooler/dense/bias:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


[I 2024-03-17 17:19:04,583] Trial 3 finished with value: 0.7300613522529602 and parameters: {'learning_rate': 5.478881295182637e-05, 'epsilon': 5.880198070445549e-09, 'epochs': 4}. Best is trial 0 with value: 0.7024539709091187.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier', 'bert/pooler/dense/kernel:0', 'bert/pooler/dense/bias:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


[I 2024-03-17 17:24:36,444] Trial 4 finished with value: 0.7269938588142395 and parameters: {'learning_rate': 6.141081321194217e-06, 'epsilon': 4.511574143779203e-09, 'epochs': 2}. Best is trial 0 with value: 0.7024539709091187.


In [None]:
# Summarize the study: how many trials were run, followed by the objective
# value and hyperparameters of the best trial.
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
# Which trial counts as "best" depends on the study's optimization direction.
trial = study.best_trial
print("  Value:", trial.value)
print("  Best params:", study.best_params)
print("  Params:")
# Same hyperparameters again, printed one per line.
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials:  5
Best trial:
  Value: 0.7024539709091187
  Best params: {'learning_rate': 2.4133739026974336e-05, 'epsilon': 9.2636554389763e-08, 'epochs': 7}
  Params:
    learning_rate: 2.4133739026974336e-05
    epsilon: 9.2636554389763e-08
    epochs: 7
