In [None]:
# Authenticate with the Hugging Face Hub. The training section later attaches a
# PushToHubCallback, which presumably needs these credentials to upload the model.
from huggingface_hub import notebook_login

notebook_login()


In [None]:
from transformers.utils import send_example_telemetry
send_example_telemetry("language_modeling_notebook_finetuning_nli", framework="tensorflow")

In [None]:
def clear_gpu_mem():
    """Reset the current CUDA device through numba.

    numba is imported inside the function, so it is only required when this
    helper is actually called.
    """
    from numba import cuda

    cuda.get_current_device().reset()

#### Load finetuning data

In [None]:
from sklearn.model_selection import train_test_split # for more convenient data splitting
import numpy as np
import pandas as pd

from datasets import Dataset, DatasetDict # to create Dataset objects
import pprint
import tensorflow as tf

import mlflow # for ml tracking

from string import Template # to template the premise and hypothesis for the NLI task

In [None]:
# DataFrame display: no column-width limit, left-aligned column headers.
pd.set_option("display.max_colwidth", None)
pd.set_option("colheader_justify", "left")

# Directory and file names of the preprocessed parquet data.
path = "../data"
dataset_files = ["question_avoidance_preprocessed_dataset.parquet"]

# Map each dataset name (the file name up to ".parquet") to its DataFrame.
finetuning_datasets = {}
for file_name in dataset_files:
    dataset_name = file_name.split(".parquet")[0]
    finetuning_datasets[dataset_name] = pd.read_parquet(f"{path}/{file_name}", engine="pyarrow")

In [None]:
print("Available datasets:", list(finetuning_datasets.keys()))

#### Initialize mlflow

To launch the MLflow UI:

```shell
poetry run mlflow ui
```

In [None]:
# Point MLflow at the tracking server *before* selecting the experiment.
# set_experiment() looks the experiment up (or creates it) in whichever
# tracking store is configured at call time, so calling it first would
# register the experiment in the default local store instead of the server.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Question Dodging 1")

# autologging
mlflow.tensorflow.autolog()

#### Set up GPU

In [None]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        print(gpus)
        # Let TensorFlow grow its GPU allocation on demand instead of reserving
        # all device memory up front. Memory growth has to be configured
        # identically on every visible GPU, so apply it to all of them rather
        # than only to gpus[0].
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Alternative (not used): hard-cap the first GPU at 1GB with
        # tf.config.set_logical_device_configuration(
        #     gpus[0], [tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)


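It's important to reformulate the premise and hypothesis fed into the model. Example: the premise combines the question and the answer (`Question: <question>. Answer: <answer>`), and the hypothesis is a fixed statement such as `In this example, the answer evades or ignores the question.`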

#### Load zero-shot model

There are a number of zero-shot classification models that could be used.

One example is [typeform/distilbert-base-uncased-mnli](https://huggingface.co/typeform/distilbert-base-uncased-mnli). It supports TF/Keras as well and performs okay-ish.

Other good options:
- https://huggingface.co/facebook/bart-large-mnli (for English only)
- https://huggingface.co/MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli (outperforms other models)
- https://huggingface.co/joeddav/xlm-roberta-large-xnli (multilingual)

In [None]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

In [None]:
# loading the model
# Checkpoint name on the Hugging Face Hub (DistilBERT with an MNLI head, per its name).
model_name = "typeform/distilbert-base-uncased-mnli"

tokenizer = AutoTokenizer.from_pretrained(model_name)
print("loading model")

model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
# Number of classes in the checkpoint's label mapping; reused when the model is reloaded below.
num_labels = len(model.config.id2label)

print("loaded")

In [None]:
# Reload the model with the correct config: keep the checkpoint's own
# id -> label mapping and pass its inverse (label -> id) explicitly.
id2label = model.config.id2label
print(id2label)
label2id = dict(zip(id2label.values(), id2label.keys()))

# Drop the first instance before the configured one is created.
del model

model = TFAutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=num_labels, id2label=id2label, label2id=label2id
)

#### Load preprocessed data

In [None]:
# Stack every loaded DataFrame into one frame (original row indexes are kept).
list_of_datasets = list(finetuning_datasets.values())
data = pd.concat(list_of_datasets)

# The per-file dict is not used again once everything lives in `data`.
del finetuning_datasets
data.sample(3)

One could use the `train_test_split` method from `datasets` ([source](https://huggingface.co/docs/datasets/v2.14.5/en/package_reference/main_classes)), which readily splits a dataset object into a train and a test set, but using the sklearn one makes it easier to get a train, test, and validation split.

In [None]:
# Inputs are the question/answer text pair; the target is the "label" column
# (kept as a one-column DataFrame so it can be concatenated back afterwards).
X = data[["question", "answer"]]
y = data[["label"]]

# Step 1: hold out 20% of all rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Step 2: take 25% of the remaining rows for validation (0.25 x 0.8 = 0.2),
# which gives a 60/20/20 train/val/test split overall.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=1
)

In [None]:
# Re-attach each split's labels to its features, column-wise.
train_dataset, test_dataset, val_dataset = (
    pd.concat([features, labels], axis=1)
    for features, labels in ((X_train, y_train), (X_test, y_test), (X_val, y_val))
)

In [None]:
# Convert each pandas split into a `datasets.Dataset`, without carrying the
# pandas index over as a column.
train_dataset, test_dataset, val_dataset = (
    Dataset.from_pandas(frame, preserve_index=False)
    for frame in (train_dataset, test_dataset, val_dataset)
)

In [None]:
del data, X, y

In [None]:
dataset = DatasetDict({"train": train_dataset, "test": test_dataset, "val": val_dataset})

In [None]:
dataset

In [None]:
# Open the MLflow run under which the parameters and metrics logged below are
# recorded; it stays active until mlflow.end_run() is called at the end of the notebook.
mlflow.start_run()

#### Preprocessing the input sequence

In [None]:
# Premise: the question/answer pair. Hypothesis: a fixed statement the model
# is asked to judge against that premise.
premise_template = Template("Question: $question. Answer: $answer")
hypothesis_template = Template("In this example, the answer evades or ignores the question.")

# Record the templates with the run. safe_substitute() without arguments
# leaves the $placeholders untouched, so the raw template text is logged.
mlflow.log_params(
    dict(
        premise_template=premise_template.safe_substitute(),
        hypothesis_template=hypothesis_template.safe_substitute(),
    )
)

def preprocess_function(row):
    """Turn one question/answer row into a tokenised (premise, hypothesis) pair.

    Args:
        row: mapping with the keys "question", "answer" and "label".

    Returns:
        The tokenizer output for the pair, extended with
        - "labels": the row's label, and
        - "input_sentence": the decoded input ids, for inspection.
    """
    premise = premise_template.safe_substitute(question=row["question"], answer=row["answer"])
    hypothesis = hypothesis_template.safe_substitute()
    # Truncate pairs that exceed the tokenizer's maximum length. Without this,
    # a long question/answer pair yields more tokens than the model has
    # position embeddings for, which causes indexing errors in the model.
    encoded = tokenizer(premise, hypothesis, truncation=True)
    encoded["labels"] = row["label"]
    encoded["input_sentence"] = tokenizer.decode(encoded.input_ids)
    return encoded

In [None]:
example = preprocess_function(dataset["train"][0])
print(example)

In [None]:
# Run preprocess_function over every split; the raw text and label columns
# are removed from the result.
encoded_dataset = dataset.map(
    preprocess_function,
    remove_columns=["question", "answer", "label"],
)

In [None]:
encoded_dataset["train"].features["labels"]

In [None]:
encoded_dataset

In [None]:
# a helper function to show the prediction results

def get_results(outputs, model, return_all_scores=True):
    """Convert a batch of logits into labelled softmax scores.

    Args:
        outputs: array-like of logits; the last axis runs over the classes and
            each item along the first axis is one example.
        model: object exposing ``model.config.id2label`` (class index -> label).
        return_all_scores: when True, return the score of every class for each
            example; otherwise return only the top-scoring class per example.

    Returns:
        ``return_all_scores=True``: a list (one entry per example) of lists of
        ``{"label": ..., "score": ...}`` dicts, one per class.
        ``return_all_scores=False``: a list of single ``{"label", "score"}``
        dicts holding the best class of each example.
        An empty input yields an empty list.
    """
    logits = np.asarray(outputs)
    if logits.size == 0:
        return []

    # Numerically stable softmax: subtracting the per-row maximum leaves the
    # result unchanged but keeps np.exp from overflowing (inf / inf -> nan)
    # on large logits.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)
    scores = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)

    id2label = model.config.id2label
    if return_all_scores:
        return [
            [{"label": id2label[i], "score": score.item()} for i, score in enumerate(row)]
            for row in scores
        ]
    return [{"label": id2label[row.argmax()], "score": row.max().item()} for row in scores]

In [None]:
dataset_batch_size = 4 # 16

# Arguments that are the same for all three splits.
shared_kwargs = dict(batch_size=dataset_batch_size, tokenizer=tokenizer)

# Only the training split is shuffled; validation and test keep their order.
tf_train_dataset = model.prepare_tf_dataset(encoded_dataset["train"], shuffle=True, **shared_kwargs)
tf_validation_dataset = model.prepare_tf_dataset(encoded_dataset["val"], shuffle=False, **shared_kwargs)
tf_test_dataset = model.prepare_tf_dataset(encoded_dataset["test"], shuffle=False, **shared_kwargs)


In [None]:
# now the dataset is ready to be fed into the model to fit
tf_train_dataset

In [None]:
tf_train_dataset

In [None]:
# del encoded_dataset

In [None]:
from transformers import create_optimizer

# Keep in sync with the batch size the tf.data pipelines were built with.
batch_size = dataset_batch_size
num_epochs = 5
number_of_training_examples = len(encoded_dataset["train"])

# tf_train_dataset is already batched, so its cardinality is the number of
# *batches* per epoch, not the number of examples. Dividing it by batch_size
# again would shrink the schedule and drive the learning rate to zero long
# before training ends.
batches_per_epoch = int(tf_train_dataset.cardinality().numpy())
if batches_per_epoch < 0:
    # Negative values mean the cardinality is unknown or infinite; fall back
    # to deriving the batch count from the number of examples.
    batches_per_epoch = number_of_training_examples // batch_size
total_train_steps = batches_per_epoch * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps
)

In [None]:
model.compile(optimizer=optimizer) # run_eagerly=True, 

In [None]:
# evaluating loss before finetuning the model on our "target data"
before_finetuning_history = model.evaluate(tf_test_dataset)

In [None]:
# we are looking at Mean loss
print(model.metrics)
print(before_finetuning_history)

In [None]:
mlflow.log_metric("loss before finetuning", before_finetuning_history)

In [None]:
model.summary()

In [None]:
#import os
#os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

In [None]:
from evaluate import load


In [None]:
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard
from transformers.keras_callbacks import KerasMetricCallback

# remember to install git-lfs
# !apt install git-lfs

def compute_metrics(eval_predictions):
    """Score a ``(predictions, labels)`` pair with the module-level ``metric``.

    The predicted class of each example is the arg-max over axis 1 of the
    predictions; the result is whatever ``metric.compute`` returns.
    """
    logits, references = eval_predictions
    predicted_classes = np.argmax(logits, axis=1)
    return metric.compute(predictions=predicted_classes, references=references)

#metric = model.metrics[0]

# Metric object used by compute_metrics, evaluated on the validation set by
# the Keras metric callback.
metric = load("glue", "mnli")
metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_dataset)

# TensorBoard logs go into a sub-directory of the model save directory.
push_to_hub_model_id = "question-dodging-finetuned-distilbert-base-uncased-mnli"
tensorboard_callback = TensorBoard(log_dir="./text_classification_model_save/logs")

# Saves the model and tokenizer to output_dir and pushes them to the Hub repo
# named by push_to_hub_model_id.
push_to_hub_callback = PushToHubCallback(
    output_dir="./text_classification_model_save", tokenizer=tokenizer, hub_model_id=push_to_hub_model_id
)

callbacks = [metric_callback, tensorboard_callback, push_to_hub_callback]


In [None]:
# clear_gpu_mem()

In [None]:
# tf_train_dataset is already batched by prepare_tf_dataset, so no batch_size
# is passed here: Keras expects batch_size to be left unset for tf.data
# inputs, and the previous batch_size=2 contradicted the batch size the
# dataset was actually built with.
history = model.fit(
    tf_train_dataset,
    validation_data=tf_validation_dataset,
    epochs=num_epochs,
    verbose=1,
    callbacks=callbacks,
)


In [None]:
# evaluating loss after finetuning, on the same test set as the pre-finetuning baseline
after_finetuning_history = model.evaluate(tf_test_dataset)
# Log it alongside "loss before finetuning" so both values can be compared in the MLflow run.
mlflow.log_metric("loss after finetuning", after_finetuning_history)
after_finetuning_history

In [None]:
mlflow.end_run()