In [1]:
# from huggingface_hub import notebook_login
# 
# notebook_login()


In [2]:
# from transformers.utils import send_example_telemetry
# send_example_telemetry("language_modeling_notebook_finetuning_nli", framework="tensorflow")

In [3]:
# in case of problems with the gpu memory
def clear_gpu_mem(): 
    from numba import cuda 
    device = cuda.get_current_device()
    device.reset()

#### Load finetuning data

In [4]:
from sklearn.model_selection import train_test_split # for more convenient data splitting
import numpy as np
import pandas as pd

from datasets import Dataset, DatasetDict # to create Dataset objects
import pprint
import tensorflow as tf

# import mlflow # for ml tracking

from string import Template # to template the premise and hypothesis for the NLI task

2023-11-16 10:02:02.557071: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-16 10:02:02.588023: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-16 10:02:02.588049: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-16 10:02:02.588068: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-16 10:02:02.593890: I tensorflow/core/platform/cpu_feature_g

In [5]:
pd.set_option("display.max_colwidth", None)
pd.set_option("colheader_justify", "left")

path = "../data"
dataset_files = ["question_avoidance_preprocessed_dataset.parquet"]
finetuning_datasets = {}
for i in dataset_files:
    finetuning_datasets[i.split(".parquet")[0]] = pd.read_parquet(f"{path}/{i}", engine="pyarrow")

In [6]:
print("Available datasets:", list(finetuning_datasets.keys()))

Available datasets: ['question_avoidance_preprocessed_dataset']


#### Initialize mlflow

To launch the ui:

```shell
poetry run mlflow ui
```

In [7]:
# mlflow.set_experiment("Question Dodging 1 (different input format)")
# mlflow.set_tracking_uri("http://127.0.0.1:5000")
# 
# # autologging
# mlflow.tensorflow.autolog()

#### Set up GPU

In [8]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
  try:
    print(gpus)
    tf.config.experimental.set_memory_growth(gpus[0], True)
    """
    tf.config.set_logical_device_configuration(
        gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    """;
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)


2023-11-16 10:02:09.668634: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2023-11-16 10:02:09.668657: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: snek
2023-11-16 10:02:09.668661: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: snek
2023-11-16 10:02:09.668756: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 535.113.1
2023-11-16 10:02:09.668767: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 535.113.1
2023-11-16 10:02:09.668769: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:309] kernel version seems to match DSO: 535.113.1


It's important to reformulate the premise and hypothesis fed into the model. Example:

#### Load zero-shot model

There is a number of zero-shot classification models that could be used. 

One example is [typeform/distilbert-base-uncased-mnli](https://huggingface.co/typeform/distilbert-base-uncased-mnli). It supports TF/Keras as well and performs okay-ish.

Other good options:
- https://huggingface.co/facebook/bart-large-mnli (for English only)
- https://huggingface.co/MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli (outperforms other models)
- https://huggingface.co/joeddav/xlm-roberta-large-xnli (multilingual)

In [42]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, AutoConfig, AutoModelForSequenceClassification

In [45]:
# loading the model
model_name = "typeform/distilbert-base-uncased-mnli"
# model_name = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
#model_name = "roberta-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)

config = AutoConfig.from_pretrained(model_name)
num_labels = len(config.id2label)


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


In [46]:
model = TFAutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=config.id2label,
    label2id=config.label2id)

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [48]:
model.summary()

Model: "tf_distil_bert_for_sequence_classification_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 distilbert (TFDistilBertMa  multiple                  66362880  
 inLayer)                                                        
                                                                 
 pre_classifier (Dense)      multiple                  590592    
                                                                 
 classifier (Dense)          multiple                  2307      
                                                                 
 dropout_39 (Dropout)        multiple                  0         
                                                                 
Total params: 66955779 (255.42 MB)
Trainable params: 66955779 (255.42 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [61]:
model.layers[0]

<transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertMainLayer at 0x7f3ac83dfa30>

#### Load preprocesssed data

In [14]:
list_of_datasets = [finetuning_datasets[dataset] for dataset in finetuning_datasets]
data = pd.concat(list_of_datasets)

del finetuning_datasets
data.sample(3)

Unnamed: 0,question,answer,label
375,"As the Gray report refers to major procurement activities , will the Minister tell me , and the House , what recent discussions he has had with commanders on the ground about the effectiveness of personal protection equipment for our troops in theatre - such as the Stourbridge war hero , 19-year - old Michelle Norris , who risked her life and was the first woman to gain the military cross for her work ?","Of course I noticed that rather startling figure when I read the Gray report myself . The right hon Gentleman , who has obviously read the report , will also have noticed that there is no evidential basis for that statement anywhere in it , nor is there an evidential basis for it anywhere else that I have ever come across . The very fact that the figure ranges between £ 1 billion and more than £ 2 billion shows , I think , how imprecise that statement inevitably is .",0
236,How important is this result for the clay season that comes now and Roland Garros?,"Oh, any title is big, and it means a lot, I mean, such a big tournament that is considered one of the biggest tournaments in our sport. I won three times here. I think that says enough about how I feel playing in Miami. I love the crowd. It's a lot of support. Night sessions are my most preferable here, because the crowd gets into it and you can feel that great vibe in the stands. I have been really playing well in the last couple of years here, so this is going to be very encouraging for me prior to the clay court season. I'm gonna have more confidence coming into the MonteCarlo tournament. It's going to be the opening tournament on clay. I haven't played it last year. I look forward to it. I want to start well. I want to start strong. I want to go deep in the tournament, and, you know, there is a lot of tournaments coming up. Obviously Roland Garros, Olympics, Wimbledon, they are top of the priority list, but still, I want to perform well on all the others.",2
114,"As the steeply rising unemployment level is the worst problem facing the country and the Government , why has not the right hon Lady and her Government brought before the House , before Parliament departs for the recess , fresh proposals to restore the £ 170 million cut that they made in the Manpower Services Commission budget ? When will they carry out and bring before the House an expanded programme to deal with this problem ? Will the right hon Lady now tell us , when the Government have failed to bring forward a programme before the departure of Parliament for the recess , how soon those proposals will be announced to the nation ?","Already , about 324,000 people are affected by and benefit from special employment and training measures . My right hon Friend the Secretary of State for Employment has given an undertaking that if the youth opportunities programme is not sufficient it will be enlarged , so that every school leaver has the offer of a place by Easter 1981 .",0


One could use the `train_test_split` method from `datasets` ([source](https://huggingface.co/docs/datasets/v2.14.5/en/package_reference/main_classes)) which readily splits a dataset object to a train and test set, but using the sklearn one makes it easier to get a train, test, and validation split. 

In [15]:
X = data[["question", "answer"]]
y = data[["label"]]

X_train, X_test, y_train, y_test  = train_test_split(X, y, test_size=0.2, random_state=1)

X_train, X_val, y_train, y_val  = train_test_split(X_train, y_train, test_size=0.25, random_state=1) # 0.25 x 0.8 = 0.2

In [16]:
train_dataset = pd.concat([X_train, y_train], axis=1)
test_dataset = pd.concat([X_test, y_test], axis=1)
val_dataset = pd.concat([X_val, y_val], axis=1)

In [17]:
train_dataset = Dataset.from_pandas(train_dataset, preserve_index=False)
test_dataset = Dataset.from_pandas(test_dataset, preserve_index=False)
val_dataset = Dataset.from_pandas(val_dataset, preserve_index=False)

In [18]:
#del data, X, y

In [19]:
dataset = DatasetDict({"train": train_dataset, "test": test_dataset, "val": val_dataset})

In [20]:
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'label'],
        num_rows: 253
    })
    test: Dataset({
        features: ['question', 'answer', 'label'],
        num_rows: 85
    })
    val: Dataset({
        features: ['question', 'answer', 'label'],
        num_rows: 85
    })
})

In [21]:
dataset["train"]["question"][0]

'On a slightly lighter note, what do you think makes a good definitive fist pump: the quiet steely determination or fullon adrenaline spin your arms around?'

In [22]:
#mlflow.start_run()

In [23]:
model.summary()

Model: "tf_distil_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 distilbert (TFDistilBertMa  multiple                  66362880  
 inLayer)                                                        
                                                                 
 pre_classifier (Dense)      multiple                  590592    
                                                                 
 classifier (Dense)          multiple                  2307      
                                                                 
 dropout_19 (Dropout)        multiple                  0         
                                                                 
Total params: 66955779 (255.42 MB)
Trainable params: 66955779 (255.42 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


#### Preprocessing the input sequence

In [24]:
premise_template = Template("Question: $question. Answer: $answer")
hypothesis_template = Template("In this example, the answer evades or ignores the question.")

# mlflow.log_params(
#     {
#         "premise_template": premise_template.safe_substitute(),
#         "hypothesis_template": hypothesis_template.safe_substitute(),
#         "input_note": "passed as premise, hypothesis" # "passed into tokenizer as [premise, hypothesis]" 
#     }
# )

def preprocess_function(row, train=True):
    #premise = f"Question: {row['premise']}"
    #hypothesis = f"This answer evades the question: {row['hypothesis']}"
    premise = premise_template.safe_substitute(question = row['question'], answer = row['answer'])
    hypothesis = hypothesis_template.safe_substitute()
    encoded = tokenizer(premise, hypothesis, add_special_tokens=True, padding=True, truncation="only_first", return_attention_mask=True, return_tensors="np") #, return_tensors="tf") # truncation=True,  padding=True, 
    if train:
        encoded["labels"] = row["label"]
    #print(encoded)
    #print(encoded)
    # encoded["input_sentence"] = tokenizer.decode(encoded.input_ids) #[0])
    #encoded["input_sentence"] = tokenizer.decode(encoded.input_ids)
    return encoded

In [70]:
tokenized_input_np = tokenizer("This is some input", return_tensors="np")
tokenized_input_tf = tokenizer("This is some input", return_tensors="tf")
# tokenized_input_pt = tokenizer("This is some input", return_tensors="pt")

inputs = [tokenized_input_np, tokenized_input_tf]# , tokenized_input_pt]

for i in inputs:
    print(i)
    print()

{'input_ids': array([[ 101, 2023, 2003, 2070, 7953,  102]]), 'attention_mask': array([[1, 1, 1, 1, 1, 1]])}

{'input_ids': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[ 101, 2023, 2003, 2070, 7953,  102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[1, 1, 1, 1, 1, 1]], dtype=int32)>}



In [81]:
outputs_np = model(tokenized_input_np)
print(tokenized_input_np)
outputs_np.logits

{'input_ids': array([[ 101, 2023, 2003, 2070, 7953,  102]]), 'attention_mask': array([[1, 1, 1, 1, 1, 1]])}


<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[ 3.2404332, -1.1141715, -4.125758 ]], dtype=float32)>

In [82]:
outputs = model(tokenized_input_tf)
print(tokenized_input_tf)
outputs.logits

{'input_ids': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[ 101, 2023, 2003, 2070, 7953,  102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[1, 1, 1, 1, 1, 1]], dtype=int32)>}


<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[ 3.2404332, -1.1141715, -4.125758 ]], dtype=float32)>

In [86]:
tokenizer.model_input_names

#model_inputs = {k: k for k in tokenizer.model_input_names}
pprint.pprint(tokenizer.model_input_names)

['input_ids', 'attention_mask']


In [50]:
example = preprocess_function(dataset["train"][0])
print(example.input_ids)
print(tokenizer.decode(example.input_ids[0]))
#print(tokenizer.decode(example.input_ids[1]))
#print(tokenizer.decode(example.input_ids))
print(example)

[[  101  3160  1024  2006  1037  3621  9442  3602  1010  2054  2079  2017
   2228  3084  1037  2204 15764  7345 10216  1024  1996  4251  3886  2100
   9128  2030  2440  2239 14963  6714  2115  2608  2105  1029  1012  3437
   1024  1006  7239  1012  1007  1045  2228  1045  2031  1037  2261  2367
   4617  1012   102  1999  2023  2742  1010  1996  3437 26399  2015  2030
  26663  1996  3160  1012   102]]
[CLS] question : on a slightly lighter note, what do you think makes a good definitive fist pump : the quiet steely determination or fullon adrenaline spin your arms around?. answer : ( laughter. ) i think i have a few different versions. [SEP] in this example, the answer evades or ignores the question. [SEP]
{'input_ids': array([[  101,  3160,  1024,  2006,  1037,  3621,  9442,  3602,  1010,
         2054,  2079,  2017,  2228,  3084,  1037,  2204, 15764,  7345,
        10216,  1024,  1996,  4251,  3886,  2100,  9128,  2030,  2440,
         2239, 14963,  6714,  2115,  2608,  2105,  1029,  

In [88]:
encoded_dataset = dataset.map(preprocess_function, remove_columns=["question", "answer", "label"])

Map:   0%|          | 0/253 [00:00<?, ? examples/s]

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

In [89]:
encoded_dataset["train"].features["labels"]

Value(dtype='int64', id=None)

In [90]:
encoded_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 253
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 85
    })
    val: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 85
    })
})

In [91]:
tokenizer.decode(encoded_dataset["train"]["input_ids"][0][0])

'[CLS] question : on a slightly lighter note, what do you think makes a good definitive fist pump : the quiet steely determination or fullon adrenaline spin your arms around?. answer : ( laughter. ) i think i have a few different versions. [SEP] in this example, the answer evades or ignores the question. [SEP]'

In [92]:
# a helper function to show the prediction results

def get_results(outputs, model, return_all_scores=True):
    scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True)
    if return_all_scores:
        return [
            [{"label": model.config.id2label[i], "score": score.item()} for i, score in enumerate(item)]
                for item in scores
            ]
    else:
        return [
            {"label": model.config.id2label[item.argmax()], "score": item.max().item()} for item in scores
        ]

In [93]:
dataset_batch_size = 4 # 16

tf_train_dataset = model.prepare_tf_dataset(
    encoded_dataset["train"],
    shuffle=True,
    batch_size=dataset_batch_size,
    tokenizer=tokenizer
)

tf_validation_dataset = model.prepare_tf_dataset(
    encoded_dataset["val"],
    shuffle=False,
    batch_size=dataset_batch_size,
    tokenizer=tokenizer,
)

tf_test_dataset = model.prepare_tf_dataset(
    encoded_dataset["test"],
    shuffle=False,
    batch_size=dataset_batch_size,
    tokenizer=tokenizer,
)


You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [94]:
# now the dataset is ready to be fed into the model to fit
tf_train_dataset

<_PrefetchDataset element_spec=({'input_ids': TensorSpec(shape=(4, 1, None), dtype=tf.int64, name=None), 'attention_mask': TensorSpec(shape=(4, 1, None), dtype=tf.int64, name=None)}, TensorSpec(shape=(4,), dtype=tf.int64, name=None))>

In [95]:
tf_train_dataset

<_PrefetchDataset element_spec=({'input_ids': TensorSpec(shape=(4, 1, None), dtype=tf.int64, name=None), 'attention_mask': TensorSpec(shape=(4, 1, None), dtype=tf.int64, name=None)}, TensorSpec(shape=(4,), dtype=tf.int64, name=None))>

In [96]:
# del encoded_dataset

In [97]:
from transformers import create_optimizer

batch_size = 4
num_epochs = 5
number_of_training_examples = tf_train_dataset.cardinality().numpy()
batches_per_epoch = number_of_training_examples // batch_size
total_train_steps = int(batches_per_epoch * num_epochs)

optimizer, schedule = create_optimizer(
    init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps
)

In [98]:
model.compile(optimizer=optimizer) # run_eagerly=True, 

In [120]:
# The issue was that BatchEncoding objects are not accepted, they need to be converted into a dict first
# https://github.com/huggingface/transformers/issues/20709

sample, label = dict(tokenized_input_np), np.array([0])
print(sample)
print(label)

{'input_ids': array([[ 101, 2023, 2003, 2070, 7953,  102]]), 'attention_mask': array([[1, 1, 1, 1, 1, 1]])}
[0]


In [134]:
tokenized_input_np

{'input_ids': array([[ 101, 2023, 2003, 2070, 7953,  102]]), 'attention_mask': array([[1, 1, 1, 1, 1, 1]])}

In [144]:
model.evaluate(sample, label)



0.013389784842729568

In [139]:
preprocessed_input = preprocess_function({"question": "Are you there?", "answer": "I'm not sure", "label": np.array(1)})
print(preprocessed_input)
print(tokenizer.decode(preprocessed_input.input_ids[0]))

{'input_ids': array([[  101,  3160,  1024,  2024,  2017,  2045,  1029,  1012,  3437,
         1024,  1045,  1005,  1049,  2025,  2469,   102,  1999,  2023,
         2742,  1010,  1996,  3437, 26399,  2015,  2030, 26663,  1996,
         3160,  1012,   102]]), 'attention_mask': array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]]), 'labels': array(1)}
[CLS] question : are you there?. answer : i'm not sure [SEP] in this example, the answer evades or ignores the question. [SEP]


In [142]:
model(preprocessed_input)

TFSequenceClassifierOutput(loss=<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00531332], dtype=float32)>, logits=<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-1.6558033,  3.8010535, -3.047937 ]], dtype=float32)>, hidden_states=None, attentions=None)

In [147]:
preprocessed_input.keys()

dict_keys(['input_ids', 'attention_mask', 'labels'])

In [149]:
sample.keys()

dict_keys(['input_ids', 'attention_mask'])

In [157]:
model.evaluate({"input_ids": preprocessed_input["input_ids"], "attention_mask": preprocessed_input["attention_mask"]}, np.array([0]))



5.462170600891113

In [153]:
1# evaluating loss before finetuning the model on our "target data"
before_finetuning_history = model.evaluate(tf_test_dataset)

ValueError: in user code:

    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/training.py", line 2042, in test_function  *
        return step_function(self, iterator)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/training.py", line 2025, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/tensorflow/python/distribute/distribute_lib.py", line 1679, in run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/tensorflow/python/distribute/distribute_lib.py", line 3269, in call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/tensorflow/python/distribute/distribute_lib.py", line 4067, in _call_for_each_replica
        return fn(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/training.py", line 2013, in run_step  **
        outputs = model.test_step(data)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/transformers/modeling_tf_utils.py", line 1747, in test_step
        y_pred = self(x, training=False)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler
        return fn(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/training.py", line 589, in __call__
        return super().__call__(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler
        return fn(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
        outputs = call_fn(inputs, *args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
        raise new_e.with_traceback(e.__traceback__) from None
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
        return fn(*args, **kwargs)
    File "/tmp/__autograph_generated_fileobfu4ug3.py", line 37, in tf__run_call_with_unpacked_inputs  **
        retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
    File "/tmp/__autograph_generated_file2ugw3a3y.py", line 17, in tf__call  **
        distilbert_output = ag__.converted_call(ag__.ld(self).distilbert, (), dict(input_ids=ag__.ld(input_ids), attention_mask=ag__.ld(attention_mask), head_mask=ag__.ld(head_mask), inputs_embeds=ag__.ld(inputs_embeds), output_attentions=ag__.ld(output_attentions), output_hidden_states=ag__.ld(output_hidden_states), return_dict=ag__.ld(return_dict), training=ag__.ld(training)), fscope)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
        return fn(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
        outputs = call_fn(inputs, *args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
        raise new_e.with_traceback(e.__traceback__) from None
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
        return fn(*args, **kwargs)
    File "/tmp/__autograph_generated_fileobfu4ug3.py", line 37, in tf__run_call_with_unpacked_inputs  **
        retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
    File "/tmp/__autograph_generated_fileikv16pe2.py", line 93, in tf__call  **
        embedding_output = ag__.converted_call(ag__.ld(self).embeddings, (ag__.ld(input_ids),), dict(inputs_embeds=ag__.ld(inputs_embeds)), fscope)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
        return fn(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
        outputs = call_fn(inputs, *args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
        raise new_e.with_traceback(e.__traceback__) from None
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
        return fn(*args, **kwargs)
    File "/tmp/__autograph_generated_filez7t8wf0l.py", line 54, in tf__call  **
        final_embeddings = ag__.converted_call(ag__.ld(self).LayerNorm, (), dict(inputs=ag__.ld(final_embeddings)), fscope)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
        return fn(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
        outputs = call_fn(inputs, *args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
        raise new_e.with_traceback(e.__traceback__) from None
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
        return fn(*args, **kwargs)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/layers/normalization/layer_normalization.py", line 263, in call  **
        ndims = len(input_shape)
    File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/tensorflow/python/framework/tensor_shape.py", line 918, in __len__
        raise ValueError("Cannot take the length of shape with unknown rank.")

    ValueError: Exception encountered when calling layer 'tf_distil_bert_for_sequence_classification_1' (type TFDistilBertForSequenceClassification).
    
    in user code:
    
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/transformers/modeling_tf_utils.py", line 712, in run_call_with_unpacked_inputs  *
            return func(self, **unpacked_inputs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/transformers/models/distilbert/modeling_tf_distilbert.py", line 720, in call  *
            distilbert_output = self.distilbert(
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
            return fn(*args, **kwargs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
            outputs = call_fn(inputs, *args, **kwargs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
            raise new_e.with_traceback(e.__traceback__) from None
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
            return fn(*args, **kwargs)
        File "/tmp/__autograph_generated_fileobfu4ug3.py", line 37, in tf__run_call_with_unpacked_inputs  **
            retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
        File "/tmp/__autograph_generated_fileikv16pe2.py", line 93, in tf__call  **
            embedding_output = ag__.converted_call(ag__.ld(self).embeddings, (ag__.ld(input_ids),), dict(inputs_embeds=ag__.ld(inputs_embeds)), fscope)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
            return fn(*args, **kwargs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
            outputs = call_fn(inputs, *args, **kwargs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
            raise new_e.with_traceback(e.__traceback__) from None
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
            return fn(*args, **kwargs)
        File "/tmp/__autograph_generated_filez7t8wf0l.py", line 54, in tf__call  **
            final_embeddings = ag__.converted_call(ag__.ld(self).LayerNorm, (), dict(inputs=ag__.ld(final_embeddings)), fscope)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
            return fn(*args, **kwargs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
            outputs = call_fn(inputs, *args, **kwargs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
            raise new_e.with_traceback(e.__traceback__) from None
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
            return fn(*args, **kwargs)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/layers/normalization/layer_normalization.py", line 263, in call  **
            ndims = len(input_shape)
        File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/tensorflow/python/framework/tensor_shape.py", line 918, in __len__
            raise ValueError("Cannot take the length of shape with unknown rank.")
    
        ValueError: Exception encountered when calling layer 'distilbert' (type TFDistilBertMainLayer).
        
        in user code:
        
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/transformers/modeling_tf_utils.py", line 712, in run_call_with_unpacked_inputs  *
                return func(self, **unpacked_inputs)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/transformers/models/distilbert/modeling_tf_distilbert.py", line 402, in call  *
                embedding_output = self.embeddings(input_ids, inputs_embeds=inputs_embeds)  # (bs, seq_length, dim)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
                return fn(*args, **kwargs)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
                outputs = call_fn(inputs, *args, **kwargs)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
                raise new_e.with_traceback(e.__traceback__) from None
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
                return fn(*args, **kwargs)
            File "/tmp/__autograph_generated_filez7t8wf0l.py", line 54, in tf__call  **
                final_embeddings = ag__.converted_call(ag__.ld(self).LayerNorm, (), dict(inputs=ag__.ld(final_embeddings)), fscope)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
                return fn(*args, **kwargs)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
                outputs = call_fn(inputs, *args, **kwargs)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
                raise new_e.with_traceback(e.__traceback__) from None
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
                return fn(*args, **kwargs)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/layers/normalization/layer_normalization.py", line 263, in call  **
                ndims = len(input_shape)
            File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/tensorflow/python/framework/tensor_shape.py", line 918, in __len__
                raise ValueError("Cannot take the length of shape with unknown rank.")
        
            ValueError: Exception encountered when calling layer 'embeddings' (type TFEmbeddings).
            
            in user code:
            
                File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/transformers/models/distilbert/modeling_tf_distilbert.py", line 124, in call  *
                    final_embeddings = self.LayerNorm(inputs=final_embeddings)
                File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 61, in error_handler  **
                    return fn(*args, **kwargs)
                File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__
                    outputs = call_fn(inputs, *args, **kwargs)
                File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 155, in error_handler
                    raise new_e.with_traceback(e.__traceback__) from None
                File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
                    return fn(*args, **kwargs)
                File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/keras/src/layers/normalization/layer_normalization.py", line 263, in call  **
                    ndims = len(input_shape)
                File "/home/snek/a-politicians-answer/.venv/lib/python3.10/site-packages/tensorflow/python/framework/tensor_shape.py", line 918, in __len__
                    raise ValueError("Cannot take the length of shape with unknown rank.")
            
                ValueError: Exception encountered when calling layer 'LayerNorm' (type LayerNormalization).
                
                Cannot take the length of shape with unknown rank.
                
                Call arguments received by layer 'LayerNorm' (type LayerNormalization):
                  • inputs=tf.Tensor(shape=<unknown>, dtype=float32)
            
            
            Call arguments received by layer 'embeddings' (type TFEmbeddings):
              • input_ids=tf.Tensor(shape=<unknown>, dtype=int32)
              • position_ids=None
              • inputs_embeds=None
              • training=False
        
        
        Call arguments received by layer 'distilbert' (type TFDistilBertMainLayer):
          • input_ids=tf.Tensor(shape=<unknown>, dtype=int32)
          • attention_mask=tf.Tensor(shape=<unknown>, dtype=int32)
          • head_mask=None
          • inputs_embeds=None
          • output_attentions=False
          • output_hidden_states=False
          • return_dict=True
          • training=False
    
    
    Call arguments received by layer 'tf_distil_bert_for_sequence_classification_1' (type TFDistilBertForSequenceClassification):
      • input_ids={'input_ids': 'tf.Tensor(shape=<unknown>, dtype=int64)', 'attention_mask': 'tf.Tensor(shape=<unknown>, dtype=int64)', 'labels': 'tf.Tensor(shape=(None,), dtype=int64)'}
      • attention_mask=None
      • head_mask=None
      • inputs_embeds=None
      • output_attentions=None
      • output_hidden_states=None
      • return_dict=None
      • labels=None
      • training=False


In [None]:
# we are looking at Mean loss
print(model.metrics)
print(before_finetuning_history)

In [None]:
mlflow.log_metric("loss before finetuning", before_finetuning_history)

In [None]:
model.summary()

In [None]:
#import os
#os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

In [None]:
from evaluate import load


In [None]:
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard
from transformers.keras_callbacks import KerasMetricCallback

# remember to install git-lfs
# !apt install git-lfs

def compute_metrics(eval_predictions):
    predictions, labels = eval_predictions
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

#metric = model.metrics[0]

metric = load("glue", "mnli")
metric_callback = KerasMetricCallback(
    metric_fn=compute_metrics, eval_dataset=tf_validation_dataset
)

push_to_hub_model_id = "question-dodging-finetuned-distilbert-base-uncased-mnli"
tensorboard_callback = TensorBoard(log_dir="./text_classification_model_save/logs")

push_to_hub_callback = PushToHubCallback(
    output_dir="./text_classification_model_save",
    tokenizer=tokenizer,
    hub_model_id=push_to_hub_model_id,
)

callbacks = [metric_callback, tensorboard_callback, push_to_hub_callback]


In [None]:
# clear_gpu_mem()

In [None]:
history = model.fit(
        tf_train_dataset,
        validation_data=tf_validation_dataset,
        epochs=num_epochs,
        batch_size=2,
        verbose=1,
        callbacks=callbacks
    )


In [None]:
after_finetuning_history = model.evaluate(tf_test_dataset)
after_finetuning_history

In [None]:
input_example = preprocess_function({"question": "Who am I?", "answer": "not me"}, False)
input_example["labels"] = [0]

In [None]:
tokenized_exmaple = tokenizer("What are you doing here? Not sure", return_attention_mask=True)
tokenized_exmaple

In [None]:
model(tokenized_exmaple)

In [None]:
model.summary()

In [None]:
mlflow.end_run()