In [None]:
from huggingface_hub import notebook_login

notebook_login()

In [1]:
from transformers import AutoTokenizer

model_name = "google-bert/bert-base-uncased"

In [2]:
from datasets import load_dataset

dataset = load_dataset("lenatr99/Slovene_SuperGLUE_BoolQ")

Downloading readme:   0%|          | 0.00/464 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/42.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/92 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/18 [00:00<?, ? examples/s]

In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id



In [4]:
def preprocess_data(examples):
    passage_inputs = [f"passage : {x} " for x in examples["passage"]]
    question_inputs = [f"question : {x} " for x in examples["question"]]
    inputs = [passage_input + question_input for passage_input, question_input in zip(passage_inputs, question_inputs)]

    model_inputs = tokenizer(inputs, max_length=400, truncation=True)

    model_inputs["labels"] = examples["label"]
    model_inputs["labels"] = [int(label) for label in model_inputs["labels"]]

    return model_inputs

In [5]:
tokenized_dataset = dataset.map(preprocess_data, remove_columns=['idx', 'passage', 'label', 'question'], batched=True)

Map:   0%|          | 0/92 [00:00<?, ? examples/s]

Map:   0%|          | 0/18 [00:00<?, ? examples/s]

In [6]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [7]:
from sklearn.metrics import accuracy_score, f1_score


def compute_metrics(pred):
    label_ids = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(label_ids, preds, average="weighted")
    acc = accuracy_score(label_ids, preds)
    return {"accuracy": acc, "f1": f1}

In [8]:
from transformers import AutoModelForSequenceClassification, set_seed

set_seed(42)

id2label = {0: "False", 1: "True"}
label2id = {"False": 0, "True": 1}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=2, id2label=id2label, label2id=label2id
)

model.config.use_cache = False

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
from peft import LoraConfig, get_peft_model, TaskType

lora_alpha = 32
lora_rank_dropout = 0.1
lora_module_dropout = 0.0
lora_r = 16

peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=lora_r,
    lora_alpha=lora_alpha,
    bias="none",
    base_model_name_or_path=model_name
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [10]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

'NoneType' object has no attribute 'cadam32bit_grad_fp32'
trainable params: 591,362 || all params: 110,075,140 || trainable%: 0.5372


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [11]:
from transformers import TrainingArguments

new_model_name = "lora_fine_tuned_boolq"

training_args = TrainingArguments(
    output_dir=new_model_name,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=50,
    evaluation_strategy='steps',
    max_steps=400,
    use_cpu=False,
    load_best_model_at_end=True
)

In [12]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['eval'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

max_steps is given, it will override any value given in num_train_epochs


In [13]:
trainer.train()

  0%|          | 0/400 [00:00<?, ?it/s]

{'loss': 0.6762, 'grad_norm': 1.7553035020828247, 'learning_rate': 1.7500000000000002e-05, 'epoch': 4.17}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.594681441783905, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 1.0494, 'eval_samples_per_second': 17.153, 'eval_steps_per_second': 2.859, 'epoch': 4.17}
{'loss': 0.6639, 'grad_norm': 4.576698303222656, 'learning_rate': 1.5000000000000002e-05, 'epoch': 8.33}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5719298124313354, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 0.4247, 'eval_samples_per_second': 42.382, 'eval_steps_per_second': 7.064, 'epoch': 8.33}
{'loss': 0.6555, 'grad_norm': 2.0179154872894287, 'learning_rate': 1.25e-05, 'epoch': 12.5}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5648499727249146, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 0.4287, 'eval_samples_per_second': 41.986, 'eval_steps_per_second': 6.998, 'epoch': 12.5}
{'loss': 0.6605, 'grad_norm': 1.401183843612671, 'learning_rate': 1e-05, 'epoch': 16.67}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5614725351333618, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 0.4258, 'eval_samples_per_second': 42.277, 'eval_steps_per_second': 7.046, 'epoch': 16.67}
{'loss': 0.6612, 'grad_norm': 2.8593122959136963, 'learning_rate': 7.500000000000001e-06, 'epoch': 20.83}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5568368434906006, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 0.4351, 'eval_samples_per_second': 41.365, 'eval_steps_per_second': 6.894, 'epoch': 20.83}
{'loss': 0.6508, 'grad_norm': 8.425884246826172, 'learning_rate': 5e-06, 'epoch': 25.0}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5566861629486084, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 0.4267, 'eval_samples_per_second': 42.185, 'eval_steps_per_second': 7.031, 'epoch': 25.0}
{'loss': 0.6491, 'grad_norm': 3.6379542350769043, 'learning_rate': 2.5e-06, 'epoch': 29.17}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5550094246864319, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 0.4314, 'eval_samples_per_second': 41.722, 'eval_steps_per_second': 6.954, 'epoch': 29.17}
{'loss': 0.663, 'grad_norm': 2.2656919956207275, 'learning_rate': 0.0, 'epoch': 33.33}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5547106266021729, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6805555555555556, 'eval_runtime': 0.4845, 'eval_samples_per_second': 37.15, 'eval_steps_per_second': 6.192, 'epoch': 33.33}
{'train_runtime': 182.8067, 'train_samples_per_second': 17.505, 'train_steps_per_second': 2.188, 'train_loss': 0.6600276756286622, 'epoch': 33.33}


TrainOutput(global_step=400, training_loss=0.6600276756286622, metrics={'train_runtime': 182.8067, 'train_samples_per_second': 17.505, 'train_steps_per_second': 2.188, 'total_flos': 568286516146944.0, 'train_loss': 0.6600276756286622, 'epoch': 33.333333333333336})

In [14]:
trainer.evaluate()

  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.5547106266021729,
 'eval_accuracy': 0.7777777777777778,
 'eval_f1': 0.6805555555555556,
 'eval_runtime': 0.6583,
 'eval_samples_per_second': 27.343,
 'eval_steps_per_second': 4.557,
 'epoch': 33.333333333333336}

In [15]:
trainer.push_to_hub(new_model_name)



adapter_model.safetensors:   0%|          | 0.00/2.37M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/4.98k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/lenatr99/lora_fine_tuned_boolq/commit/920ab543dbac770d5e87c92d0fbdd39a24565b36', commit_message='lora_fine_tuned_boolq', commit_description='', oid='920ab543dbac770d5e87c92d0fbdd39a24565b36', pr_url=None, pr_revision=None, pr_num=None)

In [16]:
from transformers import pipeline

adapter_name = "lenatr99/" + new_model_name

# Example
text="passage : Bankovec za 20 evrov – Zaenkrat obstaja le ena celotna serija evrskih bankovcev, vendar pa izhaja nova serija, ki bo podobna sedanji. Evropska centralna banka bo pravočasno naznanila, kdaj bodo bankovci iz prve serije izgubili status zakonitega plačilnega sredstva. question : Ali je bankovec za 20 evrov iz prve serije še vedno zakonito plačilno sredstvo?"

classifier = pipeline("sentiment-analysis", model=adapter_name)
classifier(text)

adapter_config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[{'label': 'LABEL_0', 'score': 0.5762706398963928}]