In [None]:
!pip install -q -U trl transformers accelerate peft datasets bitsandbytes evaluate git+https://github.com/huggingface/huggingface_hub

In [None]:
from huggingface_hub import notebook_login
notebook_login()

In [3]:
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig

model_name = "xlm-roberta-base"

In [None]:
from datasets import load_dataset

dataset = load_dataset("tjasad/Slovene_SuperGLUE_BoolQ")

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
def preprocess_data(examples):

  passage_inputs = [f"passage : {x} " for x in examples["passage"]]
  question_inputs = [f"question : {x} " for x in examples["question"]]
  inputs = [passage_input + question_input for passage_input, question_input in zip(passage_inputs, question_inputs)]

  model_inputs = tokenizer(inputs, max_length=400, truncation=True)

  model_inputs["labels"] = examples["label"]
  model_inputs["labels"] = [int(label) for label in model_inputs["labels"]]

  return model_inputs

tokenized_dataset = dataset.map(preprocess_data, remove_columns=['idx', 'passage', 'label', 'question'], batched=True)

tokenized_train = tokenized_dataset["train"]
tokenized_val = tokenized_dataset["eval"]

In [7]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [8]:
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}

In [None]:
id2label = {0: "False", 1: "True"}
label2id = {"False": 0, "True": 1}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=2, id2label=id2label, label2id=label2id
)

model.config.use_cache = False

In [10]:
from peft import  get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit

NUM_VIRTUAL_TOKENS = 12

peft_config = PromptTuningConfig(
    peft_type="PROMPT_TUNING",
    task_type=TaskType.SEQ_CLS,
    num_virtual_tokens=NUM_VIRTUAL_TOKENS,
    num_layers=6,
    token_dim=768,
    num_attention_heads=12,
    tokenizer_name_or_path=model_name #The pre-trained model
)

In [11]:
model = get_peft_model(model, peft_config)
print(model.print_trainable_parameters())

trainable params: 601,346 || all params: 278,646,532 || trainable%: 0.2158
None


In [18]:
from transformers import TrainingArguments

new_model_name = "prompt_fine_tuned_boolq_XLMroberta"

training_args = TrainingArguments(
    output_dir=new_model_name,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=50,
    evaluation_strategy='steps',
    max_steps=400,
    use_cpu=False,
    load_best_model_at_end=True
)

In [None]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [14]:
trainer.train()

Step,Training Loss,Validation Loss,Accuracy,F1
50,0.6671,0.594216,0.777778,0.680556
100,0.6529,0.578634,0.777778,0.680556
150,0.6499,0.577928,0.777778,0.680556
200,0.6527,0.578189,0.777778,0.680556
250,0.6471,0.571583,0.777778,0.680556
300,0.6533,0.57096,0.777778,0.680556
350,0.6599,0.569052,0.777778,0.680556
400,0.6552,0.569383,0.777778,0.680556


TrainOutput(global_step=400, training_loss=0.654756841659546, metrics={'train_runtime': 109.5461, 'train_samples_per_second': 29.211, 'train_steps_per_second': 3.651, 'total_flos': 430575540673440.0, 'train_loss': 0.654756841659546, 'epoch': 33.333333333333336})

In [None]:
# save model to hub
model_location = "tjasad" + new_model_name
trainer.push_to_hub(model_location)

In [16]:
trainer.evaluate()

{'eval_loss': 0.5693830847740173,
 'eval_accuracy': 0.7777777777777778,
 'eval_f1': 0.6805555555555557,
 'eval_runtime': 0.4474,
 'eval_samples_per_second': 40.236,
 'eval_steps_per_second': 6.706,
 'epoch': 33.333333333333336}

In [17]:
# Example
text="passage : Bankovec za 20 evrov – Zaenkrat obstaja le ena celotna serija evrskih bankovcev, vendar pa izhaja nova serija, ki bo podobna sedanji. Evropska centralna banka bo pravočasno naznanila, kdaj bodo bankovci iz prve serije izgubili status zakonitega plačilnega sredstva. question : Ali je bankovec za 20 evrov iz prve serije še vedno zakonito plačilno sredstvo?"

classifier = pipeline("sentiment-analysis", model='tjasad/prompt_fine_tuned_boolq')
classifier(text)



config.json:   0%|          | 0.00/750 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/490 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

[{'label': 'LABEL_0', 'score': 0.5092488527297974}]