In [None]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import matplotlib.pyplot as plt
from transformers import AdamW
from torch import nn
from sklearn.metrics import f1_score

# Încărcarea setului de date
dataset = load_dataset('squad')
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')

def preprocess_data(examples):
    # Tokenize the questions and context
    inputs = tokenizer(
        examples['question'],
        examples['context'],
        max_length=512,
        truncation=True,
        padding="max_length"
    )
    
    # Handle the answers
    targets = []
    for answer in examples['answers']:
        if len(answer['text']) > 0:
            # Use the first answer in the list of answers
            target = tokenizer(
                answer['text'][0],
                max_length=128,
                truncation=True,
                padding="max_length"
            )['input_ids']
        else:
            # If no answer text, use an empty string
            target = tokenizer(
                "",
                max_length=128,
                truncation=True,
                padding="max_length"
            )['input_ids']
        targets.append(target)

    # Add the tokenized labels to inputs
    inputs['labels'] = targets
    return inputs


tokenized_dataset = dataset.map(preprocess_data, batched=True)
train_dataset = tokenized_dataset['train'].shuffle(seed=42).select(range(1000))  # Exemplu cu subset de date
val_dataset = tokenized_dataset['validation'].select(range(500))

class LoRA(nn.Module):
    def __init__(self, base_model, adapter_dim=8):
        super(LoRA, self).__init__()
        self.base_model = base_model
        self.adapter_dim = adapter_dim
        self.projection = nn.Linear(self.base_model.config.d_model, adapter_dim)
        self.adapter = nn.Linear(adapter_dim, self.base_model.config.vocab_size)

    def forward(self, input_ids, attention_mask, labels=None):
        # Automatically generate decoder inputs if labels are provided
        decoder_input_ids = None
        decoder_attention_mask = None
        if labels is not None:
            decoder_input_ids = labels[:, :-1]  # Shift labels to the right
            decoder_attention_mask = (decoder_input_ids != self.base_model.config.pad_token_id)

        output = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask,
            output_hidden_states=True
        )
        hidden_states = output.hidden_states[-1]
        projected_hidden_states = self.projection(hidden_states)
        logits = self.adapter(projected_hidden_states)

        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, logits.size(-1)), labels[:, 1:].contiguous().view(-1))
        return {"loss": loss, "logits": logits}

print(dataset['train'][0])  # Inspect a single example from the dataset


Reusing dataset squad (/home/viorel/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/88 [00:00<?, ?ba/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [3]:
# Încărcăm modelul BART și aplicăm LoRA
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large')
lora_model = LoRA(model)

TypeError: LoRA.__init__() missing 2 required positional arguments: 'adapter' and 'projection'

In [2]:
# Îngheață parametrii modelului original
for param in model.parameters():
    param.requires_grad = False
for param in lora_model.adapter.parameters():
    param.requires_grad = True

# Configurarea antrenamentului
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="steps",
    save_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    no_cuda=True
)

NameError: name 'model' is not defined

In [4]:
# Alegem optimizerul AdamW
optimizer = AdamW(lora_model.parameters(), lr=5e-5)

# Funcția de calculare a metricilor
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    # Shift labels to match predictions
    labels = labels[:, 1:]  # Exclude the first token (start token)
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Compute F1 score
    f1 = f1_score(decoded_labels, decoded_preds, average="macro")
    return {"f1": f1}




In [5]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=lora_model)


In [6]:
# Creăm Trainer-ul
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# Antrenăm modelul
trainer.train()

# Evaluăm modelul
results = trainer.evaluate()

  trainer = Trainer(


AttributeError: 'Seq2SeqLMOutput' object has no attribute 'hidden_states'

In [12]:
# Afisăm pierderea
plt.plot(results['eval_loss'], label="Loss")
plt.title('Performanța modelului BART-LORA')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

NameError: name 'batch' is not defined

In [11]:
# Vizualizăm exemple
for i in range(3):
    question = val_dataset['question'][i]
    context = val_dataset['context'][i]
    inputs = tokenizer(question, context, return_tensors="pt")
    output = lora_model.generate(inputs['input_ids'], max_length=50)
    print(f"Question: {question}")
    print(f"Answer: {tokenizer.decode(output[0], skip_special_tokens=True)}\n")

# Afisăm scorul F1
print(f"F1 Score: {results['eval_f1']}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2048x50265 and 1024x512)