In [1]:
import csv
import os
import time

import numpy as np  
import pandas as pd
import torch
import wandb
from datasets import Dataset
from datasets import load_dataset, load_metric
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType, PrefixTuningConfig, IA3Config
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
# Take the Weights & Biases key from the environment (None when the variable
# is unset) so that no credential is written into the notebook itself.
WANDB_API_KEY = os.getenv("WANDB_API_KEY")
wandb.login(key=WANDB_API_KEY)

  from .autonotebook import tqdm as notebook_tqdm
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mgonzalezmanfred309[0m ([33mgatonegro[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
model_name = "bert-base-multilingual-cased"

# Load the RTE CSV files; each key becomes a split name of the loaded dataset
rte_csv_files = dict(
    train='SuperGLUE-HumanT/csv/RTE/train.csv',
    validation='SuperGLUE-HumanT/csv/RTE/val.csv',
)
dataset = load_dataset('csv', data_files=rte_csv_files)

# Tokenization
tokenizer = AutoTokenizer.from_pretrained(model_name)


def preprocess_function(examples):
    """Tokenize a batch of premise/hypothesis pairs with the module-level tokenizer.

    Each pair is truncated and padded to a fixed length of 512 tokens, so all
    encoded rows have the same length.
    """
    premises = examples['premise']
    hypotheses = examples['hypothesis']
    return tokenizer(
        premises,
        hypotheses,
        truncation=True,
        padding='max_length',
        max_length=512,
    )


dataset = dataset.map(preprocess_function, batched=True)

# Split the original validation set 50/50 into a validation half and a test half.
# The fixed seed makes the shuffle deterministic, so both halves contain the same
# rows after every kernel restart and test metrics stay comparable between runs.
test_train_split = dataset['validation'].train_test_split(test_size=0.5, seed=42)
# Store the two halves back under the split names used by the training code
dataset['validation'] = test_train_split['train']
dataset['test'] = test_train_split['test']

# Now continue with label mapping
def label_mapping(example):
    """Add an integer ``labels`` field derived from the string ``label`` field.

    The example is modified in place and returned. A ``label`` value other than
    'not_entailment' or 'entailment' raises ``KeyError``.
    """
    class_ids = dict(not_entailment=0, entailment=1)
    text_label = example['label']
    example['labels'] = class_ids[text_label]
    return example
dataset = dataset.map(label_mapping)

# Return only the model inputs and the integer labels, formatted for PyTorch
model_columns = ['input_ids', 'attention_mask', 'labels']
dataset.set_format(type='torch', columns=model_columns)



In [7]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: Any object whose ``named_parameters()`` yields ``(name, param)``
            pairs where ``param`` offers ``numel()`` and ``requires_grad``.

    The trainable percentage is reported as 0.0 when the model has no
    parameters at all, instead of failing with a division by zero.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: a parameter-free model would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

def compute_metrics(eval_pred):
    """Compute weighted F1 and accuracy for one evaluation pass.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        Dict with keys 'f1' and 'accuracy'. The predicted class of each row is
        the arg-max over the last axis of the logits.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return {
        'f1': f1_score(gold, predicted, average='weighted'),
        'accuracy': accuracy_score(gold, predicted),
    }

In [8]:
def fine_tune_model(model_name, model, training_args, dataset):
    """Train ``model`` with early stopping, evaluate it on the test split and save it.

    Args:
        model_name: Identifier used in the printed summary and as the prefix of
            the directory the model is saved to (``{model_name}_RTE_FINAL``).
        model: Model passed to ``Trainer``; it is trained in place.
        training_args: ``TrainingArguments`` controlling the run.
        dataset: Mapping providing the 'train', 'validation' and 'test' splits.

    Returns:
        Tuple ``(model, metrics, elapsed_training)`` where ``metrics`` is the
        dict returned by ``trainer.evaluate`` on the test split and
        ``elapsed_training`` is the wall-clock training time in seconds.
    """
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset['train'],
        eval_dataset=dataset['validation'],
        compute_metrics=compute_metrics,
        # Stop after 6 evaluations without an improvement of at least 0.01
        callbacks=[EarlyStoppingCallback(early_stopping_patience=6, early_stopping_threshold=0.01)]
    )

    start = time.time()
    trainer.train()
    elapsed_training = time.time() - start

    # The test split is evaluated only once, after training has finished
    metrics = trainer.evaluate(dataset['test'])

    # The run uses the RTE data (see the save path below); the summary line
    # previously mislabelled it as "Sentinews".
    print(f"model: {model_name}, Dataset: RTE, Test Metrics: {metrics}")

    model.save_pretrained(f"{model_name}_RTE_FINAL")

    return model, metrics, elapsed_training
def run_lora_sloberta(dataset):
    """Fine-tune the module-level ``model_name`` checkpoint on RTE with LoRA and log the result.

    LoRA adapters are attached to the query, key, value and attention-output
    projections of every encoder layer. After training, one row is appended to
    ``results.csv`` with the columns: timestamp, model name, dataset name,
    test F1, test accuracy, training time in seconds.

    Args:
        dataset: Mapping providing the 'train', 'validation' and 'test' splits,
            forwarded unchanged to ``fine_tune_model``.

    Note:
        Reads the global ``model_name``. The target-module names are hard-coded
        with the ``bert.`` prefix, so they presumably only match checkpoints
        whose encoder is registered under that name -- TODO confirm before
        switching ``model_name`` to another architecture.
    """
    task_type = TaskType.SEQ_CLS  # You might need a different TaskType depending on your exact use case

    training_args = TrainingArguments(
        output_dir=f"{model_name}-RTE",  # Change as needed
        learning_rate=1e-4,
        per_device_train_batch_size=32,  # Adjust based on your GPU memory
        per_device_eval_batch_size=32,
        num_train_epochs=15,
        weight_decay=0.1,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True
    )

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)  # Adjust num_labels as needed
    # The second positional parameter of prepare_model_for_kbit_training is
    # `use_gradient_checkpointing`, not a task type. The TaskType passed there
    # before was only read as a truthy flag, which equals the default, so the
    # stray argument is dropped.
    model = prepare_model_for_kbit_training(model)

    # Same module names, in the same order, as the four hand-written lists:
    # every layer's query, then every key, every value, every output dense.
    attention_projections = (
        "attention.self.query",
        "attention.self.key",
        "attention.self.value",
        "attention.output.dense",
    )
    target_modules = [
        f"bert.encoder.layer.{i}.{projection}"
        for projection in attention_projections
        for i in range(model.config.num_hidden_layers)
    ]

    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        task_type=task_type,
        bias="none",
        target_modules=target_modules,
    )

    model = get_peft_model(model, lora_config)
    print_trainable_parameters(model)

    _, metrics, elapsed_training = fine_tune_model(
        model_name, model, training_args, dataset
    )

    current_time = time.strftime("%Y-%m-%d-%H-%M-%S")
    # The row used to embed the whole metrics dict twice, unquoted; its commas
    # broke the CSV columns. Write the two scalar scores instead and let
    # csv.writer handle quoting. `.get` avoids losing the row after a long
    # training run if a metric key is ever renamed (the cell is left empty).
    with open("results.csv", "a", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")  # keep the "\n" endings of existing rows
        writer.writerow(
            [
                current_time,
                model_name,
                "RTE",  # was recorded as "Sentinews", which is not the dataset used here
                metrics.get("eval_f1"),
                metrics.get("eval_accuracy"),
                elapsed_training,
            ]
        )

# Run the LoRA fine-tuning experiment on the tokenized dataset prepared earlier
# in this notebook (it must provide 'train', 'validation' and 'test' splits).
run_lora_sloberta(dataset)

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at EMBEDDIA/sloberta and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 2951426 || all params: 113574916 || trainable%: 2.5986600773713096


  6%|▌         | 7/120 [00:02<00:43,  2.59it/s]
  7%|▋         | 8/120 [00:02<00:43,  2.59it/s]

{'eval_loss': 0.6896523833274841, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0755, 'eval_samples_per_second': 185.337, 'eval_steps_per_second': 13.238, 'epoch': 1.0}


 12%|█▎        | 15/120 [00:06<00:41,  2.52it/s]
 13%|█▎        | 16/120 [00:06<00:41,  2.52it/s]

{'eval_loss': 0.7079315781593323, 'eval_f1': 0.2571428571428571, 'eval_accuracy': 0.42857142857142855, 'eval_runtime': 0.0772, 'eval_samples_per_second': 181.329, 'eval_steps_per_second': 12.952, 'epoch': 2.0}


 19%|█▉        | 23/120 [00:09<00:38,  2.50it/s]
 20%|██        | 24/120 [00:09<00:38,  2.50it/s]

{'eval_loss': 0.6853160262107849, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0746, 'eval_samples_per_second': 187.777, 'eval_steps_per_second': 13.413, 'epoch': 3.0}


 26%|██▌       | 31/120 [00:12<00:34,  2.57it/s]
 27%|██▋       | 32/120 [00:13<00:34,  2.57it/s]

{'eval_loss': 0.6859315037727356, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0747, 'eval_samples_per_second': 187.485, 'eval_steps_per_second': 13.392, 'epoch': 4.0}


 32%|███▎      | 39/120 [00:16<00:31,  2.57it/s]
 33%|███▎      | 40/120 [00:16<00:31,  2.57it/s]

{'eval_loss': 0.6905068755149841, 'eval_f1': 0.38095238095238093, 'eval_accuracy': 0.5, 'eval_runtime': 0.0756, 'eval_samples_per_second': 185.192, 'eval_steps_per_second': 13.228, 'epoch': 5.0}


 39%|███▉      | 47/120 [00:19<00:28,  2.57it/s]
 40%|████      | 48/120 [00:19<00:28,  2.57it/s]

{'eval_loss': 0.6925612092018127, 'eval_f1': 0.45614035087719296, 'eval_accuracy': 0.5, 'eval_runtime': 0.0795, 'eval_samples_per_second': 176.044, 'eval_steps_per_second': 12.575, 'epoch': 6.0}


 46%|████▌     | 55/120 [00:22<00:25,  2.55it/s]
 47%|████▋     | 56/120 [00:22<00:25,  2.55it/s]

{'eval_loss': 0.6865971684455872, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0763, 'eval_samples_per_second': 183.551, 'eval_steps_per_second': 13.111, 'epoch': 7.0}


 52%|█████▎    | 63/120 [00:25<00:22,  2.56it/s]
 53%|█████▎    | 64/120 [00:26<00:21,  2.56it/s]

{'eval_loss': 0.6896091103553772, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0752, 'eval_samples_per_second': 186.161, 'eval_steps_per_second': 13.297, 'epoch': 8.0}


 59%|█████▉    | 71/120 [00:29<00:19,  2.56it/s]
 60%|██████    | 72/120 [00:29<00:18,  2.56it/s]

{'eval_loss': 0.692135214805603, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0754, 'eval_samples_per_second': 185.675, 'eval_steps_per_second': 13.262, 'epoch': 9.0}


 66%|██████▌   | 79/120 [00:32<00:15,  2.56it/s]
 67%|██████▋   | 80/120 [00:32<00:15,  2.56it/s]

{'eval_loss': 0.6996821761131287, 'eval_f1': 0.38095238095238093, 'eval_accuracy': 0.5, 'eval_runtime': 0.0793, 'eval_samples_per_second': 176.535, 'eval_steps_per_second': 12.61, 'epoch': 10.0}


 72%|███████▎  | 87/120 [00:35<00:13,  2.51it/s]
 73%|███████▎  | 88/120 [00:35<00:12,  2.51it/s]

{'eval_loss': 0.7013550400733948, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0784, 'eval_samples_per_second': 178.564, 'eval_steps_per_second': 12.755, 'epoch': 11.0}


 79%|███████▉  | 95/120 [00:38<00:09,  2.55it/s]
 80%|████████  | 96/120 [00:39<00:09,  2.55it/s]

{'eval_loss': 0.7061598896980286, 'eval_f1': 0.4155844155844156, 'eval_accuracy': 0.5714285714285714, 'eval_runtime': 0.0767, 'eval_samples_per_second': 182.426, 'eval_steps_per_second': 13.03, 'epoch': 12.0}


 80%|████████  | 96/120 [00:39<00:09,  2.43it/s]


{'train_runtime': 39.5233, 'train_samples_per_second': 88.049, 'train_steps_per_second': 3.036, 'train_loss': 0.6751779715220133, 'epoch': 12.0}


100%|██████████| 1/1 [00:00<00:00, 507.23it/s]


model: EMBEDDIA/sloberta, Dataset: Sentinews, Test Metrics: {'eval_loss': 0.7130671143531799, 'eval_f1': 0.3, 'eval_accuracy': 0.3333333333333333, 'eval_runtime': 0.0841, 'eval_samples_per_second': 178.432, 'eval_steps_per_second': 11.895, 'epoch': 12.0}
