# Fine tuning Large Langua Model(LLM) - distilber base uncased

## Libraries

In [39]:
import transformers
from datasets import Dataset, DatasetDict, ClassLabel
import pandas as pd
import numpy as np
import evaluate
import torch
from transformers import pipeline

## Dataset 
Se cargan los datasets de `training`, `validation` y `test`. Se utiliza cada uno para:
* `training` = Realizar el fine tunning del modelo.
* `validation` = validar el proceso de fine tunning.
* `test` = testear el modelo con nuevos datos.

In [20]:
df_train = pd.read_csv("../data/data_to_model/train_data.csv")
df_val = pd.read_csv("../data/data_to_model/val_data.csv")
df_test = pd.read_csv("../data/data_to_model/test_data.csv")

### Consolidating training y test:

In [21]:
# Convertir cada DataFrame en un Dataset de Hugging Face
train_dataset = Dataset.from_pandas(df_train)
val_dataset = Dataset.from_pandas(df_val)
test_dataset = Dataset.from_pandas(df_test)

# Codificar la columna 'emotion' en un ClassLabel para cada conjunto de datos
train_dataset = train_dataset.class_encode_column("emotion")
val_dataset = val_dataset.class_encode_column("emotion")
# test_dataset = test_dataset.class_encode_column("emotion")

# Crear el DatasetDict con los tres conjuntos
dataset_dict = DatasetDict(
    {
        "train": train_dataset,
        "validation": val_dataset,
        #    "test": test_dataset
    }
)

# Verificar el resultado
dataset_dict

Casting to class labels:   0%|          | 0/6027 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/753 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'emotion'],
        num_rows: 6027
    })
    validation: Dataset({
        features: ['text', 'emotion'],
        num_rows: 753
    })
})

In [22]:
from transformers import AutoTokenizer

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name, clean_up_tokenization_spaces=True)


def preprocess_function(examples, func_tokenizer):
    return func_tokenizer(examples["text"], truncation=True)


tokenized_dataset = dataset_dict.map(
    preprocess_function,
    batched=True,
    # num_proc=20,
    fn_kwargs={"func_tokenizer": tokenizer},
)

tokenized_dataset = tokenized_dataset.rename_column("emotion", "label")
print(tokenized_dataset)

Map:   0%|          | 0/6027 [00:00<?, ? examples/s]

Map:   0%|          | 0/753 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 6027
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 753
    })
})


In [23]:
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification

id2label = {
    0: "anger",
    1: "disgust",
    2: "fear",
    3: "guilt",
    4: "joy",
    5: "sadness",
    6: "shame",
}
label2id = {
    "anger": 0,
    "disgust": 1,
    "fear": 2,
    "guilt": 3,
    "joy": 4,
    "sadness": 5,
    "shame": 6,
}

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=7, id2label=id2label, label2id=label2id
).to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
training_args = transformers.TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    num_train_epochs=20,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    metric_for_best_model="f1",  # Métrica a usar para seleccionar el mejor modelo
    save_total_limit=3,  # Limitar la cantidad de checkpoints guardados
    greater_is_better=True,
)

In [32]:
# Crear el callback de Early Stopping
early_stopping_callback = transformers.EarlyStoppingCallback(
    early_stopping_patience=10,  # Número de épocas sin mejora antes de detener
    early_stopping_threshold=0.001,  # Opcional: mejora mínima requerida para resetear el contador
)

In [33]:
recall = evaluate.load("recall")
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
f1 = evaluate.load("f1")


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    pre = precision.compute(
        predictions=predictions, references=labels, average="weighted"
    )["precision"]
    rec = recall.compute(predictions=predictions, references=labels, average="micro")[
        "recall"
    ]
    f1_score = f1.compute(predictions=predictions, references=labels, average="micro")[
        "f1"
    ]
    acc = accuracy.compute(predictions=predictions, references=labels)["accuracy"]

    return {"precision": pre, "recall": rec, "f1": f1_score, "accuracy": acc}


trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping_callback],
)


trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.11613,0.686659,0.673307,0.677313,0.673307
2,No log,2.195333,0.676804,0.675963,0.674601,0.675963
3,No log,2.150121,0.677109,0.675963,0.674745,0.675963
4,No log,2.26095,0.692747,0.679947,0.683077,0.679947
5,No log,2.329257,0.696739,0.685259,0.686875,0.685259
6,0.013100,2.302621,0.699487,0.689243,0.690085,0.689243
7,0.013100,2.31132,0.676373,0.675963,0.674879,0.675963
8,0.013100,2.299111,0.681135,0.671979,0.674979,0.671979
9,0.013100,2.365772,0.699853,0.675963,0.682047,0.675963
10,0.013100,2.292196,0.70498,0.697211,0.699276,0.697211


TrainOutput(global_step=1900, training_loss=0.007486833083002191, metrics={'train_runtime': 633.1517, 'train_samples_per_second': 190.381, 'train_steps_per_second': 3.001, 'total_flos': 1820208203799654.0, 'train_loss': 0.007486833083002191, 'epoch': 20.0})

In [36]:
trainer.evaluate()

{'eval_loss': 2.323112726211548,
 'eval_precision': 0.7103782233582198,
 'eval_recall': 0.6985391766268261,
 'eval_f1': 0.6997722522822101,
 'eval_accuracy': 0.6985391766268261,
 'eval_runtime': 2.2358,
 'eval_samples_per_second': 336.793,
 'eval_steps_per_second': 5.367,
 'epoch': 20.0}

In [38]:
trainer.save_model("models/distilbert-base-uncased-finetuned")

In [81]:
## trainer.save_model("./my_model")
# text = "You shouldnt said that. I hate you!"
# classifier = pipeline(
#    "text-classification",
#    model="models/distilbert-base-uncased-finetuned/",
#    device="cuda",
# )
# print(classifier(text))

[{'label': 'guilt', 'score': 0.5378169417381287}]


### Using test set to evaluate the model performance.

In [37]:
# Convertir el conjunto de prueba en un Dataset de Hugging Face y codificar la columna 'emotion'
test_dataset = Dataset.from_pandas(df_test)
test_dataset = test_dataset.class_encode_column("emotion")

# Tokenizar el conjunto de prueba
tokenized_test_dataset = test_dataset.map(
    preprocess_function,
    batched=True,
    fn_kwargs={"func_tokenizer": tokenizer},
)

tokenized_test_dataset = tokenized_test_dataset.rename_column("emotion", "label")

# Evaluar el modelo en el conjunto de prueba
test_results = trainer.evaluate(eval_dataset=tokenized_test_dataset)

# Imprimir los resultados de las métricas en el conjunto de prueba
test_results

Casting to class labels:   0%|          | 0/754 [00:00<?, ? examples/s]

Map:   0%|          | 0/754 [00:00<?, ? examples/s]

{'eval_loss': 2.168704032897949,
 'eval_precision': 0.6977283244146201,
 'eval_recall': 0.6923076923076923,
 'eval_f1': 0.6926315254997579,
 'eval_accuracy': 0.6923076923076923,
 'eval_runtime': 2.4092,
 'eval_samples_per_second': 312.963,
 'eval_steps_per_second': 4.981,
 'epoch': 20.0}