# RoBERTuito k-folds - Versión

Autores:
- Pablo Quito
- Juan Valdiviezo 

In [None]:
# Notebook shell command: install the Hugging Face `transformers` package used below.
!pip install transformers

In [None]:
from transformers import BertModel, AdamW, get_linear_schedule_with_warmup
import torch
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim import AdamW
import pandas as pd
from textwrap import wrap
from transformers import AutoTokenizer
from transformers import RobertaModel, AutoModel
import torch.nn as nn
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Initialization: data location, hyper-parameters, seeding and device selection.

# Location of the training spreadsheet inside the mounted Google Drive.
ROUTE = '/content/drive/My Drive/Intelektubies/Datos/Entrenamiento V4'
FILE_NAME = 'df_sinrelleno_entrenamiento_v4.xlsx'
DATASET_PATH = f"{ROUTE}/{FILE_NAME}"

# Hyper-parameters shared by the rest of the notebook.
RANDOM_SEED = 42
# NOTE(review): confirm that 130 does not exceed the checkpoint's maximum
# sequence length (see tokenizer.model_max_length) — TODO confirm.
MAX_LEN = 130
BATCH_SIZE = 16  # previously 8
N_SPLITS = 5  # number of cross-validation folds
NCLASES = 4  # Positive, Negative, Neutral, Alert

# Seed both random number generators used in the notebook.
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Prefer the first CUDA device when one is available.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(f"Fuente de datos: {DATASET_PATH}")
print(f"Dispositivo: {device}")
drive.mount('/content/drive')

In [None]:
# Load the spreadsheet and take a first look at it.
df = pd.read_excel(DATASET_PATH)
print(df.shape)
# Print one arbitrary comment (index label 666), wrapped for readability.
print(*wrap(df['comentario'][666]), sep="\n")
df.head()

In [None]:
# Convert the textual sentiment labels into the integer class ids used by the model.
df['sentimiento'] = df['sentimiento'].map(
    dict(Negativa=0, Neutral=1, Positiva=2, Alerta=3)
).astype(int)
df

In [None]:
# TOKENIZATION
# Checkpoint name; the same identifier is reused later to load the encoder weights.
PRE_TRAINED_MODEL = 'pysentimiento/robertuito-base-uncased-emotion'
tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED_MODEL)  # AutoTokenizer selects the tokenizer class matching the checkpoint

In [None]:
# Tokenization example: split a sample sentence into tokens and look up their vocabulary ids.
sample_txt = 'hola mundo'
tokens = tokenizer.tokenize(sample_txt)
tokens_id = tokenizer.convert_tokens_to_ids(tokens)
print(tokens)
# Last expression of the cell: the notebook displays the list of ids.
tokens_id

In [None]:
# Encoding example: pad the sample sentence to a fixed length of 10 and
# request PyTorch tensors plus the attention mask.
encoding = tokenizer.encode_plus(
    sample_txt,
    max_length = 10,
    add_special_tokens = True, # adds the tokenizer's special start/end tokens (the original note calls them [CLS] and [SEP]; the actual token strings depend on the tokenizer)
    return_token_type_ids = False,
    padding='max_length',
    return_attention_mask = True,
    return_tensors = 'pt'
)

# Last expression of the cell: shows which fields the encoding contains.
encoding.keys()

In [None]:
# Inspect the encoded sample: tokens (including special and padding tokens),
# their ids and the attention mask.
print(tokenizer.convert_ids_to_tokens(encoding['input_ids'][0]))
print(encoding['input_ids'][0])
print(encoding['attention_mask'][0])

In [None]:
# Dataset definition
class IMDBDataset(Dataset):
    """Torch dataset that tokenizes comments on demand.

    Args:
        comentarios: Indexable sequence of comment texts.
        sentimiento: Indexable sequence of integer class labels, aligned
            position by position with ``comentarios``.
        tokenizer: Object exposing ``encode_plus`` (e.g. a Hugging Face
            tokenizer) used to encode every comment.
        max_len: Fixed sequence length; comments are padded or truncated to it.
    """

    def __init__(self, comentarios, sentimiento, tokenizer, max_len):
        self.comentarios = comentarios
        self.sentimiento = sentimiento
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of comments in the dataset."""
        return len(self.comentarios)

    def __getitem__(self, item):
        """Return the encoded sample at position ``item``.

        Returns:
            dict with the raw text (``review``), the 1-D ``input_ids`` and
            ``attention_mask`` tensors, and the label as a ``torch.long``
            tensor (``labels``).
        """
        comentario = str(self.comentarios[item])
        label = self.sentimiento[item]
        # Use the tokenizer given to the constructor. Relying on a module-level
        # `tokenizer` silently ignores the argument and breaks the class when
        # it is used outside this notebook.
        encoding = self.tokenizer.encode_plus(
            comentario,
            max_length=self.max_len,
            add_special_tokens=True,  # add the tokenizer's special start/end tokens
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True,
        )
        return {
            'review': comentario,
            # encode_plus returns tensors with a leading batch axis of size 1;
            # flatten() drops it so the DataLoader can stack samples.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long),
        }

In [None]:
#Data Loader

def data_loader(df, tokenizer, max_len, batch_size):
    """Build a class-balanced ``DataLoader`` over the comments in ``df``.

    Each sample is drawn with probability inversely proportional to the
    frequency of its class, with replacement, so batches are roughly balanced.

    Args:
        df: DataFrame with a ``comentario`` (text) and a ``sentimiento``
            (class label) column.
        tokenizer: Tokenizer forwarded to ``IMDBDataset``.
        max_len: Sequence length used to pad/truncate every comment.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` that yields ``len(df)`` samples per epoch.
    """
    dataset = IMDBDataset(
        comentarios=df.comentario.to_numpy(),
        sentimiento=df.sentimiento.to_numpy(),
        tokenizer=tokenizer,
        # Honour the arguments instead of the MAX_LEN / BATCH_SIZE globals.
        max_len=max_len,
    )

    # Sampler for imbalanced classes: weight of a sample = 1 / (size of its class).
    # The counts are looked up by label value, so the weights stay aligned even
    # when some class id is missing from this particular frame.
    labels = df['sentimiento']
    sample_weights = (1.0 / labels.map(labels.value_counts())).to_numpy()

    sampler = WeightedRandomSampler(
        sample_weights,
        num_samples=len(sample_weights),
        replacement=True,
    )
    return DataLoader(dataset, batch_size=batch_size, num_workers=2, sampler=sampler)

In [None]:
# MODEL
class RoBERTtuitoSentimentClassifier(nn.Module):
    """Pretrained encoder followed by dropout and a linear classification head.

    Args:
        n_classes: Number of output classes (size of the logits vector).
    """

    def __init__(self, n_classes):
        super().__init__()
        # Encoder loaded from the PRE_TRAINED_MODEL checkpoint, without a pooling layer.
        self.roberta = AutoModel.from_pretrained(PRE_TRAINED_MODEL, add_pooling_layer=False)
        self.drop = nn.Dropout(p=0.3)
        self.linear = nn.Linear(self.roberta.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        """Return the unnormalized class scores for a batch of encoded texts."""
        encoder_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        # No pooled output is available, so the hidden state of the first
        # token of every sequence is used as the sentence representation.
        first_token_state = encoder_out['last_hidden_state'][:, 0, :]
        return self.linear(self.drop(first_token_state))

In [None]:
# Build the classifier and place it on the selected device in one step.
model = RoBERTtuitoSentimentClassifier(NCLASES).to(device=device)

In [None]:
def train_model(model, data_loader, loss_fn, optimizer, device, scheduler, dataset_size, accumulation_steps=4):
    """Run one training epoch with gradient accumulation.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``
            that returns one row of class scores per sample.
        data_loader: Iterable of batches; each batch is a mapping with
            ``input_ids``, ``attention_mask`` and ``labels`` tensors.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        optimizer: Optimizer exposing ``step()`` and ``zero_grad()``.
        device: Device the batch tensors are moved to.
        scheduler: Learning-rate scheduler; stepped once per optimizer step.
        dataset_size: Number of samples used as the accuracy denominator.
        accumulation_steps: Number of batches whose gradients are accumulated
            before each optimizer step.

    Returns:
        Tuple ``(accuracy, mean_loss)``: accuracy is a 0-dim float64 tensor,
        mean_loss is the average of the per-batch losses.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    # Tensor accumulator so `.double()` below works however many batches ran.
    correct_predictions = torch.zeros((), dtype=torch.long, device=device)
    optimizer.zero_grad()  # start the epoch with clean gradients

    for batch in data_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # Scale the loss so the accumulated gradient is an average, not a sum.
        loss = loss_fn(outputs, labels) / accumulation_steps
        loss.backward()  # gradients accumulate across batches
        num_batches += 1

        # Update the weights once every `accumulation_steps` batches.
        if num_batches % accumulation_steps == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        _, preds = torch.max(outputs, dim=1)
        correct_predictions += torch.sum(preds == labels)
        total_loss += loss.item() * accumulation_steps  # undo the scaling for reporting

    if num_batches == 0:
        # Previously this surfaced as an unrelated NameError on `step`.
        raise ValueError("data_loader produced no batches; nothing to train on")

    # Flush gradients left over from an incomplete accumulation group.
    if num_batches % accumulation_steps != 0:
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    return correct_predictions.double() / dataset_size, total_loss / num_batches


In [None]:
# Display names of the classes, ordered by class id (0..3).
target_names = ['class Negativo', 'class Neutral', 'class Positivo', 'class Alerta']
def eval_model_with_metrics(model, data_loader, loss_fn, device, n_examples, plot_confusion=False):
    """Evaluate ``model`` on ``data_loader`` and print classification metrics.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``.
        data_loader: Iterable of batches with ``input_ids``, ``attention_mask``
            and ``labels`` tensors.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        device: Device the batch tensors are moved to.
        n_examples: Number of samples used as the accuracy denominator.
        plot_confusion: When true, also draw the confusion matrix.

    Returns:
        Tuple ``(accuracy, avg_loss, macro_f1, all_labels, all_preds)``;
        accuracy is a 0-dim float64 tensor, avg_loss is the mean of the
        per-batch losses.
    """
    model = model.eval()
    # Fixed label set: the report and the confusion matrix always cover every
    # class. Without it, classification_report raises when a class is absent
    # from both labels and predictions, and the matrix changes shape.
    class_ids = list(range(len(target_names)))
    losses = []
    # Tensor accumulator so `.double()` below works even with no batches.
    correct_preds = torch.zeros((), dtype=torch.long, device=device)
    all_labels = []
    all_preds = []

    with torch.no_grad():  # inference only: no gradients are tracked
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs, dim=1)
            loss = loss_fn(outputs, labels)
            correct_preds += torch.sum(preds == labels)
            losses.append(loss.item())

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    accuracy = correct_preds.double() / n_examples
    avg_loss = np.mean(losses)
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    # Computed once and reused for both the printout and the optional plot.
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

    # Evaluation metrics from scikit-learn
    print(
        "Classification Report:\n",
        classification_report(
            all_labels,
            all_preds,
            labels=class_ids,
            target_names=target_names,
            zero_division=0,
        ),
    )
    print("Confusion Matrix:\n", cm)
    print("Accuracy:", accuracy.item())
    print(f"Macro F1 Score: {macro_f1:.4f}")

    if plot_confusion:
        # Draw the confusion matrix only when requested.
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
        disp.plot(cmap=plt.cm.Blues)
        plt.title("Confusion Matrix")
        plt.show()

    return accuracy, avg_loss, macro_f1, all_labels, all_preds


In [None]:
# Not used in the final version, kept because it is handy for experiments.
class EarlyStopping:
    """Stop training when the validation loss stops improving.

    Args:
        patience (int): Number of consecutive epochs without improvement
            tolerated before training is stopped.
        min_delta (float): Minimum decrease of the validation loss that counts
            as an improvement.
        path (str): Stored on the instance; the checkpoint itself is written to
            a fixed, per-fold location (see ``__call__``).
    """

    def __init__(self, patience=5, min_delta=0.1, path="best_model.pth"):
        self.path = path
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = float('inf')

    def __call__(self, val_loss, model, fold):
        """Record ``val_loss`` and return True when training should stop."""
        improved = val_loss < self.best_loss - self.min_delta

        if not improved:
            self.counter += 1
            print(f"⚠️ No hay mejora en {self.counter}/{self.patience} épocas.")
        else:
            self.counter = 0
            self.best_loss = val_loss
            checkpoint_file = f"/content/drive/My Drive/Intelektubies/Modelos/RoBERTuito_folds/Early/model_fold_{fold}.pth"
            # Persist the weights of the best model seen so far for this fold.
            torch.save(model.state_dict(), checkpoint_file)
            print(f"🔹 Mejor modelo guardado en {checkpoint_file} (Val Loss: {val_loss:.5f})")

        should_stop = self.counter >= self.patience
        if should_stop:
            print("⏹️ Early Stopping activado. Deteniendo el entrenamiento.")
        return should_stop


In [None]:
class EarlyStoppingF1:
    """Stop training when the macro F1 score stops improving.

    Args:
        patience (int): Number of consecutive epochs without improvement
            tolerated before training is stopped.
        min_delta (float): Minimum increase of macro F1 that counts as an
            improvement.
        path (str): Stored on the instance; the checkpoint itself is written to
            a fixed, per-fold location (see ``__call__``).
    """

    def __init__(self, patience=5, min_delta=0.001, path="best_model.pth"):
        self.path = path
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_f1 = 0.0

    def __call__(self, current_f1, model, fold):
        """Record ``current_f1`` and return True when training should stop."""
        # Only a gain larger than `min_delta` counts as an improvement.
        improved = current_f1 > self.best_f1 + self.min_delta

        if not improved:
            self.counter += 1
            print(f"⚠️ No hay mejora en {self.counter}/{self.patience} épocas (Macro F1: {current_f1:.4f}).")
        else:
            self.counter = 0
            self.best_f1 = current_f1
            checkpoint_file = f"/content/drive/My Drive/Intelektubies/Modelos/RoBERTuito_folds/Early/model_fold_{fold}.pth"
            # Persist the weights of the best model seen so far for this fold.
            torch.save(model.state_dict(), checkpoint_file)
            print(f"🔹 Mejor modelo guardado en {checkpoint_file} (Macro F1: {current_f1:.4f})")

        should_stop = self.counter >= self.patience
        if should_stop:
            print("⏹️ Early Stopping activado. Deteniendo el entrenamiento.")
        return should_stop


In [None]:
# Cross-validation setup.
EPOCHS = 100  # upper bound; early stopping may end a fold sooner
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_SEED)

# One list per metric; each fold appends its best value.
fold_results = {
    metric: [] for metric in ('train_loss', 'val_loss', 'train_acc', 'val_acc')
}


In [None]:
# K-fold training: one freshly initialised model per fold, with early stopping on macro F1.
accumulation_steps = 2  # batches accumulated per optimizer step

for fold, (train_idx, val_idx) in enumerate(skf.split(df['comentario'], df['sentimiento'])):
    print(f"Fold {fold + 1}/{N_SPLITS}")
    print("-" * 10)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    # Split the data into the training and validation parts of this fold.
    df_train = df.iloc[train_idx]
    df_val = df.iloc[val_idx]

    # Training batches are class-balanced by the weighted sampler.
    train_data_loader = data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
    # Validation must see every sample of the fold exactly once, so it is built
    # without the weighted sampler (which draws with replacement); otherwise the
    # reported metrics describe a resampled set rather than the fold.
    val_dataset = IMDBDataset(
        comentarios=df_val.comentario.to_numpy(),
        sentimiento=df_val.sentimiento.to_numpy(),
        tokenizer=tokenizer,
        max_len=MAX_LEN,
    )
    val_data_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=2)

    # Model, optimizer and scheduler are re-created for every fold.
    model = RoBERTtuitoSentimentClassifier(NCLASES).to(device)

    optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.001)  # previously weight_decay=0.005

    print(len(train_data_loader))
    # The scheduler is stepped once per optimizer step, i.e. once per group of
    # `accumulation_steps` batches (plus one for a trailing partial group), so
    # the schedule length is counted in optimizer steps, not in batches.
    steps_per_epoch = (len(train_data_loader) + accumulation_steps - 1) // accumulation_steps
    total_steps = steps_per_epoch * EPOCHS
    scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=0,  # previously int(0.2 * total_steps)
        num_training_steps=total_steps
    )
    # Inverse-frequency class weights, computed from the training part only so
    # that no statistic of the validation fold reaches the loss.
    class_counts = df_train['sentimiento'].value_counts().sort_index().values
    class_weights = 1.0 / torch.tensor(class_counts, dtype=torch.float)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights.to(device), label_smoothing=0.1).to(device)

    # Early stopping driven by the validation macro F1.
    early_stoppingf1 = EarlyStoppingF1(patience=5, min_delta=0.01)

    # Train and evaluate.
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch + 1}/{EPOCHS}")

        train_acc, train_loss = train_model(
            model,
            train_data_loader,
            loss_fn,
            optimizer,
            device,
            scheduler,
            len(df_train),
            accumulation_steps=accumulation_steps
        )
        val_acc, val_loss, macro_f1, test_labels, test_preds = eval_model_with_metrics(
            model,
            val_data_loader,
            loss_fn,
            device,
            len(df_val)
        )

        # Keep the metrics of the current epoch.
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc.item())
        val_accuracies.append(val_acc.item())

        print(f"📉 Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"📈 Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        print()

        # Stop this fold when macro F1 has not improved for `patience` epochs.
        if early_stoppingf1(macro_f1, model, fold):
            break

    # Best value of every metric reached during this fold. 'val_loss' is now
    # filled as well; it used to stay empty.
    fold_results['train_loss'].append(min(train_losses))
    fold_results['val_loss'].append(min(val_losses))
    fold_results['train_acc'].append(max(train_accuracies))
    fold_results['val_acc'].append(max(val_accuracies))

print("\n🔹 Entrenamiento completado. Mejores resultados por fold:")
print(fold_results)

In [None]:
# Training curves of the last fold (the per-epoch lists are reset at the start of every fold).
epochs_range = range(1, len(train_losses) + 1)

# One figure with two panels. Previously two separate 12x5 figures each filled
# a single slot of a 1x2 grid, leaving half of every figure blank.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

# Loss per epoch
ax_loss.plot(epochs_range, train_losses, label='Train Loss')
ax_loss.plot(epochs_range, val_losses, label='Val Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Loss per Epoch')
ax_loss.legend()

# Accuracy per epoch
ax_acc.plot(epochs_range, train_accuracies, label='Train Accuracy')
ax_acc.plot(epochs_range, val_accuracies, label='Val Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Accuracy per Epoch')
ax_acc.legend()

fig.tight_layout()
plt.show()


In [None]:
def classifySentiment(review_text):
    """Predict, print and return the sentiment label of ``review_text``.

    Uses the module-level ``tokenizer``, ``model`` and ``device``. ``model`` is
    whatever the training cell left bound, i.e. the model of the last fold
    after its last epoch.

    Args:
        review_text: Text to classify.

    Returns:
        One of ``'Negativo'``, ``'Neutral'``, ``'Positivo'`` or ``'Alerta'``.
    """
    encoding_review = tokenizer.encode_plus(
        review_text,
        max_length=MAX_LEN,  # same MAX_LEN as in training
        add_special_tokens=True,
        return_token_type_ids=False,
        padding='max_length',
        # Cut long texts to MAX_LEN exactly as the training dataset does;
        # without it longer inputs are passed to the model at full length.
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    input_ids = encoding_review['input_ids'].to(device)
    attention_mask = encoding_review['attention_mask'].to(device)

    # Inference: switch dropout off and skip gradient tracking.
    model.eval()
    with torch.no_grad():
        output = model(input_ids, attention_mask)
    _, prediction = torch.max(output, dim=1)

    sentiment_mapping = {0: 'Negativo', 1: 'Neutral', 2: 'Positivo', 3: 'Alerta'}
    predicted_sentiment = sentiment_mapping[prediction.item()]
    print(f"Texto: {review_text}")
    print(f"Sentimiento predicho: {predicted_sentiment}")

    return predicted_sentiment


## Test preliminar - Comentarios de ejemplo autogenerados

In [None]:
# Single hand-written example that mixes praise with a criticism.
sample_txt = 'Debe mejorar: Muy buen profesor, pero debería ser más exigente porque cualquiera puede pasar'

# classifySentiment already prints the text and the prediction; the outer
# print additionally shows the returned label.
print(classifySentiment(sample_txt))

In [None]:
# Batch of example comments to classify one by one.
# NOTE(review): the comment starting with 'nada, no sabe enseñar' appears twice in the list.
sample_list = ['Tiene una actitud sumamente correcta que se irradia a los estudiantes',
  'Su conocimiento',
  'respeta los horarios de clases',
  'nada, no sabe enseñar y no sabe la materia practica',
  'Excelente docente, nos guio para poder sobresalir siempre en su materia y nos enseno del futuro de un ingeniero.',
  'es bien organizada y exigente',
  'Buen dominio en la MATERIA',
  'nada, no sabe enseñar y no sabe la materia practica',
  'El interés por fomentar el trabajo en equipo',
  'Buena enseñanza',
  'conocimientos  de la asignatura',
  'Desarrollo de la materia  con claridad',
  'SU CONOCIMIENTO DE LA ASIGNATURA',
  'Su compromiso',
  'Su puntualidad',
  'la puntualidad',
  '-Capacidad de aprendizaje. -Conocimiento de los temas. -CAPACIDAD PARA PROPONER ACTIVIDADES RELACIONADAS CON CADA UNO DE LOS TEMAS DICTADOS EN CLASE.',
  'Conceptos generales',
  'mucho conocimiento de la asignatura',
  'SU compresion de la materia']


# Classify every sample; each call prints the text and its predicted label,
# and the outer print shows the returned label once more.
for sample in sample_list:
  print(classifySentiment(sample))

In [None]:
# Longer example comment describing an incident with a teacher.
sample_txt = 'En una práctica, no respondí una pregunta y la docente asumió que no asistí, dándome cero en el informe.luego en clase, mencionó el incidente avergonzándome frente a todos y no solo ami sino a dos estudiantes mas.no deberia hacer eso'
print(classifySentiment(sample_txt))