<img src="https://minas.medellin.unal.edu.co/eventos/latwaves2018/images/logo_unal.png" alt="unal logo" width="500"/>

# **Trabajo Final de Maestría**
## Ingeniería de Sistemas y Computación - Perfil profundización
Joan Gabriel Bofill Barrera

Inspirado en el trabajo de Shreyas Daniel Gaddam: https://www.kaggle.com/code/shreydan/deberta-v3-base-accelerate-finetuning

Inspirado en el trabajo de Rohit Singh: https://github.com/rohitsingh02/kaggle-feedback-english-language-learning-1st-place-solution

In [None]:
pip install accelerate

Collecting accelerate
  Downloading accelerate-0.26.1-py3-none-any.whl (270 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.26.1


In [None]:
pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [None]:
from transformers import AutoModel
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import sentencepiece
import gc

from accelerate import Accelerator

# Turn off the tokenizers library's own parallelism.
# NOTE(review): presumably to avoid its fork warning once the DataLoader workers start — confirm.
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# Single place for every tunable of the run; later cells read these keys by name.
config = dict(
    # Checkpoint loaded for both the tokenizer and the encoder.
    model='microsoft/deberta-v3-base',
    # Base dropout rate; the model head scales it by 1..5.
    dropout=0.1,
    # Every essay is truncated/padded to this many tokens.
    max_length=1024,
    # anything more results in CUDA OOM [for unfreezed encoder] on Kaggle GPU
    batch_size=8,
    epochs=10,
    lr=3e-4,
    enable_scheduler=True,
    scheduler='CosineAnnealingWarmRestarts',
    gradient_accumulation_steps=2,
    # AdamW epsilon (library default is 1e-8).
    adam_eps=1e-6,
    # When True the encoder weights are frozen and only the head is trained.
    freeze_encoder=True,
    # First epoch (1-based) in which AWP is applied.
    awp_start=6,
)

In [None]:
# Never commit credentials to a notebook: read the Hugging Face token from the environment.
# The token that used to be hardcoded here has been exposed and should be revoked.
# `None` (variable unset) is accepted by from_pretrained and means anonymous access.
access_token = os.environ.get("HF_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(config['model'], token=access_token)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



In [None]:
# Load the Colab Drive helper and mount Google Drive;
# later cells read and write files under /content/drive/MyDrive/Tesis.
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Labelled essays: later cells read the 'full_text' column and the six score columns from this frame.
df = pd.read_csv("/content/drive/MyDrive/Tesis/train.csv")
#test0_df = pd.read_csv("/content/drive/MyDrive/Tesis/test.csv")#df.shape

In [None]:
train_ratio = 0.8
# NOTE(review): test_ratio is not read anywhere in the visible cells; the split below uses 1 - train_ratio.
test_ratio = 0.2
# Fraction of main_df (not of df) that a later cell holds out for validation.
val_ratio = 0.1

# Split the data into a train+validation pool (main_df) and a held-out test set (test_df)
main_df, test_df = train_test_split(df, test_size= 1 - train_ratio, random_state=42)


In [None]:
# (rows, columns) of the train+validation pool
main_df.shape

(3128, 8)

In [None]:
class EssayDataset:
    """Map-style dataset that tokenizes an essay on access and pairs it with its six scores.

    Args:
        df: DataFrame with a 'full_text' column and, unless ``is_test`` is true,
            one column per name in ``self.classes``. The index is reset, so
            items are addressed positionally (0 .. len-1).
        config: dict; only ``config['max_length']`` is read here.
        tokenizer: object exposing ``encode_plus`` whose result provides
            'input_ids', 'token_type_ids' and 'attention_mask'.
        is_test: when true, ``__getitem__`` returns only the inputs dict.

    Raises:
        ValueError: if ``tokenizer`` is None.
    """

    def __init__(self, df, config, tokenizer=None, is_test=False):
        if tokenizer is None:
            # Fail early instead of silently depending on a notebook-level global.
            raise ValueError("EssayDataset requires a tokenizer")
        self.df = df.reset_index(drop=True)
        self.classes = ['cohesion','syntax','vocabulary','phraseology','grammar','conventions']
        self.max_len = config['max_length']
        self.tokenizer = tokenizer
        self.is_test = is_test

    def __getitem__(self, idx):
        """Return ``inputs`` (test mode) or ``(inputs, targets)`` for row ``idx``."""
        sample = self.df['full_text'][idx]
        # Use the tokenizer given to this dataset, not whatever `tokenizer`
        # happens to be bound to at module level.
        tokenized = self.tokenizer.encode_plus(sample,
                                               None,
                                               add_special_tokens=True,
                                               max_length=self.max_len,
                                               truncation=True,
                                               padding='max_length'
                                              )
        inputs = {
            "input_ids": torch.tensor(tokenized['input_ids'], dtype=torch.long),
            "token_type_ids": torch.tensor(tokenized['token_type_ids'], dtype=torch.long),
            "attention_mask": torch.tensor(tokenized['attention_mask'], dtype=torch.long)
        }

        if self.is_test:
            return inputs

        label = self.df.loc[idx, self.classes].to_list()
        targets = {
            "labels": torch.tensor(label, dtype=torch.float32),
        }

        return inputs, targets

    def __len__(self):
        return len(self.df)

# Caso 1: conjunto de validación

In [None]:
# Hold out a val_ratio share of main_df for validation; the fixed random_state keeps the split repeatable.
train_df, val_df = train_test_split(main_df,test_size=val_ratio,random_state=1357,shuffle=True)
print('dataframe shapes:',train_df.shape, val_df.shape)

dataframe shapes: (2815, 8) (313, 8)


In [None]:
# Wrap each split in an EssayDataset. The test split is built with is_test=True,
# so its items contain only the tokenized inputs (no labels).
train_ds = EssayDataset(train_df, config, tokenizer=tokenizer)
val_ds = EssayDataset(val_df, config, tokenizer=tokenizer)
test_ds = EssayDataset(test_df, config, tokenizer=tokenizer, is_test = True)

In [None]:
# Inspect one tokenized test item
test_ds[0]

{'input_ids': tensor([   1,  325, 5973,  ...,    0,    0,    0]),
 'token_type_ids': tensor([0, 0, 0,  ..., 0, 0, 0]),
 'attention_mask': tensor([1, 1, 1,  ..., 0, 0, 0])}

In [None]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train_ds,
                                           batch_size=config['batch_size'],
                                           shuffle=True,
                                           num_workers=2,
                                           pin_memory=True
                                          )
# Validation batches keep a fixed order: the loss is an average of per-batch
# column-wise RMSE, so reshuffling would make the validation metric (and the
# best-epoch selection based on it) vary with batch composition.
val_loader = torch.utils.data.DataLoader(val_ds,
                                         batch_size=config['batch_size'],
                                         shuffle=False,
                                         num_workers=2,
                                         pin_memory=True
                                        )


# Model

In [None]:
class MeanPooling(nn.Module):
    """Average the token embeddings over the positions kept by the attention mask."""

    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        """Return the masked mean along dimension 1 of ``last_hidden_state``."""
        # Broadcast the mask to the embedding shape so masked tokens contribute zero.
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        masked_total = torch.sum(last_hidden_state * mask, 1)
        # Clamp the token count so a fully masked row divides by 1e-9 instead of 0.
        token_count = torch.clamp(mask.sum(1), min=1e-9)
        return masked_total / token_count

class MaxPooling(nn.Module):
    """Element-wise maximum of the token embeddings over the unmasked positions."""

    def forward(self, last_hidden_state, attention_mask):
        """Return the max along dimension 1, ignoring positions whose mask is 0.

        The input tensor is left untouched: masking is applied to a copy, so
        other consumers of the same hidden states do not see the -1e9 filler.
        """
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        # Out-of-place fill: padding tokens get a large negative value so they never win the max.
        masked_hidden_state = last_hidden_state.masked_fill(input_mask_expanded == 0, -1e9)
        max_embeddings = torch.max(masked_hidden_state, 1)[0]
        return max_embeddings

class AttentionPooling(nn.Module):
    """Pool token embeddings with a learned, softmax-normalised score per token."""

    def __init__(self, hidden_size):
        super().__init__()
        # One scalar score per token embedding.
        self.attention_weights = nn.Linear(hidden_size, 1)

    def forward(self, last_hidden_state, attention_mask):
        """Return the attention-weighted sum of ``last_hidden_state`` along dimension 1."""
        scores = self.attention_weights(last_hidden_state).squeeze(-1)
        # Masked positions get a very negative score so softmax gives them ~0 weight.
        scores = scores.masked_fill(attention_mask == 0, -1e9)
        probs = nn.functional.softmax(scores, dim=-1)
        return (last_hidden_state * probs.unsqueeze(-1)).sum(1)

In [None]:
# WLP
class WeightedLayerPooling(nn.Module):
    """Weighted average of the encoder layers from ``layer_start`` onward.

    Args:
        num_hidden_layers: number of encoder layers; ``num_hidden_layers + 1``
            stacked entries are expected, counted from index 0.
        layer_start: index of the first stacked entry included in the average.
        layer_weights: optional weights, one per included layer; when omitted a
            learnable vector of ones is created.
    """

    def __init__(self, num_hidden_layers, layer_start: int = 4, layer_weights = None):
        super().__init__()
        self.layer_start = layer_start
        self.num_hidden_layers = num_hidden_layers
        if layer_weights is None:
            n_used_layers = num_hidden_layers + 1 - layer_start
            layer_weights = nn.Parameter(torch.ones(n_used_layers, dtype=torch.float))
        self.layer_weights = layer_weights

    def forward(self, features):
        """Add 'token_embeddings' to ``features`` (in place) and return the same dict."""
        stacked = torch.stack(features['all_layer_embeddings'])
        selected = stacked[self.layer_start:, :, :, :]

        # One weight per layer, broadcast over the three remaining dimensions.
        per_layer_weight = self.layer_weights[:, None, None, None].expand(selected.size())
        pooled = (per_layer_weight * selected).sum(dim=0) / self.layer_weights.sum()

        features.update({'token_embeddings': pooled})
        return features

# GeM
class GeMText(nn.Module):
    """Generalized-mean (GeM) pooling over the unmasked token positions.

    Args:
        dim: dimension that is pooled away (1 by default).
        cfg: accepted but not read by this class.
        p: initial value of the learnable exponent.
        eps: lower clamp applied to the activations and to the token count.
    """

    def __init__(self, dim=1, cfg=None, p=3, eps=1e-6):
        super(GeMText, self).__init__()
        self.dim = dim
        # `nn.Parameter`: a bare `Parameter` is not in scope via this notebook's imports.
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps
        self.feat_mult = 1

    def forward(self, x, attention_mask):
        """Return ``(mean over unmasked tokens of clamp(x, eps) ** p) ** (1 / p)``."""
        # Cast the mask to the activation dtype so the sums and clamps below stay in floating point.
        attention_mask_expanded = attention_mask.unsqueeze(-1).expand(x.shape).to(x.dtype)
        x = (x.clamp(min=self.eps) * attention_mask_expanded).pow(self.p).sum(self.dim)
        ret = x / attention_mask_expanded.sum(self.dim).clip(min=self.eps)
        ret = ret.pow(1 / self.p)
        return ret

In [None]:
class LSTMPooling(nn.Module):
    """Run an LSTM/GRU across the per-layer first-token embeddings and keep the last step.

    Args:
        num_layers: number of encoder layers to read (entries 1..num_layers of
            ``all_hidden_states``; entry 0 is skipped).
        hidden_size: size of each layer's token embedding (the RNN input size).
        hiddendim_lstm: hidden size of the RNN.
        dropout_rate: dropout applied to the pooled vector.
        is_lstm: True builds a unidirectional LSTM, False a bidirectional GRU.
    """

    def __init__(self, num_layers, hidden_size, hiddendim_lstm, dropout_rate, is_lstm=True):
        super(LSTMPooling, self).__init__()
        self.num_hidden_layers = num_layers
        self.hidden_size = hidden_size
        self.hiddendim_lstm = hiddendim_lstm

        if is_lstm:
            self.lstm = nn.LSTM(self.hidden_size, self.hiddendim_lstm, batch_first=True)
        else:
            self.lstm = nn.GRU(self.hidden_size, self.hiddendim_lstm, batch_first=True, bidirectional=True)

        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, all_hidden_states):
        """Return the RNN output at the last layer step, after dropout."""
        # First-token embedding of every layer, each (batch, hidden_size).
        first_token_per_layer = [all_hidden_states[layer_i][:, 0]
                                 for layer_i in range(1, self.num_hidden_layers + 1)]
        # Stack along a new dimension 1 to get (batch, num_layers, hidden_size) directly.
        # Stacking on the last dimension and then calling .view() would only
        # reinterpret memory and mix the layer and hidden axes; skipping
        # .squeeze() also keeps the batch dimension when batch size is 1.
        hidden_states = torch.stack(first_token_per_layer, dim=1)
        out, _ = self.lstm(hidden_states, None)
        out = self.dropout(out[:, -1, :])
        return out

In [None]:
class AWP:
    """Adversarial Weight Perturbation helper.

    Usage per training step: ``perturb()`` -> forward/loss -> ``backward()`` ->
    ``restore()`` -> ``optimizer.step()``. ``perturb`` nudges the selected
    weights along the optimizer's running gradient average; ``restore`` puts
    the original weights back so the optimizer updates the unperturbed model.

    Args:
        model: module whose parameters are perturbed.
        optimizer: optimizer whose per-parameter state provides 'exp_avg'
            (the perturbation direction).
        adv_param: substring a parameter name must contain to be perturbed.
        adv_lr: step size of the perturbation, relative to the weight norm.
        adv_eps: maximum relative change allowed per weight element.
    """

    def __init__(self, model, optimizer, *, adv_param='weight',
                 adv_lr=0.001, adv_eps=0.001):
        self.model = model
        self.optimizer = optimizer
        self.adv_param = adv_param
        self.adv_lr = adv_lr
        self.adv_eps = adv_eps
        # name -> reusable buffer holding the weights as they were before perturbation
        self.backup = {}
        # names saved by the most recent perturb() that restore() has not put back yet
        self._pending = set()

    def perturb(self, input_ids, attention_mask, y, criterion):
        """
        Perturb model parameters for AWP gradient
        Call before loss and loss.backward()

        The four arguments are accepted for interface compatibility and are not read.
        """
        self._save()  # save model parameters
        self._attack_step()  # perturb weights

    def _is_target(self, name, param):
        """True for trainable parameters that have a gradient and match ``adv_param``."""
        return param.requires_grad and param.grad is not None and self.adv_param in name

    def _attack_step(self):
        e = 1e-6
        for name, param in self.model.named_parameters():
            if not self._is_target(name, param):
                continue
            # Before the optimizer's first step there is no running average yet:
            # skip instead of raising KeyError (and without creating state entries).
            grad = self.optimizer.state.get(param, {}).get('exp_avg')
            if grad is None:
                continue
            norm_grad = torch.norm(grad)
            norm_data = torch.norm(param.detach())

            if norm_grad != 0 and not torch.isnan(norm_grad):
                # Set lower and upper limit in change
                limit_eps = self.adv_eps * param.detach().abs()
                param_min = param.data - limit_eps
                param_max = param.data + limit_eps

                # Perturb along gradient
                # w += (adv_lr * |w| / |grad|) * grad
                param.data.add_(grad, alpha=(self.adv_lr * (norm_data + e) / (norm_grad + e)))

                # Apply the limit to the change
                param.data.clamp_(param_min, param_max)

    def _save(self):
        for name, param in self.model.named_parameters():
            if self._is_target(name, param):
                if name not in self.backup:
                    self.backup[name] = param.clone().detach()
                else:
                    self.backup[name].copy_(param.data)
                self._pending.add(name)

    def restore(self):
        """
        Put back the weights saved by the latest perturb(): AWP is meant to
        affect the gradients only, not the weights the optimizer updates.
        Call after loss.backward(), before optimizer.step()

        Only weights saved since the previous restore() are written back, so a
        restore() without a matching perturb() is a no-op and can never
        overwrite newer weights with a stale backup.
        """
        if not self._pending:
            return
        for name, param in self.model.named_parameters():
            if name in self._pending:
                param.data.copy_(self.backup[name])
        self._pending.clear()

In [None]:
class EssayModel(nn.Module):
    """Transformer encoder followed by a small regression head over three pooled views.

    The encoder's last hidden state is pooled with mean, max and attention
    pooling; the three vectors are concatenated and passed through
    ``fc1 -> fc2`` under several dropout rates whose outputs are averaged
    (multi-sample dropout).

    Args:
        config: dict providing 'model' (checkpoint name), 'freeze_encoder'
            and 'dropout'.
        num_classes: number of regression outputs.
    """

    def __init__(self,config,num_classes=6):
        super(EssayModel,self).__init__()
        self.model_name = config['model']
        self.freeze = config['freeze_encoder']

        self.encoder = AutoModel.from_pretrained(self.model_name, token= access_token)
        if self.freeze:
            # Only the pooling/head parameters stay trainable.
            for param in self.encoder.base_model.parameters():
                param.requires_grad = False

        self.mean_pooler = MeanPooling()
        self.max_pooler = MaxPooling()
        self.attention_pooler = AttentionPooling(self.encoder.config.hidden_size)
        #self.weighted_pooler = WeightedLayerPooling(self.encoder.config.num_hidden_layers, layer_start=4)

        # mean + max + attention vectors are concatenated
        pooled_output_dim = 3 * self.encoder.config.hidden_size

        # Dropout rates are 1x .. 5x the configured base rate.
        self.dropouts = nn.ModuleList([
                nn.Dropout(config['dropout']*i) for i in range(1,6)
            ])

        self.fc1 = nn.Linear(pooled_output_dim,64)
        self.fc2 = nn.Linear(64,num_classes)


    def forward(self,inputs):
        """Return the averaged head outputs for a batch.

        ``inputs`` is the dict of tensors passed to the encoder as keyword
        arguments; it must contain 'attention_mask'.
        """
        outputs = self.encoder(**inputs,return_dict=True)
        hidden = outputs['last_hidden_state']
        mask = inputs['attention_mask']

        mean_pooled = self.mean_pooler(hidden, mask)
        # Use the attention pooler here (this branch previously called the max
        # pooler a second time, leaving the attention pooler unused). It runs
        # before max pooling so it always sees unmodified hidden states.
        attention_pooled = self.attention_pooler(hidden, mask)
        max_pooled = self.max_pooler(hidden, mask)
        pooled = torch.cat((mean_pooled,max_pooled,attention_pooled),dim=1)

        # Multi-sample dropout: same head, one pass per dropout rate, then average.
        head_outputs = [self.fc2(self.fc1(dropout(pooled))) for dropout in self.dropouts]
        return torch.stack(head_outputs).sum(dim=0) / len(self.dropouts)

In [None]:
class Trainer:
    """Training, validation and inference loop driven by a HuggingFace ``Accelerator``.

    Args:
        model: module called as ``model(inputs)`` with a dict of tensors.
        loaders: ``(train_loader, val_loader)``; both yield ``(inputs, targets)``
            where ``targets['labels']`` holds the regression targets.
        config: dict providing 'awp_start', 'lr', 'adam_eps', 'epochs' and
            'enable_scheduler'.
        accelerator: prepares model/optimizer/loaders and handles gradient
            accumulation and the backward pass.

    Attributes of interest after ``fit()``: ``train_losses`` / ``val_losses``
    (one mean loss per epoch), ``best_val_loss`` (lowest mean validation loss)
    and ``best_model_weights`` (an independent CPU copy of the weights at that
    epoch).
    """

    def __init__(self, model, loaders, config, accelerator):
        self.model = model
        self.train_loader, self.val_loader = loaders
        self.config = config
        self.input_keys = ['input_ids','token_type_ids','attention_mask']
        self.accelerator = accelerator
        self.awp_start = config['awp_start']
        self.optim = self._get_optim()

        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optim, T_0=5,eta_min=1e-7)
        # Set up AWP on the same model/optimizer pair
        self.awp = AWP(model, self.optim, adv_lr=0.001, adv_eps=0.001)
        self.train_losses = []
        self.val_losses = []
        self.best_val_loss = float('inf')
        self.best_model_weights = None

    def prepare(self):
        """Let the accelerator wrap the model, optimizer, loaders and scheduler."""
        self.model, self.optim, self.train_loader, self.val_loader, self.scheduler, self.awp = self.accelerator.prepare(
            self.model,
            self.optim,
            self.train_loader,
            self.val_loader,
            self.scheduler,
            self.awp
        )

    def _get_optim(self):
        """Build AdamW with weight decay disabled for biases and LayerNorm weights."""
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
        optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=self.config['lr'], eps=self.config['adam_eps'])
        return optimizer


    def loss_fn(self, outputs, targets):
        """Mean column-wise RMSE: RMSE per target column over the batch, then averaged."""
        colwise_rmse = torch.sqrt(torch.mean(torch.square(targets - outputs), dim=0))
        loss = torch.mean(colwise_rmse, dim=0)
        return loss


    def train_one_epoch(self,epoch):
        """Run one pass over the training loader and append the mean loss to ``train_losses``."""

        running_loss = 0.
        progress = tqdm(self.train_loader, total=len(self.train_loader))

        for idx,(inputs,targets) in enumerate(progress):
            with self.accelerator.accumulate(self.model):

                if epoch >= self.awp_start:
                    # Perturb BEFORE the forward pass: the loss (and therefore the
                    # gradients) must be computed with the perturbed weights,
                    # otherwise the perturbation cannot influence training.
                    self.awp.perturb(inputs['input_ids'], inputs['attention_mask'], targets['labels'], self.loss_fn)

                outputs = self.model(inputs)
                loss = self.loss_fn(outputs, targets['labels'])
                running_loss += loss.item()

                self.accelerator.backward(loss)
                # Put the unperturbed weights back before the optimizer updates them.
                self.awp.restore()

                self.optim.step()


                if self.config['enable_scheduler']:
                    # Fractional epoch so the cosine schedule advances within an epoch.
                    self.scheduler.step(epoch - 1 + idx / len(self.train_loader))

                self.optim.zero_grad()

                del inputs, targets, outputs, loss


        train_loss = running_loss/len(self.train_loader)
        self.train_losses.append(train_loss)

    def _update_best(self, val_loss):
        """Record ``val_loss`` and snapshot the weights when it improves on the best so far."""
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            # Independent CPU copies: a plain state_dict().copy() is shallow and
            # would keep pointing at the live parameters, so the "best" weights
            # would silently follow every later update.
            self.best_model_weights = {
                name: tensor.detach().to('cpu', copy=True)
                for name, tensor in self.model.state_dict().items()
            }

    @torch.no_grad()
    def valid_one_epoch(self,epoch):
        """Run one pass over the validation loader, log the mean loss and track the best epoch."""

        running_loss = 0.
        progress = tqdm(self.val_loader, total=len(self.val_loader))

        for (inputs, targets) in progress:

            outputs = self.model(inputs)

            loss = self.loss_fn(outputs, targets['labels'])
            running_loss += loss.item()

            del inputs, targets, outputs, loss


        val_loss = running_loss/len(self.val_loader)
        self.val_losses.append(val_loss)
        self._update_best(val_loss)

    @torch.no_grad()
    def _forward_batches(self, loader):
        """Evaluate the model on every batch of ``loader``; return the per-batch outputs on CPU.

        Each batch is a dict of tensors and is moved to the accelerator's
        device first. The model is switched to eval mode and autograd is off.
        """
        self.model.eval()
        collected = []
        for batch in loader:
            inputs = {key: val.to(self.accelerator.device) for key, val in batch.items()}
            collected.append(self.model(inputs).detach().cpu())
        return collected

    def test(self, test_loader):
        """Return the model outputs for ``test_loader`` as one CPU tensor."""
        return torch.concat(self._forward_batches(test_loader))

    def fit(self):
        """Train for ``config['epochs']`` epochs, then reload the best validation weights."""

        self.prepare()

        fit_progress = tqdm(
            range(1, self.config['epochs']+1),
            leave = True,
            desc="Training..."
        )

        for epoch in fit_progress:

            self.model.train()
            fit_progress.set_description(f"EPOCH {epoch} / {self.config['epochs']} | training...")
            self.train_one_epoch(epoch)
            self.clear()

            self.model.eval()
            fit_progress.set_description(f"EPOCH {epoch} / {self.config['epochs']} | validating...")
            self.valid_one_epoch(epoch)
            self.clear()

            print(f"{'➖️'*10} EPOCH {epoch} / {self.config['epochs']} {'➖️'*10}")
            print(f"train loss: {self.train_losses[-1]}")
            print(f"valid loss: {self.val_losses[-1]}\n\n")

        # At the end of training, reset the model to its best state
        if self.best_model_weights is not None:
            self.model.load_state_dict(self.best_model_weights)



    def clear(self):
        """Free Python garbage and cached CUDA memory between phases."""
        gc.collect()
        torch.cuda.empty_cache()

    def predict(self, test_loader):
        """Return the model outputs for ``test_loader`` as one NumPy array."""
        predictions = [output.numpy() for output in self._forward_batches(test_loader)]
        return np.concatenate(predictions, axis=0)

    def save_model(self, file_path):
        """Save the model's state_dict to ``file_path``."""
        torch.save(self.model.state_dict(), file_path)

    def load_model(self, file_path):
        """Load a state_dict from ``file_path`` into the model."""
        # map_location lets a checkpoint saved on another device (e.g. GPU) load here
        self.model.load_state_dict(torch.load(file_path, map_location=self.accelerator.device))

        # Make sure the model ends up on the accelerator's device
        self.model.to(self.accelerator.device)

        print(f"Modelo cargado desde {file_path}")


# Training with HuggingFace Accelerate

In [None]:
# The accumulation step count comes from config; Trainer.train_one_epoch wraps each batch in accelerator.accumulate().
accelerator = Accelerator(gradient_accumulation_steps=config['gradient_accumulation_steps'])

In [None]:
# Build the model on the accelerator's device and hand both loaders to the Trainer
model = EssayModel(config).to(device=accelerator.device )
trainer = Trainer(model, (train_loader, val_loader), config, accelerator)

In [None]:
%%time
# Full train/validate loop (Trainer.fit), timed by the cell magic above
trainer.fit()

Training...:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 1 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.6415204635228623
valid loss: 0.5752608299255371




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 2 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.5319728842005134
valid loss: 0.48817660436034205




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 3 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.5060592833059755
valid loss: 0.4776798695325851




  0%|          | 0/352 [00:00<?, ?it/s]

Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 4 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.48417903686111624
valid loss: 0.46597485467791555




  0%|          | 0/352 [00:00<?, ?it/s]

Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 5 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.47138992409137165
valid loss: 0.47937974110245707




  0%|          | 0/352 [00:00<?, ?it/s]

Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 6 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.5268672975626859
valid loss: 0.554510374367237




  0%|          | 0/352 [00:00<?, ?it/s]

Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 7 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.4991548802195625
valid loss: 0.4773168094456196




  0%|          | 0/352 [00:00<?, ?it/s]

Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 8 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.48666118847375567
valid loss: 0.45863300263881684




  0%|          | 0/352 [00:00<?, ?it/s]

Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 9 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.45929821275851945
valid loss: 0.46263681799173356




  0%|          | 0/352 [00:00<?, ?it/s]

Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP
Enable AWP

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 10 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.45249087863009085
valid loss: 0.4612312998622656


CPU times: user 17min 36s, sys: 7.96 s, total: 17min 44s
Wall time: 17min 33s


# Predicciones del conjunto de prueba

In [None]:
# Test-set loader. Shuffling stays off so the prediction rows keep the
# dataset's order; a later cell joins them to the test labels by position.
test_loader = torch.utils.data.DataLoader(
    test_ds,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [None]:
# Run the trainer's prediction pass over the test loader and keep the result.
# NOTE(review): `trainer.predict` is defined elsewhere; the output displayed by
# the next cell suggests it returns a float32 NumPy array with six values per
# row -- confirm against the Trainer class.
predictions = trainer.predict(test_loader)

In [None]:
# Display the raw predictions (the repr is truncated for large arrays).
predictions

array([[2.5369308, 2.292933 , 2.5307474, 2.3977187, 2.1857681, 2.218063 ],
       [2.7328768, 2.8648095, 2.907902 , 2.98207  , 3.170171 , 2.9260123],
       [3.701252 , 3.7173138, 3.707238 , 3.9037175, 3.8823946, 3.8481262],
       ...,
       [3.220044 , 3.2523313, 3.323613 , 3.20933  , 3.2762291, 3.3173845],
       [3.7265217, 3.692913 , 3.715847 , 3.6676583, 3.4546945, 3.6296394],
       [2.597064 , 2.251639 , 2.649451 , 2.3775766, 2.0419447, 2.3020399]],
      dtype=float32)

# Guardar el modelo

In [None]:
# Save the model through the trainer to a file on Google Drive.
# NOTE(review): absolute Colab path -- presumably requires Drive to be mounted
# at /content/drive before this cell runs; confirm.
# NOTE(review): this writes `deberta_base_v2.pth`, while the load cell further
# down reads `deberta_base_v1.pth` -- confirm the version mismatch is intended.
model_save_path = '/content/drive/MyDrive/Tesis/deberta_base_v2.pth'
trainer.save_model(model_save_path)

# Evaluación en el conjunto de prueba (no visto antes por el modelo)

In [None]:
# Wrap the raw prediction array in a DataFrame, one "<target>_pred" column
# per predicted score.
preds = pd.DataFrame(
    predictions,
    columns=[
        "cohesion_pred",
        "syntax_pred",
        "vocabulary_pred",
        "phraseology_pred",
        "grammar_pred",
        "conventions_pred",
    ],
)

In [None]:
# Ground-truth score columns from the test frame, re-indexed from 0 so they
# line up positionally with the prediction rows.
true_scores = test_df.loc[:, "cohesion":"conventions"].reset_index(drop=True)

# Labels on the left, predictions on the right.
preresults = pd.concat([true_scores, preds], axis=1)
preresults

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions,cohesion_pred,syntax_pred,vocabulary_pred,phraseology_pred,grammar_pred,conventions_pred
0,3.0,2.5,2.5,2.0,2.0,2.0,2.536931,2.292933,2.530747,2.397719,2.185768,2.218063
1,3.0,2.0,3.0,3.5,3.0,3.0,2.732877,2.864810,2.907902,2.982070,3.170171,2.926012
2,4.0,4.0,3.0,4.0,4.0,4.0,3.701252,3.717314,3.707238,3.903718,3.882395,3.848126
3,3.0,3.0,3.5,3.0,3.5,3.5,3.101037,3.134714,3.243597,3.426769,3.339916,3.443916
4,3.5,3.5,3.5,3.5,3.0,3.5,3.214050,3.126246,3.171850,3.163868,3.149998,2.976717
...,...,...,...,...,...,...,...,...,...,...,...,...
778,2.0,2.5,3.0,3.0,3.5,2.0,2.828016,2.947402,3.119020,3.095261,3.148752,2.556337
779,2.5,2.5,3.0,3.0,2.5,2.5,2.969252,3.021908,2.948768,3.050830,3.166217,3.171437
780,2.0,3.0,3.0,3.0,3.0,2.5,3.220044,3.252331,3.323613,3.209330,3.276229,3.317384
781,4.0,3.5,4.0,3.5,3.5,4.0,3.726522,3.692913,3.715847,3.667658,3.454695,3.629639


In [None]:
# Append one squared-error column per scoring target to `preresults`:
#   se_<target> = (<target> - <target>_pred) ** 2
# A single loop replaces six copy-pasted assignments, so the column naming
# cannot drift between targets.
score_columns = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
for score in score_columns:
    preresults[f"se_{score}"] = (preresults[score] - preresults[f"{score}_pred"]) ** 2

In [None]:
# Per-row squared error for every scoring target, collected in a frame of its
# own so column-wise means can be taken directly:
#   se_<target> = (<target> - <target>_pred) ** 2
# Built in one constructor call rather than by growing an empty DataFrame
# through six copy-pasted assignments; columns, order and index are unchanged.
score_columns = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
results = pd.DataFrame(
    {
        f"se_{score}": (preresults[score] - preresults[f"{score}_pred"]) ** 2
        for score in score_columns
    }
)

In [None]:
# Show the frame again, now including the se_* squared-error columns.
preresults

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions,cohesion_pred,syntax_pred,vocabulary_pred,phraseology_pred,grammar_pred,conventions_pred,se_cohesion,se_syntax,se_vocabulary,se_phraseology,se_grammar,se_conventions
0,3.0,2.5,2.5,2.0,2.0,2.0,2.536931,2.292933,2.530747,2.397719,2.185768,2.218063,0.214433,0.042877,0.000945,0.158180,0.034510,0.047552
1,3.0,2.0,3.0,3.5,3.0,3.0,2.732877,2.864810,2.907902,2.982070,3.170171,2.926012,0.071355,0.747895,0.008482,0.268252,0.028958,0.005474
2,4.0,4.0,3.0,4.0,4.0,4.0,3.701252,3.717314,3.707238,3.903718,3.882395,3.848126,0.089250,0.079912,0.500186,0.009270,0.013831,0.023066
3,3.0,3.0,3.5,3.0,3.5,3.5,3.101037,3.134714,3.243597,3.426769,3.339916,3.443916,0.010208,0.018148,0.065742,0.182131,0.025627,0.003145
4,3.5,3.5,3.5,3.5,3.0,3.5,3.214050,3.126246,3.171850,3.163868,3.149998,2.976717,0.081767,0.139692,0.107682,0.112985,0.022499,0.273825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
778,2.0,2.5,3.0,3.0,3.5,2.0,2.828016,2.947402,3.119020,3.095261,3.148752,2.556337,0.685611,0.200168,0.014166,0.009075,0.123375,0.309511
779,2.5,2.5,3.0,3.0,2.5,2.5,2.969252,3.021908,2.948768,3.050830,3.166217,3.171437,0.220198,0.272388,0.002625,0.002584,0.443845,0.450828
780,2.0,3.0,3.0,3.0,3.0,2.5,3.220044,3.252331,3.323613,3.209330,3.276229,3.317384,1.488507,0.063671,0.104725,0.043819,0.076303,0.668117
781,4.0,3.5,4.0,3.5,3.5,4.0,3.726522,3.692913,3.715847,3.667658,3.454695,3.629639,0.074790,0.037215,0.080743,0.028109,0.002053,0.137167


In [None]:
# Per-target mean squared error: column-wise mean of the squared errors.
mse = results.mean()
mse

se_cohesion       0.238749
se_syntax         0.213179
se_vocabulary     0.182916
se_phraseology    0.205683
se_grammar        0.221576
se_conventions    0.209849
dtype: float64

In [None]:
# Per-target RMSE: element-wise square root of the per-target MSE.
rmse = np.sqrt(mse)
rmse

se_cohesion       0.488619
se_syntax         0.461713
se_vocabulary     0.427687
se_phraseology    0.453523
se_grammar        0.470718
se_conventions    0.458093
dtype: float64

In [None]:
# Average the per-target RMSE values into a single summary score.
rmse.mean()

0.4600589793825727

In [None]:
# NOTE(review): this repeats the earlier `mse = results.mean()` cell, yet its
# saved output differs, so `results` presumably held values from another
# run or model when it was executed -- confirm which figures are final.
mse = results.mean()
mse

se_cohesion       0.237249
se_syntax         0.214843
se_vocabulary     0.183556
se_phraseology    0.204724
se_grammar        0.222089
se_conventions    0.203368
dtype: float64

In [None]:
# NOTE(review): repeat of the earlier per-target RMSE cell; its saved output
# differs from the first copy, so it presumably reflects another run --
# confirm which figures are final.
rmse = np.sqrt(mse)
rmse

se_cohesion       0.487082
se_syntax         0.463512
se_vocabulary     0.428435
se_phraseology    0.452464
se_grammar        0.471263
se_conventions    0.450963
dtype: float64

In [None]:
# Average of the per-target RMSE values.
# NOTE(review): repeat of an earlier cell with a different saved output;
# presumably from another run -- confirm which figure is final.
rmse.mean()

0.4589531885684844

In [None]:
# NOTE(review): third copy of the per-target RMSE cell. No `mse` cell comes
# directly before it and its saved output matches neither earlier copy, so
# `mse` presumably came from kernel state that is no longer reproducible
# top-to-bottom -- confirm or remove.
rmse = np.sqrt(mse)
rmse

se_cohesion       0.518120
se_syntax         0.501027
se_vocabulary     0.458781
se_phraseology    0.496667
se_grammar        0.472023
se_conventions    0.469415
dtype: float64

In [None]:
# Average of the per-target RMSE values.
# NOTE(review): third copy of this cell; its saved output matches neither
# earlier copy, presumably because it was computed for a different model --
# confirm and label which model it refers to.
rmse.mean()

0.48600545784911864

# Cargar el modelo

In [None]:
# NOTE(review): the next line is commented-out code; as written, this cell
# relies on a `trainer` object that already exists from an earlier cell.
#trainer = Trainer(model, (train_loader, val_loader), config, accelerator)
# NOTE(review): this reads `deberta_base_v1.pth`, whereas the save cell earlier
# in the notebook writes `deberta_base_v2.pth` -- confirm that is intended.
model_load_path = '/content/drive/MyDrive/Tesis/deberta_base_v1.pth'

# Load the previously trained model
trainer.load_model(model_load_path)

Modelo cargado desde /content/drive/MyDrive/Tesis/deberta_base_v1.pth
