In [None]:
pip install accelerate

Collecting accelerate
  Downloading accelerate-0.26.1-py3-none-any.whl (270 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/270.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/270.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.26.1


In [None]:
pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [None]:
from transformers import AutoModel
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import sentencepiece
import gc

from accelerate import Accelerator

# ----------
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# Central hyper-parameter dictionary read by the tokenizer, dataset, model,
# data loaders, Trainer and Accelerator cells below.
config = {
    # Hugging Face checkpoint name used for both the tokenizer and the encoder.
    'model': 'microsoft/deberta-v3-base',
    # Probability for the nn.Dropout layer created in EssayModel.
    # NOTE(review): EssayModel.forward never calls that layer, so this value has no effect.
    'dropout': 0.5,
    # Every essay is truncated / padded to exactly this many tokens.
    'max_length': 512,
    'batch_size': 8, # anything larger results in CUDA OOM (with an unfrozen encoder) on a Kaggle GPU
    'epochs': 10,
    # Learning rate passed to AdamW.
    'lr': 3e-4,
    # When True, the LR scheduler is stepped after every training batch.
    'enable_scheduler': True,
    # NOTE(review): not read by any visible code; Trainer always builds CosineAnnealingWarmRestarts.
    'scheduler': 'CosineAnnealingWarmRestarts',
    # Passed to Accelerator(gradient_accumulation_steps=...).
    'gradient_accumulation_steps': 2,
    'adam_eps': 1e-6, # AdamW's default is 1e-8
    # When True, EssayModel disables gradients for every encoder parameter.
    'freeze_encoder': True
}

In [None]:
# Never commit credentials to a notebook: read the Hugging Face token from the
# HF_TOKEN environment variable (or Colab secret) instead. The token that used to
# be hard-coded here has been exposed and must be revoked in the Hugging Face settings.
# The checkpoint is public, so a missing token (None) still allows the download.
access_token = os.environ.get("HF_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(config['model'], token=access_token)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



In [None]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the labelled essays from Google Drive (requires the Drive mount above).
df = pd.read_csv("/content/drive/MyDrive/Tesis/train.csv")
# The competition's separate test.csv is not loaded; the held-out test split is
# carved out of this frame in the next cell.

In [None]:
# Split fractions. val_ratio is applied later to main_df, i.e. it is a fraction
# of the non-test data rather than of the full dataset.
# NOTE(review): test_ratio is not referenced by this cell; the hold-out size is
# derived as 1 - train_ratio instead.
train_ratio = 0.8
test_ratio = 0.2
val_ratio = 0.1

# Hold out the test split; main_df is later divided into training and validation sets.
main_df, test_df = train_test_split(df, test_size= 1 - train_ratio, random_state=42)


In [None]:
main_df.shape

(3128, 8)

In [None]:
class EssayDataset:
    """Map-style dataset that tokenizes essays and exposes their six score labels.

    Args:
        df: DataFrame with a ``full_text`` column. Unless ``is_test`` is set it
            must also contain the six score columns listed in ``self.classes``.
            The index is reset so items can be addressed by position.
        config: Mapping that provides ``'max_length'`` (padded/truncated length).
        tokenizer: Hugging Face style tokenizer, called as ``tokenizer(text, ...)``
            and expected to return ``input_ids``, ``token_type_ids`` and
            ``attention_mask`` for a single text.
        is_test: When true, ``__getitem__`` returns only the model inputs.
    """

    def __init__(self, df, config, tokenizer=None, is_test=False):
        self.df = df.reset_index(drop=True)
        self.classes = ['cohesion','syntax','vocabulary','phraseology','grammar','conventions']
        self.max_len = config['max_length']
        self.tokenizer = tokenizer
        self.is_test = is_test

    def __getitem__(self, idx):
        """Return ``inputs`` (test mode) or ``(inputs, targets)`` for row ``idx``.

        Raises:
            ValueError: If the dataset was built without a tokenizer.
        """
        if self.tokenizer is None:
            raise ValueError("EssayDataset requires a tokenizer; pass tokenizer=... to the constructor")

        sample = self.df['full_text'][idx]
        # Use the tokenizer given to this instance. The previous code silently
        # relied on a notebook-level global named `tokenizer`, which ignored the
        # constructor argument and broke as soon as the class was used elsewhere.
        tokenized = self.tokenizer(sample,
                                   add_special_tokens=True,
                                   max_length=self.max_len,
                                   truncation=True,
                                   padding='max_length'
                                  )
        inputs = {
            "input_ids": torch.tensor(tokenized['input_ids'], dtype=torch.long),
            "token_type_ids": torch.tensor(tokenized['token_type_ids'], dtype=torch.long),
            "attention_mask": torch.tensor(tokenized['attention_mask'], dtype=torch.long)
        }

        if self.is_test:
            return inputs

        label = self.df.loc[idx, self.classes].to_list()
        targets = {
            "labels": torch.tensor(label, dtype=torch.float32),
        }

        return inputs, targets

    def __len__(self):
        """Number of essays in the dataset."""
        return len(self.df)

# Caso 1: conjunto de validación

In [None]:
# Carve the validation set out of main_df (the test split was already removed),
# so val_ratio is a fraction of main_df; the fixed random_state keeps the split repeatable.
train_df, val_df = train_test_split(main_df,test_size=val_ratio,random_state=1357,shuffle=True)
print('dataframe shapes:',train_df.shape, val_df.shape)

dataframe shapes: (2815, 8) (313, 8)


In [None]:
# Training and validation items are (inputs, targets) pairs; the test dataset is
# built with is_test=True, so its items contain the tokenized inputs only.
train_ds = EssayDataset(train_df, config, tokenizer=tokenizer)
val_ds = EssayDataset(val_df, config, tokenizer=tokenizer)
test_ds = EssayDataset(test_df, config, tokenizer=tokenizer, is_test = True)

In [None]:
test_ds[0]

{'input_ids': tensor([    1,   325,  5973,   264,   286,   266, 58585, 40955,  4389,   335,
           274,   418,   280,   297,   286,   266, 58585, 40955,  1727, 38710,
           274,   298,  2435,  5973,   262,   355,   263,   306,   551,   267,
          5699, 90999, 10339,   260,   273,  1757,   264,   291,  1548,   401,
           335,   262,   355,   286,   266, 58585, 40955,  4389,   306,   295,
         19315, 20198,   266,   688,   263,   264,   286,   266,  1453,  4389,
           269,   379,   539,   267,   432,   263,   327,   264,   286,   266,
          5973,  4389,   269,   539,   267,   262,  7169,  4673, 13282,   260,
          1244,   294,   273,   374,   267,   266,   961,  1867,   297, 45147,
           263,   273,   489,   286,   266,  5973,  4389,   275,   262, 77042,
           268,   270,   738,   261,   273,   394,   266,  2140,   361,   374,
           267,   262,   961, 35521, 59722,   297,   373,   489,   286,   266,
          2330,  4389,   261,   263,   

In [None]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train_ds,
                                           batch_size=config['batch_size'],
                                           shuffle=True,
                                           num_workers=2,
                                           pin_memory=True
                                          )
# Validation keeps a fixed order. Trainer.loss_fn computes a per-batch RMSE that is
# then averaged over batches, so the reported value depends on how rows are grouped;
# shuffling would add noise to the epoch-to-epoch comparison used to pick the best weights.
val_loader = torch.utils.data.DataLoader(val_ds,
                                         batch_size=config['batch_size'],
                                         shuffle=False,
                                         num_workers=2,
                                         pin_memory=True
                                        )


# Model

In [None]:
class MeanPooling(nn.Module):
    """Average the hidden states along dim 1, counting only unmasked positions."""

    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        """Return the mask-weighted mean of ``last_hidden_state`` over dim 1.

        ``attention_mask`` gets a trailing axis and is expanded to the shape of
        ``last_hidden_state`` (presumably (batch, seq_len, hidden) - confirm with caller).
        Positions whose mask is 0 contribute nothing to the average.
        """
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        token_sum = (last_hidden_state * mask).sum(1)
        # Clamp the count so a fully masked row divides by 1e-9 instead of 0.
        token_count = mask.sum(1).clamp(min=1e-9)
        return token_sum / token_count

In [None]:
class EssayModel(nn.Module):
    """Pretrained encoder + mean pooling + two stacked linear layers producing ``num_classes`` scores.

    Args:
        config: Mapping providing ``'model'`` (checkpoint name), ``'freeze_encoder'``
            and ``'dropout'``.
        num_classes: Size of the output vector (default 6).
    """

    def __init__(self,config,num_classes=6):
        super().__init__()
        self.model_name = config['model']
        self.freeze = config['freeze_encoder']

        self.encoder = AutoModel.from_pretrained(self.model_name)
        if self.freeze:
            # Exclude every encoder weight from gradient computation.
            for weight in self.encoder.base_model.parameters():
                weight.requires_grad = False

        self.pooler = MeanPooling()
        # NOTE(review): this layer is created but forward() never applies it.
        self.dropout = nn.Dropout(config['dropout'])
        encoder_width = self.encoder.config.hidden_size
        self.fc1 = nn.Linear(encoder_width, 64)
        self.fc2 = nn.Linear(64, num_classes)


    def forward(self,inputs):
        """Encode ``inputs`` (a dict of tokenizer tensors), mean-pool, and project to the scores."""
        encoded = self.encoder(**inputs, return_dict=True)
        pooled = self.pooler(encoded['last_hidden_state'], inputs['attention_mask'])
        # The two linear layers are applied back to back, with no activation between them.
        return self.fc2(self.fc1(pooled))

In [None]:
class Trainer:
    """Training / validation / inference driver built on HuggingFace Accelerate.

    Args:
        model: Module called as ``model(inputs)`` where ``inputs`` is a dict of tensors.
        loaders: ``(train_loader, val_loader)`` pair yielding ``(inputs, targets)``
            batches, with ``targets['labels']`` holding the regression targets.
        config: Mapping providing ``'lr'``, ``'adam_eps'``, ``'epochs'`` and
            ``'enable_scheduler'``.
        accelerator: ``accelerate.Accelerator`` used for device placement,
            gradient accumulation and the backward pass.

    After ``fit()`` the model holds the weights of the epoch with the lowest mean
    validation loss; per-epoch losses are kept in ``train_losses`` / ``val_losses``.
    """

    def __init__(self, model, loaders, config, accelerator):
        self.model = model
        self.train_loader, self.val_loader = loaders
        self.config = config
        self.input_keys = ['input_ids','token_type_ids','attention_mask']
        self.accelerator = accelerator

        self.optim = self._get_optim()

        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optim, T_0=5,eta_min=1e-7)

        self.train_losses = []
        self.val_losses = []
        # Lowest mean validation loss seen so far and the weights that produced it.
        self.best_val_loss = float('inf')
        self.best_model_weights = None

    def prepare(self):
        """Hand model, optimizer, loaders and scheduler to ``accelerator.prepare``."""
        self.model, self.optim, self.train_loader, self.val_loader, self.scheduler = self.accelerator.prepare(
            self.model,
            self.optim,
            self.train_loader,
            self.val_loader,
            self.scheduler
        )

    def _get_optim(self):
        """Build AdamW with weight decay disabled for biases and LayerNorm weights."""
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
        optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=self.config['lr'], eps=self.config['adam_eps'])
        return optimizer

    def _snapshot_weights(self):
        """Return an independent CPU copy of the model's current state dict.

        ``state_dict()`` returns tensors that share storage with the live
        parameters, so a shallow ``dict.copy()`` keeps following later updates.
        Each tensor is therefore copied explicitly (to CPU, so that the snapshot
        does not occupy accelerator memory).
        """
        return {
            name: value.detach().to("cpu", copy=True) if torch.is_tensor(value) else value
            for name, value in self.model.state_dict().items()
        }


    def loss_fn(self, outputs, targets):
        """Mean over columns of the per-column RMSE computed across the batch (dim 0)."""
        colwise_rmse = torch.sqrt(torch.mean(torch.square(targets - outputs), dim=0))
        loss = torch.mean(colwise_rmse, dim=0)
        return loss


    def train_one_epoch(self,epoch):
        """Run one pass over ``train_loader`` and append the mean batch loss to ``train_losses``."""

        running_loss = 0.
        progress = tqdm(self.train_loader, total=len(self.train_loader))

        for idx,(inputs,targets) in enumerate(progress):
            with self.accelerator.accumulate(self.model):

                outputs = self.model(inputs)

                loss = self.loss_fn(outputs, targets['labels'])
                running_loss += loss.item()

                self.accelerator.backward(loss)

                self.optim.step()

                if self.config['enable_scheduler']:
                    # Fractional epoch so the cosine schedule advances within an epoch.
                    self.scheduler.step(epoch - 1 + idx / len(self.train_loader))

                self.optim.zero_grad()

                del inputs, targets, outputs, loss


        train_loss = running_loss/len(self.train_loader)
        self.train_losses.append(train_loss)

    @torch.no_grad()
    def valid_one_epoch(self,epoch):
        """Evaluate on ``val_loader``, record the mean loss and keep the best weights."""

        running_loss = 0.
        progress = tqdm(self.val_loader, total=len(self.val_loader))

        for (inputs, targets) in progress:

            outputs = self.model(inputs)

            loss = self.loss_fn(outputs, targets['labels'])
            running_loss += loss.item()

            del inputs, targets, outputs, loss


        val_loss = running_loss/len(self.val_loader)
        self.val_losses.append(val_loss)
        # Track the mean loss (the value that is reported) and store a real copy
        # of the weights; the earlier shallow copy aliased the live parameters, so
        # restoring it after training did not actually roll anything back.
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            self.best_model_weights = self._snapshot_weights()


    @torch.no_grad()
    def test(self, test_loader):
        """Return predictions for ``test_loader`` as one CPU tensor.

        Batches are passed to the model as-is (no device transfer is done here).
        The model is put in eval mode and gradients are disabled.
        """
        self.model.eval()

        preds = []
        for (inputs) in test_loader:

            outputs = self.model(inputs)
            preds.append(outputs.detach().cpu())

        preds = torch.concat(preds)
        return preds

    def fit(self):
        """Train for ``config['epochs']`` epochs, then restore the best validation weights."""

        self.prepare()

        fit_progress = tqdm(
            range(1, self.config['epochs']+1),
            leave = True,
            desc="Training..."
        )

        for epoch in fit_progress:

            self.model.train()
            fit_progress.set_description(f"EPOCH {epoch} / {self.config['epochs']} | training...")
            self.train_one_epoch(epoch)
            self.clear()

            self.model.eval()
            fit_progress.set_description(f"EPOCH {epoch} / {self.config['epochs']} | validating...")
            self.valid_one_epoch(epoch)
            self.clear()

            print(f"{'➖️'*10} EPOCH {epoch} / {self.config['epochs']} {'➖️'*10}")
            print(f"train loss: {self.train_losses[-1]}")
            print(f"valid loss: {self.val_losses[-1]}\n\n")

        # At the end of training, reset the model to its best state.
        if self.best_model_weights is not None:
            self.model.load_state_dict(self.best_model_weights)



    def clear(self):
        """Run the garbage collector and release cached CUDA memory."""
        gc.collect()
        torch.cuda.empty_cache()

    @torch.no_grad()
    def predict(self, test_loader):
        """Predict on a test loader and return a NumPy array.

        Each batch is a dict of tensors that is moved to ``accelerator.device``
        before the forward pass. Runs in eval mode with gradients disabled.
        """
        self.model.eval()
        predictions = []
        for batch in test_loader:
            inputs = {key: val.to(self.accelerator.device) for key, val in batch.items()}
            outputs = self.model(inputs)
            predictions.append(outputs.detach().cpu().numpy())

        return np.concatenate(predictions, axis=0)

    def save_model(self, file_path):
        """Save the model's state dict to ``file_path``."""
        torch.save(self.model.state_dict(), file_path)

    def load_model(self, file_path):
        """Load model weights from ``file_path`` and move the model to the accelerator device."""
        # map_location lets a checkpoint saved on a GPU load on whatever device is in use now.
        self.model.load_state_dict(torch.load(file_path, map_location=self.accelerator.device))

        # Make sure the model lives on the right device.
        self.model.to(self.accelerator.device)

        print(f"Modelo cargado desde {file_path}")


# Training with HuggingFace Accelerate

In [None]:
accelerator = Accelerator(gradient_accumulation_steps=config['gradient_accumulation_steps'])

In [None]:
model = EssayModel(config).to(device=accelerator.device )
trainer = Trainer(model, (train_loader, val_loader), config, accelerator)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

In [None]:
%%time
trainer.fit()

Training...:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 1 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.7161148959262804
valid loss: 0.5229944884777069




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 2 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.47426135050640866
valid loss: 0.4894890896975994




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 3 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.45849056457254017
valid loss: 0.49473111927509306




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 4 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.4522422000088475
valid loss: 0.47510265186429024




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 5 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.44894225683740596
valid loss: 0.47214500233531




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 6 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.45623104917732155
valid loss: 0.50222502425313




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 7 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.44963872915303166
valid loss: 0.4918330185115337




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 8 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.44303832711143926
valid loss: 0.47018138840794566




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 9 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.43993183047595347
valid loss: 0.4636309489607811




  0%|          | 0/352 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️ EPOCH 10 / 10 ➖️➖️➖️➖️➖️➖️➖️➖️➖️➖️
train loss: 0.4357493744993752
valid loss: 0.4689690858125687


CPU times: user 6min 35s, sys: 6.58 s, total: 6min 42s
Wall time: 6min 36s


# Predicciones del conjunto de prueba

In [None]:
# Loader for the held-out test split. shuffle=False keeps batches in the order of
# test_ds, so the stacked predictions can be aligned row by row with test_df.
test_loader = torch.utils.data.DataLoader(test_ds,
                                         batch_size=config['batch_size'],
                                         shuffle=False,
                                         num_workers=2,
                                         pin_memory=True
                                        )

In [None]:
predictions = trainer.predict(test_loader)

In [None]:
predictions

array([[2.6541789, 2.439184 , 2.7805128, 2.619595 , 2.3654215, 2.2567284],
       [3.123642 , 3.1593719, 3.3096251, 3.357508 , 3.4380133, 3.1537507],
       [3.8949594, 3.914886 , 3.9676604, 4.049704 , 4.1540833, 4.0372643],
       ...,
       [3.3629544, 3.357554 , 3.4483097, 3.3544085, 3.4629943, 3.4519408],
       [3.969756 , 3.929296 , 3.910649 , 3.9489663, 3.6989625, 3.850896 ],
       [2.8264587, 2.479192 , 2.8828294, 2.5902004, 2.1733067, 2.4947562]],
      dtype=float32)

# Guardar el modelo

In [None]:
model_save_path = '/content/drive/MyDrive/Tesis/deberta_base_v1.pth'
trainer.save_model(model_save_path)

# Evaluación en el conjunto de prueba (no visto antes por el modelo)

In [None]:
# Wrap the raw prediction array in a DataFrame with one "<target>_pred" column per score.
preds = pd.DataFrame(
    predictions,
    columns=[
        f"{target}_pred"
        for target in ("cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions")
    ],
)

In [None]:
preresults =pd.concat([test_df.loc[:,"cohesion":"conventions"].reset_index(drop=True), preds], axis=1);preresults

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions,cohesion_pred,syntax_pred,vocabulary_pred,phraseology_pred,grammar_pred,conventions_pred
0,3.0,2.5,2.5,2.0,2.0,2.0,2.654179,2.439184,2.780513,2.619595,2.365422,2.256728
1,3.0,2.0,3.0,3.5,3.0,3.0,3.123642,3.159372,3.309625,3.357508,3.438013,3.153751
2,4.0,4.0,3.0,4.0,4.0,4.0,3.894959,3.914886,3.967660,4.049704,4.154083,4.037264
3,3.0,3.0,3.5,3.0,3.5,3.5,3.299432,3.312919,3.502336,3.548896,3.470794,3.503109
4,3.5,3.5,3.5,3.5,3.0,3.5,3.246544,3.145738,3.170080,3.238313,3.185226,2.989196
...,...,...,...,...,...,...,...,...,...,...,...,...
778,2.0,2.5,3.0,3.0,3.5,2.0,2.976367,3.038159,3.308442,3.319680,3.267378,2.654202
779,2.5,2.5,3.0,3.0,2.5,2.5,3.135164,3.157727,3.169601,3.276312,3.343801,3.293128
780,2.0,3.0,3.0,3.0,3.0,2.5,3.362954,3.357554,3.448310,3.354409,3.462994,3.451941
781,4.0,3.5,4.0,3.5,3.5,4.0,3.969756,3.929296,3.910649,3.948966,3.698962,3.850896


In [None]:
# Per-row squared error of each target, stored next to the score and prediction
# it is derived from (one loop instead of six copy-pasted statements).
for target in ("cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"):
    preresults[f"se_{target}"] = (preresults[target] - preresults[f"{target}_pred"]) ** 2

In [None]:
# Stand-alone table of per-row squared errors, one "se_<target>" column per score,
# built in a single constructor call.
results = pd.DataFrame(
    {
        f"se_{target}": (preresults[target] - preresults[f"{target}_pred"]) ** 2
        for target in ("cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions")
    }
)

In [None]:
preresults

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions,cohesion_pred,syntax_pred,vocabulary_pred,phraseology_pred,grammar_pred,conventions_pred,se_cohesion,se_syntax,se_vocabulary,se_phraseology,se_grammar,se_conventions
0,3.0,2.5,2.5,2.0,2.0,2.0,2.654179,2.439184,2.780513,2.619595,2.365422,2.256728,0.119592,0.003699,0.078687,0.383898,0.133533,0.065909
1,3.0,2.0,3.0,3.5,3.0,3.0,3.123642,3.159372,3.309625,3.357508,3.438013,3.153751,0.015287,1.344143,0.095868,0.020304,0.191856,0.023639
2,4.0,4.0,3.0,4.0,4.0,4.0,3.894959,3.914886,3.967660,4.049704,4.154083,4.037264,0.011034,0.007244,0.936367,0.002470,0.023742,0.001389
3,3.0,3.0,3.5,3.0,3.5,3.5,3.299432,3.312919,3.502336,3.548896,3.470794,3.503109,0.089659,0.097919,0.000005,0.301287,0.000853,0.000010
4,3.5,3.5,3.5,3.5,3.0,3.5,3.246544,3.145738,3.170080,3.238313,3.185226,2.989196,0.064240,0.125501,0.108847,0.068480,0.034309,0.260921
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
778,2.0,2.5,3.0,3.0,3.5,2.0,2.976367,3.038159,3.308442,3.319680,3.267378,2.654202,0.953292,0.289615,0.095137,0.102195,0.054113,0.427980
779,2.5,2.5,3.0,3.0,2.5,2.5,3.135164,3.157727,3.169601,3.276312,3.343801,3.293128,0.403433,0.432605,0.028765,0.076348,0.712001,0.629052
780,2.0,3.0,3.0,3.0,3.0,2.5,3.362954,3.357554,3.448310,3.354409,3.462994,3.451941,1.857645,0.127845,0.200982,0.125605,0.214364,0.906191
781,4.0,3.5,4.0,3.5,3.5,4.0,3.969756,3.929296,3.910649,3.948966,3.698962,3.850896,0.000915,0.184295,0.007984,0.201571,0.039586,0.022232


In [None]:
mse = results.mean();mse

se_cohesion       0.268448
se_syntax         0.251028
se_vocabulary     0.210480
se_phraseology    0.246678
se_grammar        0.222805
se_conventions    0.220350
dtype: float64

In [None]:
rmse = np.sqrt(mse);rmse

se_cohesion       0.518120
se_syntax         0.501027
se_vocabulary     0.458781
se_phraseology    0.496667
se_grammar        0.472023
se_conventions    0.469415
dtype: float64

In [None]:
rmse.mean()

0.48600545784911864

# Cargar el modelo

In [None]:
# In a fresh session, rebuild the trainer first, e.g.:
#   trainer = Trainer(model, (train_loader, val_loader), config, accelerator)
model_load_path = '/content/drive/MyDrive/Tesis/deberta_base_v1.pth'

# Load the previously trained weights into the trainer's model.
trainer.load_model(model_load_path)

Modelo cargado desde /content/drive/MyDrive/Tesis/deberta_base_v1.pth
