In [None]:
import json
import torch
import torch.nn.functional as F
import time
import random
import numpy as np
!pip install --upgrade transformers
import transformers
# from transformers import RobertaTokenizer, RobertaForMultipleChoice
from transformers import BertTokenizer, BertForMultipleChoice
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
from torch.utils.data import Dataset


# ENTRENO

In [None]:
# SET RANDOM SEED
def set_seed(seed=42):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy,
            and PyTorch (CPU generator and all CUDA devices).

    Besides seeding, cuDNN is switched to deterministic mode and its
    auto-tuning benchmark is turned off.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Fix the seed (42) for Python, NumPy and PyTorch before the model is loaded.
set_seed(42)

# Model and tokenizer: both are loaded from the same multilingual BERT checkpoint.
bert = "google-bert/bert-base-multilingual-cased"
tokenizer = BertTokenizer.from_pretrained(bert)
model = BertForMultipleChoice.from_pretrained(bert)

# Alternative Spanish RoBERTa checkpoint (disabled; its import is also commented out).
# roberta = "PlanTL-GOB-ES/roberta-base-bne"
# tokenizer = RobertaTokenizer.from_pretrained(roberta)
# model = RobertaForMultipleChoice.from_pretrained(roberta)

## Dataset Personalizado

In [None]:
from torch.utils.data import Dataset, DataLoader

class MultipleChoiceDataset(Dataset):
    """Torch dataset that turns multiple-choice QA items into model inputs.

    Each element of ``data`` is a dict with the keys ``"context"``,
    ``"question"`` and ``"choices"``; every choice is a dict with ``"text"``
    and ``"type"``. The label of an item is the index of the first choice
    whose ``"type"`` equals ``"correct answer"``.

    Every (context, question, choice) triple is rendered as one string and
    tokenized to ``max_length`` tokens. Items with fewer choices than the
    largest item in ``data`` are padded with all-zero rows, so every sample
    of one dataset has shape ``(max_num_choices, max_length)``.

    Args:
        data: Sequence of item dicts as described above. It is scanned once
            at construction time, so it should not be modified afterwards.
        tokenizer: Callable tokenizer accepting ``truncation``,
            ``max_length``, ``padding`` and ``return_tensors`` and returning
            a mapping with ``"input_ids"`` and ``"attention_mask"`` tensors.
        max_length: Number of tokens every choice is padded/truncated to.
    """

    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Computed once here instead of on every __getitem__ call, which
        # would rescan the whole dataset for each sample.
        self.max_num_choices = max((len(d["choices"]) for d in data), default=0)

    def __len__(self):
        """Return the number of questions in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the tokenized choices and gold label of item ``idx``.

        Returns:
            A dict with ``"input_ids"`` and ``"attention_mask"`` tensors of
            shape ``(max_num_choices, max_length)`` and the integer
            ``"labels"`` index of the correct choice.

        Raises:
            IndexError: If no choice of the item is marked ``"correct answer"``.
        """
        item = self.data[idx]
        context = item["context"]
        question = item["question"]
        choices = [c["text"] for c in item["choices"]]

        label = next(
            (i for i, c in enumerate(item["choices"]) if c["type"] == "correct answer"),
            None,
        )
        if label is None:
            raise IndexError(f"Item {idx} has no choice of type 'correct answer'")

        # NOTE(review): the choice is appended at the end of the string, so
        # right-side truncation would cut it off first when the context is
        # longer than max_length -- confirm contexts are short enough.
        texts = [f"{context} Pregunta: {question} Opción: {choice}" for choice in choices]

        encodings = self.tokenizer(
            texts,
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt"
        )
        input_ids = encodings["input_ids"]
        attention_mask = encodings["attention_mask"]

        # Add all-zero dummy choices so every sample has the same number of rows.
        # NOTE(review): the model still scores these fully masked rows; the
        # label never points at them, but they take part in the softmax.
        missing_rows = self.max_num_choices - input_ids.shape[0]
        if missing_rows > 0:
            padding_shape = (missing_rows, self.max_length)
            input_ids = torch.cat([input_ids, input_ids.new_zeros(padding_shape)], dim=0)
            attention_mask = torch.cat([attention_mask, attention_mask.new_zeros(padding_shape)], dim=0)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": label
        }

## Cargamos los subconjuntos training y dev


In [None]:
# Load the training split; the examples are stored under the top-level "data" key.
with open("1-training.json", "r", encoding="utf-8") as f:
    train_data = json.load(f)["data"]

# Load the development split, which is read from the same "data" key.
with open("1-dev.json", "r", encoding="utf-8") as f:
    dev_data = json.load(f)["data"]

# Wrap both splits in the dataset class (using its default max_length).
train_dataset = MultipleChoiceDataset(train_data, tokenizer)
dev_dataset = MultipleChoiceDataset(dev_data, tokenizer)

# Debug aid: prints the module in which TrainingArguments is defined.
print(TrainingArguments.__module__)

## Entrenamos el modelo:

In [None]:
import os

# transformers is installed and imported once in the first cell of the
# notebook; print the version actually loaded so the run can be reproduced.
print(transformers.__version__)

# Keep Weights & Biases logging disabled (report_to="none" below does the
# same for the Trainer's own integrations).
os.environ["WANDB_DISABLED"] = "true"

start_time = time.time()

training_args = TrainingArguments(
    output_dir="./results",
    # Evaluate and checkpoint once per epoch so the best checkpoint can be
    # restored at the end and early stopping has a metric to watch.
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    # 2 samples per step x 4 accumulation steps = 8 samples per optimizer
    # update on each device.
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    report_to="none",
    # Mixed precision needs a CUDA device; fall back to full precision on
    # CPU-only machines instead of failing at start-up.
    fp16=torch.cuda.is_available(),
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    # Stop as soon as one evaluation fails to improve on the best one.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)],
)

trainer.train()

end_time = time.time()
elapsed_time = end_time - start_time
print(f"Tiempo de ejecución: {elapsed_time:.2f} segundos")

# Persist the fine-tuned weights together with the tokenizer files.
trainer.save_model("modelo_finetuned")
tokenizer.save_pretrained("modelo_finetuned")

# EVALUACIÓN

In [None]:
# Load the test split; the questions are stored under the top-level "data" key.
# Despite its name, `test_dataset` is the raw object parsed from the JSON file,
# not a MultipleChoiceDataset.
with open("4-test-BERT.json", "r", encoding="utf-8") as f:
    test_dataset = json.load(f)["data"]

def format_pair(context, question, choice):
    """Render one (context, question, choice) triple as a single prompt string.

    Args:
        context: Passage the question refers to.
        question: Question text.
        choice: Text of one candidate answer.

    Returns:
        The three parts joined by the Spanish markers ``Pregunta:`` and
        ``Opción:``.
    """
    template = "{context} Pregunta: {question} Opción: {choice}"
    return template.format(context=context, question=question, choice=choice)

def prepare_inputs(context, question, choices):
    """Tokenize one question into a single-example batch for the model.

    Args:
        context: Passage the question refers to.
        question: Question text.
        choices: List with the text of every candidate answer.

    Returns:
        A dict with ``input_ids`` and ``attention_mask``; each tensor gets a
        leading dimension of size 1 added and is moved to the device of the
        global ``model``.
    """
    # One rendered prompt per candidate answer.
    candidate_texts = []
    for option in choices:
        candidate_texts.append(format_pair(context, question, option))

    # Tokenize the prompts, i.e. convert the text into token ids.
    batch = tokenizer(candidate_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)

    # input_ids: token ids fed to the model.
    # attention_mask: marks which tokens are real and which are padding.
    return {
        field: batch[field].unsqueeze(0).to(model.device)
        for field in ("input_ids", "attention_mask")
    }

def predict_answer(inputs, choices):
    """Return the choice the global ``model`` scores highest.

    Args:
        inputs: Keyword arguments for the model, as built by
            ``prepare_inputs`` (a batch holding a single question).
        choices: List with the text of every candidate answer, in the same
            order in which they were tokenized.

    Returns:
        The text of the predicted choice, or ``None`` (after printing an
        error) when the predicted index does not exist in ``choices``.
    """
    # Make sure train-time layers such as dropout are switched off, so the
    # prediction does not depend on the mode the model was left in.
    if model.training:
        model.eval()

    # torch.no_grad(): skips gradient tracking, which is faster and uses less memory.
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits.squeeze(0)

    # Softmax is monotonic, so the arg-max of the raw logits is the same
    # index as the arg-max of the probabilities.
    predicted_index = torch.argmax(logits).item()

    if predicted_index >= len(choices):
        print(f"Error: Índice de predicción {predicted_index} fuera de rango.")
        return None

    return choices[predicted_index]


In [None]:
# Largest number of options of any test question. It is derived from the
# already loaded `test_dataset` instead of re-opening the JSON file, and
# `default=0` keeps an empty test set from raising.
max_labels = max((len(item["choices"]) for item in test_dataset), default=0)
correct_predictionsBERT = 0
total_questions = len(test_dataset)
skipped_questions = 0
predictions = []

start_time = time.time()
for item in test_dataset:
    context = item["context"]
    question = item["question"]
    choices = [choice["text"] for choice in item["choices"]]

    # Validate the number of options before looking for the gold answer:
    # a question without options has no gold answer to find.
    num_choices = len(choices)
    if num_choices > max_labels:
        print(f"Error: Pregunta con {num_choices} opciones. Supera el máximo permitido: {max_labels}")
        skipped_questions += 1
        continue
    if num_choices == 0:
        print(f"Pregunta con {num_choices} opciones.")
        skipped_questions += 1
        continue

    correct_answer = next(choice["text"] for choice in item["choices"] if choice["type"] == "correct answer")

    inputs = prepare_inputs(context, question, choices)
    predicted_answer = predict_answer(inputs, choices)

    # Answers are compared by text; a failed prediction (None) never matches.
    is_correct_prediction = correct_answer == predicted_answer
    if is_correct_prediction:
        correct_predictionsBERT += 1

    predictions.append({
        "context": context,
        "question": question,
        "choices": choices,
        "correct_answer": correct_answer,
        "predicted_answer": predicted_answer,
        "is_correct": is_correct_prediction
    })

end_time = time.time()
elapsed_time = end_time - start_time

with open("predictionsBERT-opt.json", "w", encoding="utf-8") as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)

# Skipped questions stay in the denominator, i.e. they count as wrong.
accuracy = correct_predictionsBERT / total_questions if total_questions else 0.0

print(f"Tiempo de ejecución: {elapsed_time:.2f} segundos")
print()
print(f"Total de preguntas evaluadas: {total_questions}")
if skipped_questions:
    print(f"Preguntas omitidas: {skipped_questions}")
print(f"Respuestas correctas BERT: {correct_predictionsBERT}")
print(f"Precisión del modelo BERT: {accuracy:.2%}")
print()

Tiempo de ejecución: 2.06 segundos

Total de preguntas evaluadas: 50
Respuestas correctas BERT: 28
Precisión del modelo BERT: 56.00%

