In [12]:
import json
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForMaskedLM
import evaluate
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForQuestionAnswering

# Cargar datos de entrenamiento
with open("3-corpus.json", "r", encoding="utf-8") as f:
    train_data = json.load(f)

# Cargar datos de prueba
with open("3-test-set.json", "r", encoding="utf-8") as f:
    test_data = json.load(f)

# Tokenizador y modelo BETO
MODEL_NAME = "dccuchile/bert-base-spanish-wwm-cased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME)

# Definición del dataset personalizado
class BETODataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        query, response = self.data[idx]
        inputs = self.tokenizer(query, truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt")
        targets = self.tokenizer(response, truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt")
        return {
            "input_ids": inputs["input_ids"].squeeze(),
            "attention_mask": inputs["attention_mask"].squeeze(),
            "labels": targets["input_ids"].squeeze()
        }

# Crear dataset y DataLoader
train_dataset = BETODataset(train_data, tokenizer)
test_dataset = BETODataset(test_data, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Configurar entrenamiento
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",  # Evaluar después de cada época
    save_strategy="epoch",
    per_device_train_batch_size=8,  # Aumentar si hay más memoria disponible
    per_device_eval_batch_size=8,
    learning_rate=5e-5,  # Reducir si hay sobreajuste
    num_train_epochs=3,  # Aumentar si se necesita mejor ajuste
    weight_decay=0.01,
    logging_dir="./logs",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Entrenar modelo
trainer.train()

# Evaluación del modelo
rouge = evaluate.load("rouge")
embedder = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

def evaluate_model():
    model.eval()
    predictions = []
    references = []
    cos_sim_scores = []
    
    with torch.no_grad():
        for query, expected_response in test_data:
            inputs = tokenizer(query, return_tensors="pt", truncation=True, padding="max_length", max_length=512)
            output_ids = model.generate(**inputs, max_length=512)
            generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
            predictions.append(generated_text)
            references.append(expected_response)
            
            # Calcular similitud de coseno entre embeddings
            embedding_pred = embedder.encode(generated_text, convert_to_tensor=True)
            embedding_ref = embedder.encode(expected_response, convert_to_tensor=True)
            cos_sim = util.pytorch_cos_sim(embedding_pred, embedding_ref).item()
            cos_sim_scores.append(cos_sim)
    
    # Calcular ROUGE
    results = rouge.compute(predictions=predictions, references=references)
    
    # Calcular promedio de similitud de coseno
    avg_cos_sim = np.mean(cos_sim_scores)
    
    return results, avg_cos_sim

rouge_scores, avg_cos_sim = evaluate_model()
print("ROUGE Scores:", rouge_scores)
print("Avg Cosine Similarity:", avg_cos_sim)


BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


Epoch,Training Loss,Validation Loss
1,0.5251,0.388371
2,0.3425,0.39709
3,0.3152,0.395004


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]



.gitattributes:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.12k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/470M [00:00<?, ?B/s]

model_O1.onnx:   0%|          | 0.00/470M [00:00<?, ?B/s]

model_O2.onnx:   0%|          | 0.00/470M [00:00<?, ?B/s]

model_O3.onnx:   0%|          | 0.00/470M [00:00<?, ?B/s]

model_O4.onnx:   0%|          | 0.00/235M [00:00<?, ?B/s]

model_qint8_arm64.onnx:   0%|          | 0.00/118M [00:00<?, ?B/s]

model_qint8_avx512.onnx:   0%|          | 0.00/118M [00:00<?, ?B/s]

model_qint8_avx512_vnni.onnx:   0%|          | 0.00/118M [00:00<?, ?B/s]

model_quint8_avx2.onnx:   0%|          | 0.00/118M [00:00<?, ?B/s]

openvino_model.bin:   0%|          | 0.00/470M [00:00<?, ?B/s]

openvino/openvino_model.xml:   0%|          | 0.00/399k [00:00<?, ?B/s]

openvino_model_qint8_quantized.bin:   0%|          | 0.00/119M [00:00<?, ?B/s]

(…)nvino/openvino_model_qint8_quantized.xml:   0%|          | 0.00/709k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/471M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

unigram.json:   0%|          | 0.00/14.8M [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

ValueError: Input length of input_ids is 512, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [None]:
from transformers import AutoTokenizer

# Carga el tokenizer desde el modelo base (ajusta según el modelo que usaste)
base_model = "bert-base-uncased"  # Reemplázalo si usaste otro
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Guarda el tokenizer en tu checkpoint
tokenizer.save_pretrained("./results/checkpoint-2490/")

# Ahora intenta cargar el modelo y el tokenizer nuevamente
from transformers import AutoModelForMaskedLM

model = AutoModelForMaskedLM.from_pretrained("./results/checkpoint-2490")
tokenizer = AutoTokenizer.from_pretrained("./results/checkpoint-2490")


In [25]:
from transformers import AutoModelForMaskedLM, AutoTokenizer

checkpoint_path = "./results/checkpoint-2490/"  # Usa el checkpoint más reciente
save_path = "./results/final_model"  # Ruta donde se guardará el modelo consolidado

model = AutoModelForMaskedLM.from_pretrained(checkpoint_path)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)

model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)


('./results/final_model\\tokenizer_config.json',
 './results/final_model\\special_tokens_map.json',
 './results/final_model\\vocab.txt',
 './results/final_model\\added_tokens.json',
 './results/final_model\\tokenizer.json')

In [26]:
import json
import torch
import numpy as np
from transformers import AutoModelForMaskedLM, AutoTokenizer
import evaluate
from sentence_transformers import SentenceTransformer, util

# Cargar modelo y tokenizador fine-tuneado
MODEL_NAME = "dccuchile/bert-base-spanish-wwm-cased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForMaskedLM.from_pretrained("./results/final_model")

# Cargar datos de prueba
with open("3-test-set.json", "r", encoding="utf-8") as f:
    test_data = json.load(f)

# Cargar métricas
rouge = evaluate.load("rouge")
embedder = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

def get_embedding(text, model, tokenizer):
    """Obtiene el embedding del texto usando el modelo fine-tuneado"""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=512)
    with torch.no_grad():
        output = model(**inputs)
    return output.logits.mean(dim=1).squeeze()  # Usar la salida de logits para embeddings

def evaluate_model():
    model.eval()
    predictions = []
    references = []
    cos_sim_scores = []
    
    with torch.no_grad():
        for query, expected_response in test_data:
            # Generar embeddings con modelo fine-tuneado
            embedding_query = get_embedding(query, model, tokenizer)
            embedding_response = get_embedding(expected_response, model, tokenizer)
            
            # Comparar con embeddings originales
            embedding_pred = embedder.encode(query, convert_to_tensor=True)
            embedding_ref = embedder.encode(expected_response, convert_to_tensor=True)
            
            # Calcular similitud de coseno
            cos_sim_finetuned = util.pytorch_cos_sim(embedding_query, embedding_response).item()
            cos_sim_original = util.pytorch_cos_sim(embedding_pred, embedding_ref).item()
            cos_sim_scores.append((cos_sim_finetuned, cos_sim_original))

            predictions.append(query)
            references.append(expected_response)
    
    # Calcular ROUGE
    results = rouge.compute(predictions=predictions, references=references)
    
    # Calcular promedios de similitud de coseno
    avg_cos_sim_finetuned = np.mean([x[0] for x in cos_sim_scores])
    avg_cos_sim_original = np.mean([x[1] for x in cos_sim_scores])
    
    return results, avg_cos_sim_finetuned, avg_cos_sim_original

# Evaluar modelo
rouge_scores, avg_cos_sim_finetuned, avg_cos_sim_original = evaluate_model()
print("ROUGE Scores:", rouge_scores)
print("Avg Cosine Similarity (Fine-tuned):", avg_cos_sim_finetuned)
print("Avg Cosine Similarity (Original):", avg_cos_sim_original)


ROUGE Scores: {'rouge1': 0.17133047577200533, 'rouge2': 0.05076627985931451, 'rougeL': 0.11374332847081262, 'rougeLsum': 0.11359419251660435}
Avg Cosine Similarity (Fine-tuned): 0.9960256213075426
Avg Cosine Similarity (Original): 0.4398946776771876
