In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Topic name -> integer class id. Ids are assigned by position, so the order of
# the names below is the class-id order.
etiqueta_map = {
    nombre: indice
    for indice, nombre in enumerate((
        "Gestión Pública e Instituciones",
        "Economía, Empresa, Empleo e Infraestructuras",
        "Sociedad, Igualdad y Derechos",
        "Otros",
    ))
}
# Display names for the report, in class-id order (dicts keep insertion order).
target_names = [*etiqueta_map]

# Directory holding the fine-tuned checkpoint (tokenizer files + model weights).
model_path = "clasificador_analisis/clasificador/clasificador_tema/classweights_crossentropy/comparativa/ep_3/modelo_final"

tokenizer = BertTokenizer.from_pretrained(model_path)
# ``eval()`` returns the module itself, so loading and switching to
# evaluation mode can be done in one expression.
model = BertForSequenceClassification.from_pretrained(model_path).eval()

def _tema_probs(text, model, tokenizer):
    """Run one forward pass for ``text`` and return its class probabilities.

    Shared by ``predict_tema`` and ``get_tema_scores`` so both derive their
    result from exactly the same tokenization and inference path.

    Args:
        text: The text to classify (tokenized with truncation at 128 tokens).
        model: Callable returning an object with a ``logits`` tensor.
        tokenizer: Callable returning a mapping of input tensors.

    Returns:
        A tensor with the softmax of the logits, with size-1 dimensions
        squeezed away.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # Hugging Face models expose ``.device``; move the inputs there so
    # inference also works after the model has been placed on a GPU. Objects
    # without that attribute receive the tokenizer output untouched.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    return torch.softmax(outputs.logits, dim=1).squeeze()


def predict_tema(text, model, tokenizer):
    """Return the integer id of the most probable class for ``text``.

    Args:
        text: The text to classify.
        model: Sequence-classification model used for inference.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The index of the highest class probability, as a plain ``int``.
    """
    return int(torch.argmax(_tema_probs(text, model, tokenizer)))


def get_tema_scores(text, model, tokenizer):
    """Return the per-class probabilities for ``text``.

    Args:
        text: The text to classify.
        model: Sequence-classification model used for inference.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The softmax probabilities converted with ``Tensor.tolist()``; position
        ``i`` holds the probability of class id ``i``.
    """
    return _tema_probs(text, model, tokenizer).tolist()


  warn(


In [None]:
# Load the held-out test split. The sheet is expected to provide a "text"
# column and a "label" column holding the manually assigned class id.
df = pd.read_excel("clasificador_analisis/clasificador/clasificador_tema/classweights_crossentropy/mejor_test.xlsx")
df = df.rename(columns={"label": "label_manual_num"})

# Run the model once per row. The predicted label is derived from the same
# probabilities instead of calling the model a second time, which halves the
# inference cost and guarantees that label and scores agree.
tema_scores = df["text"].apply(lambda x: get_tema_scores(x, model, tokenizer))
# Index of the first maximum, i.e. the same tie-breaking as ``torch.argmax``.
df["label_predicted"] = tema_scores.apply(lambda probs: probs.index(max(probs)))
df["scores"] = tema_scores

In [3]:
# Spread the per-row probability list into one column per class. Columns are
# in class-id order (0..3).
# NOTE(review): "score_identidad" holds class 2 ("Sociedad, Igualdad y Derechos");
# the name is kept so that code reading this column keeps working.
df[["score_instituciones", "score_economia", "score_identidad", "score_otros"]] = pd.DataFrame(df["scores"].tolist(), index=df.index)

# Pass the class ids explicitly: rows/columns of the matrix and the report
# lines then always follow ``etiqueta_map`` order and line up with
# ``target_names``, even if a class is missing from this split.
label_ids = list(etiqueta_map.values())

print("\n📊 MATRIZ DE CONFUSIÓN:")
print(confusion_matrix(df["label_manual_num"], df["label_predicted"], labels=label_ids))

print("\n📈 CLASSIFICATION REPORT:")
print(classification_report(
    df["label_manual_num"],
    df["label_predicted"],
    labels=label_ids,
    target_names=target_names,
    digits=3
))


📊 MATRIZ DE CONFUSIÓN:
[[69  3  6  4]
 [ 6 39  0  1]
 [ 4  3 29  1]
 [ 4  0  2 13]]

📈 CLASSIFICATION REPORT:
                                              precision    recall  f1-score   support

             Gestión Pública e Instituciones      0.831     0.841     0.836        82
Economía, Empresa, Empleo e Infraestructuras      0.867     0.848     0.857        46
               Sociedad, Igualdad y Derechos      0.784     0.784     0.784        37
                                       Otros      0.684     0.684     0.684        19

                                    accuracy                          0.815       184
                                   macro avg      0.791     0.789     0.790       184
                                weighted avg      0.815     0.815     0.815       184

