In [1]:
import torch
import torch.nn.functional as F
from transformers import RobertaTokenizer, RobertaForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm
2025-04-06 15:11:15.941835: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import torch.nn as nn

class TemperatureScaler(nn.Module):
    def __init__(self):
        super(TemperatureScaler, self).__init__()
        # log_temperature pentru a ne asigura că T > 0 (T = exp(log_temperature))
        self.log_temperature = nn.Parameter(torch.zeros(1))

    def forward(self, logits):
        temperature = torch.exp(self.log_temperature)
        return logits / temperature

class CalibratedModel(nn.Module):
    def __init__(self, model, temp_scaler):
        super(CalibratedModel, self).__init__()
        self.model = model
        self.temp_scaler = temp_scaler

    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        scaled_logits = self.temp_scaler(logits)
        calibrated_probs = F.softmax(scaled_logits, dim=-1)
        return calibrated_probs

In [3]:
model = RobertaForSequenceClassification.from_pretrained("saved_models/roberta/roberta_torch_model")
tokenizer = RobertaTokenizer.from_pretrained("saved_models/roberta/roberta_tokenizer")

model.eval()

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [4]:
temp_scaler = TemperatureScaler()
calibrated_model = CalibratedModel(model, temp_scaler)
calibrated_model.load_state_dict(torch.load("saved_models/roberta/calibrated_model.pth", map_location="cpu"))
calibrated_model.eval()

CalibratedModel(
  (model): RobertaForSequenceClassification(
    (roberta): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (de

In [5]:
import re
def wordopt(text):
    text = re.sub(r'https?://\S+|www\.\S+', '[URL]', text)
    text = text.replace('\n', ' ')
    text = text.encode('ascii', 'ignore').decode('ascii')
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [25]:
text_example = '''The avalanche of fake news, pro-Putin and anti-European messages on social media puts Romanian authorities on alert. SRI teaches people how to spot online dangers, MIA reports that eleven fake accounts are disinformation in the name of the Police. And the CNA found illegal posts on TikTok and Facebook and asked the two platforms to remove them.'''

text_example = wordopt(text_example)
encoded_input = tokenizer.encode_plus(
    text_example,
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=512
)
with torch.no_grad():
    calibrated_probs = calibrated_model(
        input_ids=encoded_input["input_ids"],
        attention_mask=encoded_input["attention_mask"]
    )

pred_label = torch.argmax(calibrated_probs, dim=-1).item()

print("Text:", text_example)
print("Probabilități calibrate:", calibrated_probs.squeeze().tolist())
decizie = "REAL" if pred_label == 1 else "FAKE"
print("Eticheta prezisă (calibrată):", decizie)

Text: The avalanche of fake news, pro-Putin and anti-European messages on social media puts Romanian authorities on alert. SRI teaches people how to spot online dangers, MIA reports that eleven fake accounts are disinformation in the name of the Police. And the CNA found illegal posts on TikTok and Facebook and asked the two platforms to remove them.
Probabilități calibrate: [0.025622986257076263, 0.9743770360946655]
Eticheta prezisă (calibrată): REAL


In [15]:
encoded_input = tokenizer.encode_plus(
    text_example,
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=512
)

model.eval()
with torch.no_grad():
    outputs = model(**encoded_input)
    logits = outputs.logits
    # Softmax direct pe logits = probabilități necalibrate
    probs_uncalibrated = torch.softmax(logits, dim=-1)
    pred_label_uncalibrated = torch.argmax(probs_uncalibrated, dim=-1).item()

print("Text:", text_example)
print("Probabilități necalibrate:", probs_uncalibrated.squeeze().tolist())
print("Eticheta prezisă (necalibrat):", pred_label_uncalibrated)

Text: The Pardons that Sleepy Joe Biden gave to the Unselect Committee of Political Thugs, and many others, are hereby declared VOID, VACANT, AND OF NO FURTHER FORCE OR EFFECT, because of the fact that they were done by Autopen. In other words, Joe Biden did not sign them but, more importantly, he did not know anything about them! The necessary Pardoning Documents were not explained to, or approved by, Biden. He knew nothing about them, and the people that did may have committed a crime. Therefore, those on the Unselect Committee, who destroyed and deleted ALL evidence obtained during their two year Witch Hunt of me, and many other innocent people, should fully understand that they are subject to investigation at the highest level. The fact is, they were probably responsible for the Documents that were signed on their behalf without the knowledge or consent of the Worst President in the History of our Country, Crooked Joe Biden!
Probabilități necalibrate: [0.9997610449790955, 0.0002389

In [36]:
temperature_value = torch.exp(temp_scaler.log_temperature).item()
print("Valoare temperatură:", temperature_value)

Valoare temperatură: 1.1422650814056396


In [5]:
import pandas as pd
# Dacă ai și un model calibrat (wrapper-ul CalibratedModel) salvat, încarcă-l.
# Pentru acest exemplu, presupunem că ai salvat modelul calibrat ca "calibrated_model.pth"
import torch.nn as nn
import numpy as np
df = pd.read_csv("../../datasets/WELFake_cleaned.csv")

class TemperatureScaler(nn.Module):
    def __init__(self):
        super(TemperatureScaler, self).__init__()
        self.log_temperature = nn.Parameter(torch.zeros(1))
    def forward(self, logits):
        temperature = torch.exp(self.log_temperature)
        return logits / temperature

class CalibratedModel(nn.Module):
    def __init__(self, model, temp_scaler):
        super(CalibratedModel, self).__init__()
        self.model = model
        self.temp_scaler = temp_scaler
    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        scaled_logits = self.temp_scaler(logits)
        return F.softmax(scaled_logits, dim=-1)

# Încarcă modelul calibrat
temp_scaler = TemperatureScaler()
calibrated_model = CalibratedModel(model, temp_scaler)
calibrated_model.load_state_dict(torch.load("saved_models/roberta/calibrated_model.pth", map_location="cpu"))
calibrated_model.eval()

# Funcție pentru a obține predicții dintr-o listă de texte (fără DataLoader)
def get_predictions_from_texts(texts, model, tokenizer, batch_size=16):
    all_probs = []
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i: i+batch_size]
        encoded = tokenizer(batch_texts, return_tensors="pt", 
                            padding="max_length", truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**encoded)
            probs = F.softmax(outputs.logits, dim=-1)
            all_probs.append(probs)
    all_probs = torch.cat(all_probs, dim=0).cpu().numpy()
    return all_probs

def get_predictions_from_texts_calibrated(texts, calibrated_model, tokenizer, batch_size=16):
    all_probs = []
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i: i+batch_size]
        encoded = tokenizer(batch_texts, return_tensors="pt", 
                            padding="max_length", truncation=True, max_length=512)
        with torch.no_grad():
            probs = calibrated_model(input_ids=encoded["input_ids"], attention_mask=encoded["attention_mask"])
            all_probs.append(probs)
    all_probs = torch.cat(all_probs, dim=0).cpu().numpy()
    return all_probs

# Funcție pentru calculul ECE și MCE
def compute_calibration_metrics(probs, labels, n_bins=10):
    confidences = np.max(probs, axis=1)
    predictions = np.argmax(probs, axis=1)
    accuracies = (predictions == np.array(labels)).astype(np.float32)
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    ece = 0.0
    mce = 0.0
    for i in range(n_bins):
        in_bin = (confidences >= bin_boundaries[i]) & (confidences < bin_boundaries[i+1])
        prop_in_bin = np.mean(in_bin)
        if prop_in_bin > 0:
            accuracy_in_bin = np.mean(accuracies[in_bin])
            avg_confidence_in_bin = np.mean(confidences[in_bin])
            gap = abs(avg_confidence_in_bin - accuracy_in_bin)
            ece += gap * prop_in_bin
            mce = max(mce, gap)
    return ece, mce

# Obține predicțiile pentru modelul necalibrat
texts = df["text"].tolist()
labels = df["label"].tolist()
probs_uncalibrated = get_predictions_from_texts(texts, model, tokenizer, batch_size=2)
ece_uncalibrated, mce_uncalibrated = compute_calibration_metrics(probs_uncalibrated, labels, n_bins=10)
print("Model necalibrat:")
print(f"ECE: {ece_uncalibrated:.4f}")
print(f"MCE: {mce_uncalibrated:.4f}")

# Obține predicțiile pentru modelul calibrat
probs_calibrated = get_predictions_from_texts_calibrated(texts, calibrated_model, tokenizer, batch_size=2)
ece_calibrated, mce_calibrated = compute_calibration_metrics(probs_calibrated, labels, n_bins=10)
print("\nModel calibrat:")
print(f"ECE: {ece_calibrated:.4f}")
print(f"MCE: {mce_calibrated:.4f}")

NameError: name 'model' is not defined