In [4]:
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import torch.nn.functional as F

saved_model_path = "./climate_fever_model"  
model = AutoModelForSequenceClassification.from_pretrained(saved_model_path)
tokenizer = AutoTokenizer.from_pretrained(saved_model_path)

label_map = {
    "SUPPORTS": "SUPPORTS",
    "REFUTES": "REFUTES",
    "NOT_ENOUGH_INFO": "UNDECIDED",
    "DISPUTED": "UNDECIDED"
}
label_mapping = ['SUPPORTS', 'REFUTES', 'UNDECIDED']  
label_to_index = {label: idx for idx, label in enumerate(label_mapping)} 

def predict_label_with_probs(claim):
    features = tokenizer(
        [claim], 
        padding='max_length', 
        truncation=True, 
        return_tensors="pt", 
        max_length=512
    )
    
    model.eval()
    with torch.no_grad():
        scores = model(**features).logits
        probs = F.softmax(scores, dim=-1)  
        predicted_label_idx = probs.argmax(dim=1).item()
        predicted_label = label_mapping[predicted_label_idx]
        probabilities = probs[0].tolist() 
    
    
    return predicted_label

dataset_path = "dataset\\fr_climate-fever-dataset-r1_period_maj_opus-mt-tc-big-en-fr_v2-unicode.jsonl"
true_labels = []
predicted_labels = []

with open(dataset_path, "r", encoding="utf-8") as file:
    for line in file:
        data = json.loads(line)
        claim = data["claim"]
        claim_label = data["claim_label"]
        
        if claim_label == 'DISPUTED':
            continue
        
        mapped_claim_label = label_map.get(claim_label, "neutral")  # Default to 'neutral' if label not found
        
        predicted_label = predict_label_with_probs(claim)
        
        print(f"predicted_label: {predicted_label}")
        true_labels.append(label_to_index[mapped_claim_label])
        predicted_labels.append(label_to_index[predicted_label])

accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='weighted')  # Weighted average for multiclass
recall = recall_score(true_labels, predicted_labels, average='weighted')  # Weighted average for multiclass
f1 = f1_score(true_labels, predicted_labels, average='weighted')  # Weighted average for multiclass

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1 Score (weighted): {f1:.4f}")

Claim: Le réchauffement climatique pousse les ours polaires vers l’extinction.
Probabilities: [0.1859290599822998, 0.42708903551101685, 0.38698193430900574]
Predicted Label: REFUTES
predicted_label: REFUTES
Claim: Le soleil est entré dans un «verrouillage» qui pourrait provoquer un temps glacial, des tremblements de terre et la famine, disent les scientifiques.
Probabilities: [0.17503845691680908, 0.43944552540779114, 0.38551604747772217]
Predicted Label: REFUTES
predicted_label: REFUTES
Claim: La population d'ours polaires a augmenté.
Probabilities: [0.18599869310855865, 0.42838168144226074, 0.3856196403503418]
Predicted Label: REFUTES
predicted_label: REFUTES
Claim: Une étude ironique révèle que plus de CO2 a légèrement refroidi la planète.
Probabilities: [0.18654797971248627, 0.427424818277359, 0.38602718710899353]
Predicted Label: REFUTES
predicted_label: REFUTES
Claim: Les ajouts humains de CO2 se situent dans la marge d’erreur des mesures actuelles et l’augmentation progressive d

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [2]:
print(model.config.num_labels)  # Should print 3 if you're predicting 3 classes


3
