# Test BERT Models

In [7]:
import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# 1. Load the dataset and discard the rows that have no article text.
df = pd.read_csv("../datasets/WELFake_Dataset.csv").dropna(subset=["text"])
texts = df["text"].tolist()
true_labels = df["label"].tolist()  # labels are assumed to be 0 or 1

# 2. Batch size used during evaluation (tune as needed).
batch_size = 16

# 3. Display name and checkpoint directory of every model to evaluate,
#    kept side by side so a name can never drift away from its path.
_model_registry = [
    ("base", "../saved_models/saved_bert_model"),  # the base model
    ("fold_1", "../saved_models/saved_bert_model_fold_1"),
    ("fold_2", "../saved_models/saved_bert_model_fold_2"),
    ("fold_3", "../saved_models/saved_bert_model_fold_3"),
    ("fold_4", "../saved_models/saved_bert_model_fold_4"),
    ("fold_5", "../saved_models/saved_bert_model_fold_5"),
]
model_paths = [path for _, path in _model_registry]
model_names = [name for name, _ in _model_registry]

# 4. Per-model metrics, keyed by model name and filled by the evaluation loop.
metrics_dict = dict()

# 5. Evaluate every model: load it, run batched inference over the whole
#    dataset and compute accuracy / precision / recall / F1.
for model_path, model_name in zip(model_paths, model_names):
    print(f"Evaluare model: {model_name}")

    # Drop the references left over from the previous iteration *before*
    # loading the next checkpoint, so two BERT models are never alive at the
    # same time, and reset the global Keras state that otherwise accumulates
    # when many models are created in a loop. The objects of the final
    # iteration stay bound after the loop, exactly as before.
    model = tokenizer = tokens = dataset = outputs = None
    tf.keras.backend.clear_session()

    # Load the fine-tuned classifier from its checkpoint directory.
    model = TFBertForSequenceClassification.from_pretrained(model_path)

    # The base model keeps its tokenizer in a separate folder; every other
    # model is loaded with the tokenizer stored in its own directory.
    if model_name == "base":
        tokenizer_path = "../saved_models/saved_bert_tokenizer"
    else:
        tokenizer_path = model_path

    tokenizer = BertTokenizer.from_pretrained(tokenizer_path)

    # Tokenize all texts in a single call; each sequence is padded or
    # truncated to exactly 128 tokens.
    # NOTE(review): if all checkpoints share one vocabulary this could be
    # hoisted out of the loop - confirm before changing.
    tokens = tokenizer(
        texts,
        max_length=128,
        padding="max_length",
        truncation=True,
        return_tensors="tf"
    )

    # Wrap the encoded inputs in a tf.data.Dataset so inference runs in batches.
    dataset = tf.data.Dataset.from_tensor_slices({
        "input_ids": tokens["input_ids"],
        "attention_mask": tokens["attention_mask"]
    }).batch(batch_size)

    # Batched inference over the whole dataset.
    outputs = model.predict(dataset)
    logits = np.asarray(outputs.logits)

    if logits.ndim > 1 and logits.shape[-1] > 1:
        # Multi-logit head (e.g. num_labels=2): the predicted class is the
        # index of the largest logit. Flattening here would yield more
        # predictions than samples and break the metric computation.
        preds = np.argmax(logits, axis=-1)
    else:
        # Single-logit head: sigmoid turns the logit into a probability,
        # which is thresholded at 0.5 to obtain the binary prediction.
        probs = tf.math.sigmoid(logits).numpy()
        preds = (probs > 0.5).astype(int).flatten()

    # Compute the metrics; zero_division=0 avoids warnings when a class is
    # never predicted.
    acc = accuracy_score(true_labels, preds)
    prec = precision_score(true_labels, preds, zero_division=0)
    rec = recall_score(true_labels, preds, zero_division=0)
    f1 = f1_score(true_labels, preds, zero_division=0)

    metrics_dict[model_name] = {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1": f1
    }

    print(f"Model: {model_name}")
    print(f"  Accuracy:  {acc:.4f}")
    print(f"  Precision: {prec:.4f}")
    print(f"  Recall:    {rec:.4f}")
    print(f"  F1 Score:  {f1:.4f}\n")

# 6. Comparative bar chart of the metrics of every evaluated model.
# Only models that actually have an entry in metrics_dict are plotted, so an
# interrupted or partially failed evaluation run still produces a chart
# instead of raising KeyError. After a complete run this is identical to
# plotting all of model_names.
labels = [name for name in model_names if name in metrics_dict]
x = np.arange(len(labels))
width = 0.2  # width of one bar; four bars are drawn per model

accs = [metrics_dict[name]["accuracy"] for name in labels]
precs = [metrics_dict[name]["precision"] for name in labels]
recs = [metrics_dict[name]["recall"] for name in labels]
f1s = [metrics_dict[name]["f1"] for name in labels]

fig, ax = plt.subplots(figsize=(10, 6))
# The four bars of a model are centred on its tick: offsets of
# -1.5, -0.5, +0.5 and +1.5 bar widths.
ax.bar(x - 1.5*width, accs, width, label="Accuracy")
ax.bar(x - 0.5*width, precs, width, label="Precision")
ax.bar(x + 0.5*width, recs, width, label="Recall")
ax.bar(x + 1.5*width, f1s, width, label="F1 Score")

ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_ylim(0, 1.0)  # all four metrics lie in [0, 1]
ax.set_ylabel("Valoare metrică")
ax.set_title("Metrici de evaluare pentru modelele BERT pe întregul dataset")
ax.legend()

plt.show()


Evaluare model: base


Some layers from the model checkpoint at ../saved_models/saved_bert_model were not used when initializing TFBertForSequenceClassification: ['dropout_75']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at ../saved_models/saved_bert_model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


Model: base
  Accuracy:  0.9942
  Precision: 0.9989
  Recall:    0.9899
  F1 Score:  0.9944

Evaluare model: fold_1


Some layers from the model checkpoint at ../saved_models/saved_bert_model_fold_1 were not used when initializing TFBertForSequenceClassification: ['dropout_37']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at ../saved_models/saved_bert_model_fold_1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


Model: fold_1
  Accuracy:  0.9958
  Precision: 0.9989
  Recall:    0.9929
  F1 Score:  0.9959

Evaluare model: fold_2


Some layers from the model checkpoint at ../saved_models/saved_bert_model_fold_2 were not used when initializing TFBertForSequenceClassification: ['dropout_75']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at ../saved_models/saved_bert_model_fold_2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


KeyboardInterrupt: 