In [39]:
import pandas as pd
import torch
import os
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split


In [40]:
# Read the GossipCop and PolitiFact CSVs and attach the binary target in the
# same expression: 1 marks the "fake" files, 0 marks the "real" files.
gossipcop_fake = pd.read_csv("../dataset/gossipcop_fake.csv").assign(label=1)
gossipcop_real = pd.read_csv("../dataset/gossipcop_real.csv").assign(label=0)
politifact_fake = pd.read_csv("../dataset/politifact_fake.csv").assign(label=1)
politifact_real = pd.read_csv("../dataset/politifact_real.csv").assign(label=0)

# The LIAR file is loaded unchanged; no label is assigned here because a later
# cell selects its existing 'title' and 'label' columns directly.
liar_dataset = pd.read_csv("../dataset/liar_dataset/valid_binary_reassigned.csv")

In [41]:
# Create every output directory up front so later cells can write into them.
# "combined" is included because the combined PolitiFact+GossipCop model is
# trained into and evaluated from ./results/combined; "politifact_gossipcop"
# is kept so existing runs that relied on that directory keep working.
for _subdir in (
    "gossipcop",
    "politifact",
    "liar_dataset",
    "politifact_gossipcop",
    "combined",
    "all_three",
):
    os.makedirs(f"./results/{_subdir}", exist_ok=True)

# Directory passed to TrainingArguments as logging_dir by train_model.
os.makedirs("./logs", exist_ok=True)

In [42]:
# Build one frame per training configuration by stacking the labelled sources.
# ignore_index=True gives every combined frame a fresh 0..n-1 index.
df_gossipcop = pd.concat([gossipcop_fake, gossipcop_real], ignore_index=True)
df_politifact = pd.concat([politifact_fake, politifact_real], ignore_index=True)
df_politifact_gossipcop = pd.concat([gossipcop_fake, gossipcop_real, politifact_fake, politifact_real], ignore_index=True)
# The LIAR frame only contributes its headline text and label.
df_liar_dataset = liar_dataset[['title', 'label']]
# Now concatenate
df_all_three = pd.concat([df_politifact_gossipcop, df_liar_dataset], ignore_index=True)

# Final column selection: every frame is reduced to the two columns the
# training code reads ('title' as input text, 'label' as target).
df_gossipcop = df_gossipcop[['title', 'label']]
df_politifact = df_politifact[['title', 'label']]
df_politifact_gossipcop = df_politifact_gossipcop[['title', 'label']]
df_all_three = df_all_three[['title', 'label']]


# Save the combined datasets. index=False is passed on every export so the
# files contain only 'title' and 'label'; without it all_three.csv gained an
# extra unnamed index column when read back with pd.read_csv.
# NOTE(review): these files are written to the current working directory,
# while the evaluation cell reads from ../dataset/ (and expects combined.csv
# rather than politifact_gossipcop.csv) - confirm how the files get there.
df_gossipcop.to_csv("gossipcop_dataset.csv", index=False)
df_politifact.to_csv("politifact_dataset.csv", index=False)
df_politifact_gossipcop.to_csv("politifact_gossipcop.csv", index=False)
df_all_three.to_csv("all_three.csv", index=False)

In [43]:
# Notebook-level tokenizer shared by every dataset. It is passed explicitly to
# prepare_datasets, and train_model reads this same global when it saves the
# tokenizer files next to each fine-tuned model.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [44]:
from torch.utils.data import Dataset
import torch

class NewsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping (anything exposing ``.items()``) from field name to
            a sequence that can be indexed by sample position.
        labels: Sequence of per-sample labels; its length defines the dataset
            size.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors.

        Every encoding field is converted to a tensor, then the label is added
        under the ``'labels'`` key.
        """
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample


In [45]:
def prepare_datasets(df, tokenizer):
    """Split a (title, label) frame 80/20 and tokenize both parts.

    Args:
        df: DataFrame with a 'title' column (input text) and a 'label' column.
        tokenizer: Callable tokenizer applied to the list of titles.

    Returns:
        Tuple ``(train_dataset, val_dataset)`` of ``NewsDataset`` objects.

    The split uses a fixed ``random_state`` of 42, so repeated calls on the
    same frame produce the same partition.
    """
    titles = df['title'].tolist()
    targets = df['label'].tolist()
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        titles, targets, test_size=0.2, random_state=42
    )

    # Both splits are encoded with identical settings: truncate to at most
    # 128 tokens and pad within each call.
    encode_options = dict(truncation=True, padding=True, max_length=128)
    train_encodings = tokenizer(train_texts, **encode_options)
    val_encodings = tokenizer(val_texts, **encode_options)

    return (
        NewsDataset(train_encodings, train_labels),
        NewsDataset(val_encodings, val_labels),
    )


In [None]:
def train_model(train_dataset, val_dataset, output_dir, model_name):
    """Fine-tune a fresh ``bert-base-uncased`` two-class classifier and save it.

    The run evaluates and checkpoints once per epoch for three epochs. With
    ``load_best_model_at_end`` enabled and ``eval_loss`` as the selection
    metric (lower is better), the model kept at the end is the checkpoint with
    the lowest validation loss.

    Args:
        train_dataset: Dataset handed to ``Trainer`` for training.
        val_dataset: Dataset handed to ``Trainer`` for evaluation.
        output_dir: Directory for Trainer checkpoints; the final model is
            written to ``{output_dir}/{model_name}``.
        model_name: Sub-directory name for the saved model, also used in the
            progress messages.

    Returns:
        Tuple ``(model, eval_result)``: the fine-tuned model and the value
        returned by ``trainer.evaluate()``.

    Note:
        The tokenizer saved alongside the model is the notebook-level
        ``tokenizer`` variable; it is not a parameter of this function.
    """
    classifier = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

    # Collect the run configuration in one place before building the arguments.
    run_config = dict(
        output_dir=output_dir,
        eval_strategy="epoch",
        save_strategy="epoch",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        logging_dir="./logs",
        logging_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        report_to="none",
    )
    trainer = Trainer(
        model=classifier,
        args=TrainingArguments(**run_config),
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )

    print(f"Training model: {model_name}")
    trainer.train()

    # Model weights and tokenizer files go into the same directory so the
    # result can be reloaded from a single path.
    save_path = f"{output_dir}/{model_name}"
    classifier.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)

    metrics = trainer.evaluate()
    print(f"Evaluation results for {model_name}:")
    print(metrics)

    return classifier, metrics

In [47]:
# Prepare datasets
# NOTE: prepare_datasets returns (train_dataset, val_dataset). In the two
# active calls below the first variable is named "*_test_dataset", but it
# holds the 80% training split, not a test set.
# print("Preparing GossipCop datasets...")
# gossipcop_train_dataset, gossipcop_val_dataset = prepare_datasets(df_gossipcop, tokenizer)

# print("Preparing PolitiFact datasets...")
# politifact_train_dataset, politifact_val_dataset = prepare_datasets(df_politifact, tokenizer)

# print("Preparing politifact_gossipcop datasets...")
# combined_train_dataset, combined_val_dataset = prepare_datasets(df_politifact_gossipcop, tokenizer)

# LIAR only: liar_dataset_test_dataset is the training split.
print("Preparing df_liar_dataset")
liar_dataset_test_dataset, liar_dataset_val_dataset = prepare_datasets(df_liar_dataset, tokenizer)

# GossipCop + PolitiFact + LIAR: all_three_test_dataset is the training split.
print("Preparing all_three_val datasets...")
all_three_test_dataset, all_three_val_dataset = prepare_datasets(df_all_three, tokenizer)

Preparing df_liar_dataset
Preparing all_three_val datasets...


In [48]:
# gossipcop_model, gossipcop_results = train_model(
#     gossipcop_train_dataset, 
#     gossipcop_val_dataset, 
#     "./results/gossipcop", 
#     "gossipcop_model"
# )

In [49]:
# politifact_model, politifact_results = train_model(
#     politifact_train_dataset, 
#     politifact_val_dataset, 
#     "./results/politifact", 
#     "politifact_model"
# )

In [50]:
# combined_model, combined_results = train_model(
#     combined_train_dataset, 
#     combined_val_dataset, 
#     "./results/combined", 
#     "combined_model"
# )

In [51]:
# Fine-tune on the LIAR data only. Despite its name, liar_dataset_test_dataset
# is the training split returned first by prepare_datasets; it is passed as
# train_model's train_dataset argument.
liar_model, liar_results = train_model(
    liar_dataset_test_dataset, 
    liar_dataset_val_dataset, 
    "./results/liar_dataset", 
    "liar_dataset_model"
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model: liar_dataset_model


Epoch,Training Loss,Validation Loss
1,0.696,0.701899
2,0.6277,0.762765
3,0.4455,1.27664


Evaluation results for liar_dataset_model:
{'eval_loss': 0.7018986940383911, 'eval_runtime': 10.953, 'eval_samples_per_second': 23.464, 'eval_steps_per_second': 3.013, 'epoch': 3.0}


In [52]:
# Fine-tune on GossipCop + PolitiFact + LIAR combined. Despite its name,
# all_three_test_dataset is the training split returned first by
# prepare_datasets; it is passed as train_model's train_dataset argument.
all_three_model, all_three_results = train_model(
    all_three_test_dataset, 
    all_three_val_dataset, 
    "./results/all_three", 
    "all_three_model"
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model: all_three_model


Epoch,Training Loss,Validation Loss
1,0.3989,0.426205
2,0.3307,0.474835
3,0.1391,0.638294


Evaluation results for all_three_model:
{'eval_loss': 0.4262050986289978, 'eval_runtime': 209.9611, 'eval_samples_per_second': 23.319, 'eval_steps_per_second': 2.915, 'epoch': 3.0}


In [10]:
from evaluation import evaluate_model, load_model_and_tokenizer
from sklearn.model_selection import train_test_split
import pandas as pd

# Define full dataset paths instead of test files.
# Each entry gives a display name, the directory of a saved model, and the CSV
# that is re-split below to obtain the evaluation rows.
# NOTE(review): the dataset-building cell writes its CSVs to the current
# working directory (and names the combined file politifact_gossipcop.csv),
# whereas these paths point into ../dataset/ and use combined.csv - confirm
# the files were copied/renamed before running this cell.
models_to_evaluate = [
    {
        "name": "GossipCop",
        "model_path": "./results/gossipcop/gossipcop_model",
        "full_data": "../dataset/gossipcop_dataset.csv"
    },
        {
        "name": "combined",
        "model_path": "./results/combined/combined_model",
        "full_data": "../dataset/combined.csv"
    },
    {
        "name": "PolitiFact",
        "model_path": "./results/politifact/politifact_model",
        "full_data": "../dataset/politifact_dataset.csv"
    },
    {
        "name": "LIAR Dataset",
        "model_path": "./results/liar_dataset/liar_dataset_model",
        "full_data": "../dataset/liar_dataset/valid_binary_reassigned.csv"
    },
    {
        "name": "All Three Combined",
        "model_path": "./results/all_three/all_three_model",
        "full_data": "../dataset/all_three.csv"
    },
]

# Loop through and evaluate each model on a 20% split of its own dataset.
# NOTE(review): test_size and random_state match prepare_datasets, so this
# presumably reproduces the validation split that train_model used to pick the
# best checkpoint rather than an untouched hold-out set - TODO confirm.
for model_info in models_to_evaluate:
    print(f"\n🔍 Evaluating model: {model_info['name']}")

    # Load model + tokenizer.
    # This rebinds the notebook-level `model` and `tokenizer` names on every
    # iteration; train_model reads the global `tokenizer`, so re-running
    # training after this cell would save the most recently loaded tokenizer.
    model, tokenizer = load_model_and_tokenizer(model_info["model_path"])

    # Load full dataset and split; only the 20% part is used.
    full_df = pd.read_csv(model_info["full_data"])
    _, test_df = train_test_split(full_df, test_size=0.2, random_state=42)

    # Evaluate: evaluate_model comes from the local `evaluation` module and its
    # result is unpacked as (accuracy, precision, recall, f1).
    acc, precision, recall, f1 = evaluate_model(model, tokenizer, test_df)

    print(f"✅ {model_info['name']} Results:")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")



🔍 Evaluating model: GossipCop


100%|██████████| 277/277 [01:57<00:00,  2.36it/s]


✅ GossipCop Results:
Accuracy:  0.8708
Precision: 0.7868
Recall:    0.6364
F1 Score:  0.7036

🔍 Evaluating model: combined


100%|██████████| 290/290 [02:06<00:00,  2.29it/s]


✅ combined Results:
Accuracy:  0.8425
Precision: 0.6999
Recall:    0.6359
F1 Score:  0.6664

🔍 Evaluating model: PolitiFact


100%|██████████| 14/14 [00:05<00:00,  2.39it/s]


✅ PolitiFact Results:
Accuracy:  0.8585
Precision: 0.8036
Recall:    0.9184
F1 Score:  0.8571

🔍 Evaluating model: LIAR Dataset


100%|██████████| 17/17 [00:10<00:00,  1.55it/s]


✅ LIAR Dataset Results:
Accuracy:  0.5175
Precision: 0.5185
Recall:    0.3307
F1 Score:  0.4038

🔍 Evaluating model: All Three Combined


100%|██████████| 306/306 [02:14<00:00,  2.28it/s]

✅ All Three Combined Results:
Accuracy:  0.8356
Precision: 0.8026
Recall:    0.4802
F1 Score:  0.6009



