In [76]:
import sys
import os
from pathlib import Path

# Put the parent of the current working directory at the front of sys.path
# (unless it is already listed); presumably that is where the `src` package
# imported below lives.
project_root = Path.cwd().parent
project_root_entry = str(project_root)
if project_root_entry not in sys.path:
    sys.path.insert(0, project_root_entry)

from src.predictive_models import (
    load_datasets,
    build_frequency_model,
    predict_frequency_model,
    build_sensationalism_model,
    predict_sensationalism_model,
    build_malicious_account_model,
    predict_malicious_account_model,
    build_naive_realism_model,
    predict_naive_realism_model,
    map_sensationalism_from_counts,
    map_naive_realism_from_sentiment
)

import pandas as pd
import json
from sklearn.metrics import accuracy_score




# Ground Truth

In [122]:
# Reference labels for the four heuristics; every "Ground Truth Accuracy"
# figure further down is measured against this frame.
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, which looks
# like a saved index -- consider index_col=0 if nothing relies on it.
ground_truth_csv = '../data/ground_truth.csv'
ground_truth = pd.read_csv(ground_truth_csv)
ground_truth

Unnamed: 0,statement,frequency_heuristic,malicious_account,sensationalism,naive_realism,link
0,A president steeped in constitutional lore mig...,2,0,2,2,https://www.cnn.com/2025/10/20/politics/trump-...
1,"Trump is 'committed' to $2,000 tariff dividend...",0,0,0,1,https://abc7.com/post/trump-is-committed-givin...
2,The longest US government shutdown in history ...,0,0,0,1,https://www.theguardian.com/us-news/2025/nov/1...
3,Add AP News as your preferred source to see mo...,0,0,0,0,https://apnews.com/article/redistricting-gerry...
4,"Key Points Prop 50 passed in California, appro...",1,0,1,0,https://azcapitoltimes.com/news/2025/11/12/pro...
5,"The bill will now head to the Senate, where it...",0,0,1,1,https://abcnews.go.com/Politics/house-vote-ful...
6,More than 6 in 10 registered voters said they ...,0,0,0,0,https://www.nbcnews.com/politics/politics-news...
7,"UK will not tolerate Chinese spying, minister ...",1,0,0,0,https://www.bbc.com/news/articles/c4gpnz05kr8o
8,Most Californians probably see the Capitol as ...,0,0,2,1,https://calmatters.org/commentary/2025/11/dana...
9,A new KFF/New York Times Survey of Immigrants ...,0,0,2,0,https://www.kff.org/racial-equity-and-health-p...


In [78]:
# Show the statement text stored under index label 0.
ground_truth.loc[0, "statement"]



In [79]:
# Show the source URL stored under index label 0.
ground_truth.loc[0, "link"]

'https://www.cnn.com/2025/10/20/politics/trump-no-kings-protests-vance-cia-analysis'

# Predictive Models

In [80]:
# CSV locations of the train / validation / test splits, relative to the
# notebook's working directory.
train_path, val_path, test_path = (
    "../data/train_set.csv",
    "../data/val_set.csv",
    "../data/test_set.csv",
)

# load_datasets hands back three frames, unpacked here in the order the paths
# were passed.
dataset_splits = load_datasets(train_path, val_path, test_path)
df_train, df_val, df_test = dataset_splits

## Frequency Heuristic

### LiarPLUS Accuracy

In [81]:
# Section-private copies: columns added while evaluating the frequency model
# stay out of the shared df_train / df_val / df_test frames.
train_freq, val_freq, test_freq = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [82]:
# Fit the frequency-heuristic model on the training copy. The six returned
# artifacts are kept under their individual names because later cells pass
# them to predict_frequency_model again.
model, tfidf, count_vec, token_dict, buzzwords, label_encoder = build_frequency_model(train_freq)
frequency_artifacts = (model, tfidf, count_vec, token_dict, buzzwords, label_encoder)

# Score the held-out splits with the fitted artifacts.
val_pred_freq = predict_frequency_model(val_freq, *frequency_artifacts)
test_pred_freq = predict_frequency_model(test_freq, *frequency_artifacts)


In [83]:
# Collapse the six truthfulness labels onto a 0/1/2 scale.
label_to_score = {
    **dict.fromkeys(("true", "mostly-true"), 0),
    "half-true": 1,
    **dict.fromkeys(("barely-true", "false", "pants-on-fire"), 2),
}


def _frequency_score_for(label):
    """Return the mapped score for ``label``; labels outside the mapping get 1."""
    return label_to_score.get(label, 1)


# Derive the evaluation target for each held-out split from its "label" column.
val_freq["frequency_heuristic"] = list(map(_frequency_score_for, val_freq["label"]))
test_freq["frequency_heuristic"] = list(map(_frequency_score_for, test_freq["label"]))

# Fraction of rows where the model's prediction equals the derived target.
freq_val_accuracy = accuracy_score(
    val_freq["frequency_heuristic"],
    val_pred_freq["predicted_frequency_heuristic"],
)
freq_test_accuracy = accuracy_score(
    test_freq["frequency_heuristic"],
    test_pred_freq["predicted_frequency_heuristic"],
)


In [84]:
# Report the frequency-heuristic accuracies, validation first, then test.
for caption, accuracy in (
    ("Frequency Heuristic Val Accuracy:", freq_val_accuracy),
    ("Frequency Heuristic Test Accuracy:", freq_test_accuracy),
):
    print(caption, accuracy)

Frequency Heuristic Val Accuracy: 0.36751497005988026
Frequency Heuristic Test Accuracy: 0.3769633507853403


### Ground Truth Accuracy

In [85]:
# Score the ground-truth articles with the frequency model fitted above.
# Note that ground_truth itself (not a copy) is handed to the helper.
freq_pred_ground_truth = predict_frequency_model(
    ground_truth,
    model,
    tfidf,
    count_vec,
    token_dict,
    buzzwords,
    label_encoder,
)

In [86]:
# Agreement between the reference frequency labels and the model's predictions.
freq_accuracy = accuracy_score(
    y_true=ground_truth["frequency_heuristic"],
    y_pred=freq_pred_ground_truth["predicted_frequency_heuristic"],
)
print("Frequency Heuristic Ground Truth Accuracy:", freq_accuracy)

Frequency Heuristic Ground Truth Accuracy: 0.45


## Sensationalism

### LiarPLUS Accuracy

In [87]:
# Section-private copies so the sensationalism columns added below do not
# touch the shared split frames.
train_sens, val_sens, test_sens = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [88]:
# Fit on the training copy; the helper returns the model together with the
# numeric-feature object that prediction needs.
sens_artifacts = build_sensationalism_model(train_sens)
sens_model, sens_numeric_features = sens_artifacts

# Predict for validation first, then test.
val_pred_sens, test_pred_sens = (
    predict_sensationalism_model(split, sens_model, sens_numeric_features)
    for split in (val_sens, test_sens)
)


In [89]:
# Derive the sensationalism target row by row with the project helper
# (each row is passed to map_sensationalism_from_counts).
for sens_split in (val_sens, test_sens):
    sens_split["sensationalism"] = sens_split.apply(
        map_sensationalism_from_counts, axis="columns"
    )


In [90]:
# Compare the derived sensationalism targets with the model's predictions.
sens_val_accuracy = accuracy_score(
    y_true=val_sens["sensationalism"],
    y_pred=val_pred_sens['predicted_sensationalism'],
)
sens_test_accuracy = accuracy_score(
    y_true=test_sens["sensationalism"],
    y_pred=test_pred_sens['predicted_sensationalism'],
)

for caption, accuracy in (
    ("Sensationalism Val Accuracy:", sens_val_accuracy),
    ("Sensationalism Test Accuracy:", sens_test_accuracy),
):
    print(caption, accuracy)

Sensationalism Val Accuracy: 0.5112275449101796
Sensationalism Test Accuracy: 0.5130890052356021


### Ground Truth Accuracy

In [91]:
# Score the ground-truth articles with the fitted sensationalism model.
sens_pred_ground_truth = predict_sensationalism_model(
    ground_truth,
    sens_model,
    sens_numeric_features,
)

In [92]:
# Agreement between the reference sensationalism labels and the predictions.
sens_accuracy = accuracy_score(
    y_true=ground_truth["sensationalism"],
    y_pred=sens_pred_ground_truth["predicted_sensationalism"],
)
print("Sensationalism Ground Truth Accuracy:", sens_accuracy)

Sensationalism Ground Truth Accuracy: 0.55


## Malicious Account

### LiarPLUS Accuracy

In [93]:
# Section-private copies so the malicious-account columns added below do not
# touch the shared split frames.
train_mal, val_mal, test_mal = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [94]:
# Fit the malicious-account model; the helper returns three artifacts that
# predict_malicious_account_model takes back in the same order.
mal_artifacts = build_malicious_account_model(train_mal)
mal_model, mal_tfidf, mal_le = mal_artifacts

In [95]:
# Predict for validation first, then test, with the fitted artifacts.
val_pred_mal, test_pred_mal = (
    predict_malicious_account_model(split, mal_model, mal_tfidf, mal_le)
    for split in (val_mal, test_mal)
)


In [96]:
# Collapse the six truthfulness labels onto a 0/1/2 scale.
# NOTE(review): this is the same mapping the Frequency Heuristic section uses;
# confirm the truthfulness label is the intended proxy for malicious_account.
label_to_score = {
    **dict.fromkeys(("true", "mostly-true"), 0),
    "half-true": 1,
    **dict.fromkeys(("barely-true", "false", "pants-on-fire"), 2),
}


def _malicious_score_for(label):
    """Return the mapped score for ``label``; labels outside the mapping get 1."""
    return label_to_score.get(label, 1)


# Derive the evaluation target for each held-out split from its "label" column.
val_mal["malicious_account"] = list(map(_malicious_score_for, val_mal["label"]))
test_mal["malicious_account"] = list(map(_malicious_score_for, test_mal["label"]))


In [97]:
# Compare the derived malicious-account targets with the model's predictions.
mal_val_accuracy = accuracy_score(
    y_true=val_mal["malicious_account"],
    y_pred=val_pred_mal['predicted_malicious_account'],
)
mal_test_accuracy = accuracy_score(
    y_true=test_mal["malicious_account"],
    y_pred=test_pred_mal['predicted_malicious_account'],
)

for caption, accuracy in (
    ("Malicious Account Val Accuracy:", mal_val_accuracy),
    ("Malicious Account Test Accuracy:", mal_test_accuracy),
):
    print(caption, accuracy)

Malicious Account Val Accuracy: 0.35254491017964074
Malicious Account Test Accuracy: 0.36350037397157814


### Ground Truth Accuracy

In [98]:
# Score the ground-truth articles with the fitted malicious-account model.
mal_pred_ground_truth = predict_malicious_account_model(
    ground_truth,
    mal_model,
    mal_tfidf,
    mal_le,
)

In [99]:
# Agreement between the reference malicious-account labels and the predictions.
mal_accuracy = accuracy_score(
    y_true=ground_truth["malicious_account"],
    y_pred=mal_pred_ground_truth["predicted_malicious_account"],
)
print("Malicious Account Ground Truth Accuracy:", mal_accuracy)

Malicious Account Ground Truth Accuracy: 0.7


## Naive Realism

### LiarPLUS Accuracy

In [100]:
# Section-private copies so the naive-realism columns added below do not
# touch the shared split frames.
train_nr, val_nr, test_nr = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [101]:
# Fit the naive-realism model; the helper returns the model together with the
# numeric-feature object that prediction needs.
naive_artifacts = build_naive_realism_model(train_nr)
naive_model, naive_numeric_features = naive_artifacts


In [102]:
# Predict for validation first, then test, with the fitted artifacts.
val_pred_nr, test_pred_nr = (
    predict_naive_realism_model(split, naive_model, naive_numeric_features)
    for split in (val_nr, test_nr)
)


In [103]:
# Derive the naive-realism target from each statement with the project helper.
for nr_split in (val_nr, test_nr):
    nr_statements = nr_split["statement"]
    nr_split["naive_realism"] = nr_statements.apply(map_naive_realism_from_sentiment)


In [104]:
# Compare the derived naive-realism targets with the model's predictions.
nr_val_accuracy = accuracy_score(
    y_true=val_nr["naive_realism"],
    y_pred=val_pred_nr['predicted_naive_realism'],
)
nr_test_accuracy = accuracy_score(
    y_true=test_nr["naive_realism"],
    y_pred=test_pred_nr['predicted_naive_realism'],
)

for caption, accuracy in (
    ("Naive Realism Val Accuracy:", nr_val_accuracy),
    ("Naive Realism Test Accuracy:", nr_test_accuracy),
):
    print(caption, accuracy)

Naive Realism Val Accuracy: 0.6781437125748503
Naive Realism Test Accuracy: 0.6768885564697082


### Ground Truth Accuracy

In [105]:
# Score the ground-truth articles with the fitted naive-realism model.
nr_pred_ground_truth = predict_naive_realism_model(
    ground_truth,
    naive_model,
    naive_numeric_features,
)


In [106]:
# Agreement between the reference naive-realism labels and the predictions.
nr_accuracy = accuracy_score(
    y_true=ground_truth["naive_realism"],
    y_pred=nr_pred_ground_truth["predicted_naive_realism"],
)
print("Naive Realism Ground Truth Accuracy:", nr_accuracy)

Naive Realism Ground Truth Accuracy: 0.4


# GenAI

## Base Prompt

In [154]:
# Saved base-prompt outputs; preview the first record to see the columns
# (score, reason and confidence per heuristic).
base_outputs_csv = '../webapp/results/base_outputs.csv'
base_df = pd.read_csv(base_outputs_csv)
base_df.iloc[0]

id                                 e4fc0d08-70fb-47f3-b9f7-244fae6b6ac0
url                   https://www.cnn.com/2025/10/20/politics/trump-...
freq_score                                                            2
freq_reason           The article consistently repeats the central c...
freq_confidence                                                      95
mal_score                                                             0
mal_reason            The article itself, while highly partisan, fol...
mal_confidence                                                       90
sens_score                                                            2
sens_reason           The text is saturated with dramatic, emotional...
sens_confidence                                                     100
naive_score                                                           2
naive_reason          The article presents a single, un-nuanced pers...
naive_confidence                                                

In [108]:
# Element-wise match between the reference labels and the base-prompt scores.
# Both frames come from read_csv with its default index, so row i of
# ground_truth is compared with row i of base_df.
# NOTE(review): nothing here checks that the two files list the articles in
# the same order -- confirm against the url / link columns.
base_freq_hits = ground_truth["frequency_heuristic"] == base_df["freq_score"]
base_mal_hits = ground_truth["malicious_account"] == base_df["mal_score"]
base_sens_hits = ground_truth["sensationalism"] == base_df["sens_score"]
base_naive_hits = ground_truth["naive_realism"] == base_df["naive_score"]

# The mean of a boolean Series is the fraction of matching rows.
freq_acc = base_freq_hits.mean()
mal_acc = base_mal_hits.mean()
sens_acc = base_sens_hits.mean()
naive_acc = base_naive_hits.mean()

In [109]:
# Print whichever accuracy values were computed most recently under these names.
for caption, accuracy in (
    ("Frequency heuristic accuracy:", freq_acc),
    ("Malicious account accuracy:", mal_acc),
    ("Sensationalism accuracy:", sens_acc),
    ("Naive Realism accuracy:", naive_acc),
):
    print(caption, accuracy)


Frequency heuristic accuracy: 0.6
Malicious account accuracy: 0.8
Sensationalism accuracy: 0.45
Naive Realism accuracy: 0.6


## Chain of Thought

In [151]:
# Saved chain-of-thought outputs; preview the first record to see the columns
# (score, reason and confidence per heuristic).
cot_outputs_csv = '../webapp/results/cot_outputs.csv'
cot_df = pd.read_csv(cot_outputs_csv)
cot_df.iloc[0]

id                                 846eba04-9a07-4db2-8e2b-bd22b5e637ee
url                   https://www.cnn.com/2025/10/20/politics/trump-...
freq_score                                                            2
freq_reason           The article heavily repeats the central narrat...
freq_confidence                                                     100
mal_score                                                             2
mal_reason            The article is a piece of speculative fiction ...
mal_confidence                                                      100
sens_score                                                            2
sens_reason           The article uses highly dramatic and exaggerat...
sens_confidence                                                     100
naive_score                                                           2
naive_reason          The article presents its critical perspective ...
naive_confidence                                                

In [111]:
# Element-wise match between the reference labels and the chain-of-thought
# scores. Both frames come from read_csv with its default index, so row i of
# ground_truth is compared with row i of cot_df.
# NOTE(review): these assignments reuse the freq_acc / mal_acc / sens_acc /
# naive_acc names also assigned in the other GenAI sections.
cot_freq_hits = ground_truth["frequency_heuristic"] == cot_df["freq_score"]
cot_mal_hits = ground_truth["malicious_account"] == cot_df["mal_score"]
cot_sens_hits = ground_truth["sensationalism"] == cot_df["sens_score"]
cot_naive_hits = ground_truth["naive_realism"] == cot_df["naive_score"]

# The mean of a boolean Series is the fraction of matching rows.
freq_acc = cot_freq_hits.mean()
mal_acc = cot_mal_hits.mean()
sens_acc = cot_sens_hits.mean()
naive_acc = cot_naive_hits.mean()

In [112]:
# Print whichever accuracy values were computed most recently under these names.
for caption, accuracy in (
    ("Frequency heuristic accuracy:", freq_acc),
    ("Malicious account accuracy:", mal_acc),
    ("Sensationalism accuracy:", sens_acc),
    ("Naive Realism accuracy:", naive_acc),
):
    print(caption, accuracy)

Frequency heuristic accuracy: 0.7
Malicious account accuracy: 0.5
Sensationalism accuracy: 0.35
Naive Realism accuracy: 0.45


## Fractal Chain of Thought

In [113]:
# Saved fractal chain-of-thought outputs, one row per evaluated article URL.
fcot_outputs_csv = '../webapp/results/fcot_outputs.csv'
fcot_df = pd.read_csv(fcot_outputs_csv)


In [153]:
# Preview the first record (all columns) of the fractal chain-of-thought outputs.
fcot_df.iloc[0, :]

id                                 57c0b9f9-9da2-4a91-adb6-9899a13eb906
url                   https://www.cnn.com/2025/10/20/politics/trump-...
freq_score                                                            2
freq_reason           The article heavily and repeatedly centers on ...
freq_confidence                                                     100
mal_score                                                             0
mal_reason            I am disagreeing with the model's score of 2. ...
mal_confidence                                                       95
sens_score                                                            2
sens_reason           The article employs highly sensationalized and...
sens_confidence                                                     100
naive_score                                                           2
naive_reason          I am disagreeing with the model's score of 1 a...
naive_confidence                                                

In [114]:
# Element-wise match between the reference labels and the fractal
# chain-of-thought scores. Both frames come from read_csv with its default
# index, so row i of ground_truth is compared with row i of fcot_df.
# NOTE(review): these assignments reuse the freq_acc / mal_acc / sens_acc /
# naive_acc names also assigned in the other GenAI sections.
fcot_freq_hits = ground_truth["frequency_heuristic"] == fcot_df["freq_score"]
fcot_mal_hits = ground_truth["malicious_account"] == fcot_df["mal_score"]
fcot_sens_hits = ground_truth["sensationalism"] == fcot_df["sens_score"]
fcot_naive_hits = ground_truth["naive_realism"] == fcot_df["naive_score"]

# The mean of a boolean Series is the fraction of matching rows.
freq_acc = fcot_freq_hits.mean()
mal_acc = fcot_mal_hits.mean()
sens_acc = fcot_sens_hits.mean()
naive_acc = fcot_naive_hits.mean()

In [115]:
# Print whichever accuracy values were computed most recently under these names.
for caption, accuracy in (
    ("Frequency heuristic accuracy:", freq_acc),
    ("Malicious account accuracy:", mal_acc),
    ("Sensationalism accuracy:", sens_acc),
    ("Naive Realism accuracy:", naive_acc),
):
    print(caption, accuracy)

Frequency heuristic accuracy: 0.55
Malicious account accuracy: 0.55
Sensationalism accuracy: 0.6
Naive Realism accuracy: 0.7
