# Imports

In [None]:
import sys, os
sys.path.append(os.path.abspath(".."))

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from src.predictive_models import (
    load_datasets,
    build_frequency_model,
    predict_frequency_model,
    # build_echo_chamber_model,
    # predict_echo_chamber_model,
    build_sensationalism_model,
    predict_sensationalism_model,
    # build_credibility_model,
    # predict_credibility_model
)


# Load LiarPlus Dataset

In [None]:
# Paths to the pre-split CSV files, relative to this notebook's directory.
train_path, val_path, test_path = (
    "../data/train_set.csv",
    "../data/val_set.csv",
    "../data/test_set.csv",
)

# Read all three splits with the project's loader in a single call.
df_train, df_val, df_test = load_datasets(train_path, val_path, test_path)


In [None]:
# Preview the first rows of the training split.
df_train.head()

In [None]:
# (rows, columns) of the training split.
df_train.shape

In [None]:
# (rows, columns) of the validation split.
df_val.shape

In [None]:
# (rows, columns) of the test split.
df_test.shape

# Frequency Heuristic

In [None]:
# Work on independent copies of the three splits for the frequency-heuristic stage.
train_freq, val_freq, test_freq = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [None]:
# Fit the frequency-heuristic model on the training copy. The builder returns
# six artifacts, all of which the predictor needs again below.
(
    model_freq,
    tfidf_freq,
    count_vec_freq,
    token_dict_freq,
    buzzwords_freq,
    le_freq,
) = build_frequency_model(train_freq)

# Score the validation split first, then the test split, with the same artifacts.
val_results, test_results = (
    predict_frequency_model(
        split,
        model_freq,
        tfidf_freq,
        count_vec_freq,
        token_dict_freq,
        buzzwords_freq,
        le_freq,
    )
    for split in (val_freq, test_freq)
)

# A bare last expression uses the notebook's rich DataFrame display, which
# print() would flatten to plain text.
val_results.head()

# Echo Chamber

In [None]:
# train_echo = df_train.copy()
# val_echo   = df_val.copy()
# test_echo  = df_test.copy()

In [None]:
# model_echo, vectorizer_echo, le_echo, concentration_map = build_echo_chamber_model(train_echo)

# val_results_echo = predict_echo_chamber_model(val_echo, model_echo, vectorizer_echo, le_echo, concentration_map)
# print(val_results_echo.head())

# test_results_echo = predict_echo_chamber_model(test_echo, model_echo, vectorizer_echo, le_echo, concentration_map)
# print(test_results_echo.head())

# Sensationalism

In [None]:
# Work on independent copies of the three splits for the sensationalism stage.
train_sens, val_sens, test_sens = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [None]:
# Build the sensationalism model and keep the three artifacts the predictor needs.
# NOTE(review): the validation and test frames are also handed to the builder —
# confirm in src.predictive_models that they are not used for fitting.
sens_pipeline, sens_meta, sens_num = build_sensationalism_model(
    train_sens, val_sens, test_sens
)

# Score the validation split first, then the test split, with the same artifacts.
val_results_sens, test_results_sens = (
    predict_sensationalism_model(split, sens_pipeline, sens_meta, sens_num)
    for split in (val_sens, test_sens)
)


In [None]:
# Bare expression so the notebook renders the preview as a rich table
# instead of print()'s plain-text dump.
val_results_sens.head()

In [None]:
# Bare expression so the notebook renders the preview as a rich table
# instead of print()'s plain-text dump.
test_results_sens.head()

# Credibility

In [None]:
# train_cred = df_train.copy()
# val_cred   = df_val.copy()
# test_cred  = df_test.copy()

In [None]:
# cred_pipeline, party_enc_cred = build_credibility_model(train_cred, val_cred, test_cred)

# val_results_cred = predict_credibility_model(val_cred, cred_pipeline, party_enc_cred)
# test_results_cred = predict_credibility_model(test_cred, cred_pipeline, party_enc_cred)


# Overview Dashboards

In [None]:
# Eight "husl" colours shared by the dashboard plots.
PALETTE = sns.color_palette(palette="husl", n_colors=8)
# Running count of palette colours handed out so far; overview_dashboard
# reads and advances it to pick the next colour.
_color_index = 0

In [None]:
def overview_dashboard(results_df, model_name, score_col, bins=20, kde=True, color=None):
    """Plot the distribution of a score column and print summary statistics.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Frame holding the scores to summarise.
    model_name : str
        Label used in the plot title and the printed summary header.
    score_col : str
        Name of the column in ``results_df`` to plot.
    bins : int, default 20
        Forwarded to ``sns.histplot``.
    kde : bool, default True
        Forwarded to ``sns.histplot``.
    color : optional
        Colour for the histogram and title. When omitted, the next entry of
        the module-level ``PALETTE`` is used and the global ``_color_index``
        is advanced, so the colour then depends on how many dashboards were
        drawn before. Pass a colour explicitly for a figure that looks the
        same on every re-run of the cell.

    Returns
    -------
    None
        The figure is shown and the statistics are printed. If ``score_col``
        is missing, a message is printed and nothing is drawn.
    """
    global _color_index
    if score_col not in results_df.columns:
        print(f"Column '{score_col}' not found in DataFrame.")
        return

    if color is None:
        # Fall back to cycling through the shared palette.
        color = PALETTE[_color_index % len(PALETTE)]
        _color_index += 1

    scores = results_df[score_col]

    # Explicit Axes instead of the pyplot state machine.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(scores, bins=bins, kde=kde, color=color, ax=ax)
    ax.set_title(f"{model_name} Score Distribution", fontsize=14, fontweight='bold', color=color)
    ax.set_xlabel("Score (0–1)")
    ax.set_ylabel("Count")
    plt.show()

    mean_score = scores.mean()
    median_score = scores.median()
    std_score = scores.std()
    # Share of rows whose score is strictly above 0.8, as a percentage.
    high_conf_pct = (scores > 0.8).mean() * 100

    print(f"{model_name} Summary Statistics:")
    print(f"   • Mean Score: {mean_score:.3f}")
    print(f"   • Median Score: {median_score:.3f}")
    print(f"   • Std. Deviation: {std_score:.3f}")
    print(f"   • % High-Confidence (Score > 0.8): {high_conf_pct:.1f}%")

In [None]:
# Score distribution of the frequency-heuristic model on the test split.
overview_dashboard(
    results_df=test_results,
    model_name="Frequency Heuristic",
    score_col="frequency_heuristic_score",
)

In [None]:
# overview_dashboard(test_results_echo, "Echo Chamber", "echo_chamber_score")

In [None]:
# Score distribution of the sensationalism model on the test split.
overview_dashboard(
    results_df=test_results_sens,
    model_name="Sensationalism",
    score_col="sensationalism_score",
)

In [None]:
# overview_dashboard(test_results_cred, "Credibility", "credibility_score")

In [None]:
def _weighted_scores(model_name, y_true, y_pred):
    """Return one comparison-table row for a model.

    The row holds the model name, plain accuracy, and precision / recall / F1
    computed with ``average='weighted'`` and ``zero_division=0``.
    """
    weighted = {"average": 'weighted', "zero_division": 0}
    return {
        "Model": model_name,
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, **weighted),
        "Recall": recall_score(y_true, y_pred, **weighted),
        "F1": f1_score(y_true, y_pred, **weighted),
    }


# One row per evaluated model; every row goes through the same helper so the
# metric settings cannot drift apart between models.
metrics_list = [
    _weighted_scores(
        "Frequency Heuristic",
        df_test["label"],
        test_results["predicted_frequency_heuristic"],
    ),
    _weighted_scores(
        "Sensationalism",
        test_sens["sensationalism"],
        test_results_sens["predicted_sensationalism"],
    ),
    # _weighted_scores(
    #     "Credibility",
    #     test_cred["credibility"],
    #     test_results_cred["predicted_credibility"],
    # ),
]

In [None]:
# Tabulate the per-model rows and show them with a green gradient,
# formatted to three decimals.
metrics_df = pd.DataFrame(metrics_list)
metrics_styler = metrics_df.style.background_gradient(cmap="Greens")
display(metrics_styler.format(precision=3))

In [None]:
# Grouped bar chart: one group per model, one bar per metric.
metric_columns = ["Accuracy", "Precision", "Recall", "F1"]
ax = metrics_df.set_index("Model")[metric_columns].plot(
    kind="bar", figsize=(8,5), colormap="Greens"
)
ax.set_title("Model Performance Comparison")
ax.set_ylabel("Score")
ax.set_ylim(0,1)
ax.grid(axis='y', linestyle='--', alpha=0.5)
plt.show()

In [None]:
# echo_summary = {
#     "Mean Confidence": test_results_echo["echo_chamber_score"].mean(),
#     "Median Confidence": test_results_echo["echo_chamber_score"].median(),
#     "High-Confidence % (>0.8)": (test_results_echo["echo_chamber_score"] > 0.8).mean() * 100,
#     "Low-Confidence % (<0.5)": (test_results_echo["echo_chamber_score"] < 0.5).mean() * 100
# }

In [None]:
# print("Echo Chamber Confidence Summary:")
# for k, v in echo_summary.items():
#     print(f"   {k}: {v:.2f}")

In [None]:
# sns.histplot(test_results_echo["echo_chamber_score"], bins=20, kde=True, color="#6A0DAD")
# plt.title("Echo Chamber Confidence Distribution", fontsize=13)
# plt.xlabel("Confidence (0–1)")
# plt.ylabel("Count")
# plt.grid(alpha=0.3)
# plt.show()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Confusion matrix of true vs. predicted sensationalism on the test split.
# NOTE(review): display_labels are applied positionally — confirm that the
# label values sort in Low / Medium / High order.
sens_cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true=test_sens["sensationalism"],
    y_pred=test_results_sens["predicted_sensationalism"],
    display_labels=["Low","Medium","High"],
    cmap="Blues",
    colorbar=False,
)
sens_cm_display.ax_.set_title("Sensationalism Confusion Matrix")
plt.show()

In [None]:
# ConfusionMatrixDisplay.from_predictions(
#     test_cred["credibility"],
#     test_results_cred["predicted_credibility"],
#     display_labels=["Low","Medium","High"],
#     cmap="Blues", colorbar=False
# )
# plt.title("Credibility Confusion Matrix")
# plt.show()

In [None]:
def plot_multiclass_roc(y_true, y_prob, model_name, n_classes=3, colors=None):
    """Plot one-vs-rest ROC curves, one per class, on a single figure.

    Parameters
    ----------
    y_true : array-like
        True class indices; binarised against ``range(n_classes)``.
    y_prob : array-like, shape (n_samples, n_classes)
        Per-class scores; column ``i`` is used for class ``i``. Anything
        ``np.asarray`` accepts works (ndarray, DataFrame, nested lists).
    model_name : str
        Prefix of the figure title.
    n_classes : int, default 3
        Number of classes / curves to draw.
    colors : sequence, optional
        One colour per class. Defaults to a seaborn "husl" palette with
        ``n_classes`` entries.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``.
    """
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics import roc_curve, auc

    # Positional column indexing below needs a plain array, not a DataFrame.
    y_prob = np.asarray(y_prob)

    y_true_bin = label_binarize(y_true, classes=list(range(n_classes)))
    if n_classes == 2 and y_true_bin.shape[1] == 1:
        # For two classes label_binarize returns a single column (the indicator
        # of class 1). Expand it to one column per class so the y_true_bin[:, i]
        # lookup works instead of raising IndexError for i == 1.
        y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

    if colors is None:
        colors = sns.color_palette("husl", n_classes)

    fig, ax = plt.subplots(figsize=(7,5))

    for i in range(n_classes):
        # One-vs-rest curve: class i against all other classes.
        fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_prob[:, i])
        roc_auc = auc(fpr, tpr)
        ax.plot(fpr, tpr, lw=2, color=colors[i], label=f"Class {i} (AUC = {roc_auc:.2f})")

    # Diagonal reference line (TPR == FPR).
    ax.plot([0,1],[0,1],'k--',lw=1)
    ax.set_title(f"{model_name} – ROC Curves")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend()
    ax.grid(alpha=0.3)
    plt.show()

In [None]:
def _word_freq_mean(text):
    """Mean ``token_dict_freq`` value over the statement's known tokens (0 if none)."""
    if not text:
        return 0
    known = [token_dict_freq[w] for w in text.lower().split() if w in token_dict_freq]
    # Guarding the empty case avoids np.mean([]) -> NaN plus a RuntimeWarning.
    return np.mean(known) if known else 0


def _buzzword_score(text):
    """Number of entries of ``buzzwords_freq`` found as substrings of the statement."""
    lowered = text.lower()
    return sum(b in lowered for b in buzzwords_freq)


def _repetition_score(text):
    """1 - (unique tokens / total tokens); 0 when the statement has no tokens."""
    tokens = text.lower().split() if text else []
    # A whitespace-only statement is truthy but yields no tokens; dividing by
    # len(tokens) would raise ZeroDivisionError without this guard.
    if not tokens:
        return 0
    return 1 - len(set(tokens)) / len(tokens)


# Rebuild the four feature columns for the test statements by hand.
# NOTE(review): presumably these must mirror the features the frequency model
# was trained on — confirm against build_frequency_model.
statements_test = df_test["statement"]
X_test_freq = pd.DataFrame({
    # np.asarray(...).ravel() flattens the row means whether the sparse mean
    # comes back as np.matrix or as a plain ndarray (.A1 exists only on np.matrix).
    "tfidf_mean": np.asarray(tfidf_freq.transform(statements_test).mean(axis=1)).ravel(),
    "word_freq_mean": statements_test.apply(_word_freq_mean),
    "buzzword_score": statements_test.apply(_buzzword_score),
    "repetition_score": statements_test.apply(_repetition_score),
}).fillna(0)

y_true_freq = le_freq.transform(df_test["label"])
y_prob_freq = model_freq.predict_proba(X_test_freq)

plot_multiclass_roc(y_true_freq, y_prob_freq, "Frequency Heuristic", n_classes=len(le_freq.classes_))

In [None]:
# X_test_cred = test_cred[["statement","party_encoded","expertise_level","subjectivity"]]
# y_true_cred = test_cred["credibility"]
# y_prob_cred = cred_pipeline.predict_proba(X_test_cred)

# plot_multiclass_roc(y_true_cred, y_prob_cred, "Credibility", n_classes=3)