In [29]:
import os
import pandas as pd
from misinformation_detector import MisinformationDetector

# ============================ STEP 1: CLEAN THE DATA ============================

# Define dataset path (change to "test.tsv" or "valid.tsv" as needed)
dataset_path = "data/combined_dataset.tsv"

# Column names in the LIAR dataset
column_names = [
    "id", "label", "statement", "subject", "speaker", "job_title", "state",
    "party_affiliation", "barely_true_counts", "false_counts", "half_true_counts",
    "mostly_true_counts", "pants_on_fire_counts", "context"
]

# Load dataset
df = pd.read_csv(dataset_path, sep='\t', names=column_names, index_col=False)

# Remove ambiguous labels (keeping only "true", "false", "pants-fire")
df = df[df["label"].isin(["true", "false", "pants-fire"])]

# Remove statements with less than 5 words (not informative)
df = df[df["statement"].str.split().str.len() >= 7]

# Remove statements with missing speakers
df = df.dropna(subset=["speaker"])

# ============================ STEP 2: RUN MISINFORMATION DETECTION ============================

# Load cleaned dataset
test_df = df

# Convert labels to binary misinformation classification
def convert_label(label):
    misinformation_labels = ["false", "pants-fire"]  # "true" is considered not misinformation
    return label in misinformation_labels

test_df["contains_misinformation"] = test_df["label"].apply(convert_label)
test_df = test_df[["statement", "contains_misinformation"]]

# Initialize the Misinformation Detector
detector = MisinformationDetector()

# Sample a subset of test data for analysis (to avoid excessive API calls)
test_sample = test_df.sample(n=100, random_state=53)

# Store results
analysis_results = []

# Run analysis on the test dataset
for index, row in test_sample.iterrows():
    statement = row["statement"]
    ground_truth = row["contains_misinformation"]

    # Run misinformation detection model
    result = detector.analyze_text(statement)

    # Store results
    analysis_results.append({
        "statement": statement,
        "ground_truth": ground_truth,
        "model_prediction": result["contains_misinformation"],
        "confidence_score": result["confidence_score"],
        "detected_criteria": result["detected_criteria"],
        "explanation": result["explanation"],
        "prompt_for_context": result["prompt_for_context"]
    })

# Convert results to DataFrame and save
results_df = pd.DataFrame(analysis_results)
results_df.to_csv("analysis_results.csv", index=False)

print("Analysis completed. Results saved in 'analysis_results.csv'.")


Analysis completed. Results saved in 'analysis_results.csv'.


In [36]:
import pandas as pd
df = pd.read_csv("analysis_results.csv")
df.head()


Unnamed: 0,statement,ground_truth,model_prediction,confidence_score,detected_criteria,explanation,prompt_for_context
0,The actual deportations from the interior of t...,False,True,0.7,[1],The statement claims that deportations from th...,False
1,Martin Luther King Jr. was a Republican!,True,True,0.9,"[1, 2]",The claim that Martin Luther King Jr. was a Re...,False
2,You are three times more likely to be able to ...,True,False,0.8,[],The statement appears to be a factual claim ba...,True
3,"Under Donald Trumps tax plan, 51 percent of si...",False,True,0.8,"[1, 2]",The statement makes a specific claim about the...,True
4,Says the Patient Protection and Affordable Car...,True,True,0.8,"[1, 2, 5]",The text makes a claim about the Patient Prote...,True


In [37]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load the analysis results
df = pd.read_csv("analysis_results.csv")

# Extract ground truth and model predictions
y_true = df["ground_truth"].astype(int)  # Convert boolean to int (0 or 1)
y_pred = df["model_prediction"].astype(int)  # Convert boolean to int (0 or 1)
y_scores = df["confidence_score"]  # Confidence scores from the model

# Compute metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
roc_auc = roc_auc_score(y_true, y_scores)

# Print results
print("Misinformation Detection Benchmark Results:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"ROC-AUC Score: {roc_auc:.4f}")


Misinformation Detection Benchmark Results:
Accuracy: 0.6100
Precision: 0.6585
Recall: 0.8308
F1-score: 0.7347
ROC-AUC Score: 0.6057


In [38]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load analysis results
df = pd.read_csv("analysis_results.csv")

# Extract ground truth and confidence scores
y_true = df["ground_truth"].astype(int)
y_scores = df["confidence_score"]  # Model’s confidence score

# 🔹 Set a higher threshold for classifying as misinformation
threshold = 0.75  # Change this value (0.7, 0.8, etc.) to test different thresholds
y_pred = (y_scores >= threshold).astype(int)

# Compute metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
roc_auc = roc_auc_score(y_true, y_scores)

# Print updated benchmark results
print(f"🔍 Misinformation Detection Benchmark Results (Threshold={threshold}):")
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"🎯 Precision: {precision:.4f}")
print(f"📈 Recall: {recall:.4f}")
print(f"⚖️  F1-score: {f1:.4f}")
print(f"📊 ROC-AUC Score: {roc_auc:.4f}")


🔍 Misinformation Detection Benchmark Results (Threshold=0.75):
✅ Accuracy: 0.6700
🎯 Precision: 0.6905
📈 Recall: 0.8923
⚖️  F1-score: 0.7785
📊 ROC-AUC Score: 0.6057


In [39]:
# Find misclassified cases
df["predicted"] = y_pred

# 🔹 False Positives: Statements incorrectly flagged as misinformation
false_positives = df[(df["ground_truth"] == 0) & (df["predicted"] == 1)]

# 🔹 False Negatives: Misinformation that was missed
false_negatives = df[(df["ground_truth"] == 1) & (df["predicted"] == 0)]

# Save misclassified cases for review
false_positives.to_csv("false_positives.csv", index=False)
false_negatives.to_csv("false_negatives.csv", index=False)

print("🔍 False positives saved to 'false_positives.csv'.")
print("🔍 False negatives saved to 'false_negatives.csv'.")


🔍 False positives saved to 'false_positives.csv'.
🔍 False negatives saved to 'false_negatives.csv'.


In [40]:
from sklearn.preprocessing import MinMaxScaler

# Normalize confidence scores to 0-1 range
scaler = MinMaxScaler()
df["confidence_score"] = scaler.fit_transform(df[["confidence_score"]])

# Save the updated analysis results
df.to_csv("normalized_analysis_results.csv", index=False)

print("✅ Confidence scores normalized and saved as 'normalized_analysis_results.csv'.")


✅ Confidence scores normalized and saved as 'normalized_analysis_results.csv'.


In [41]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import MinMaxScaler

# ===================== STEP 1: LOAD ANALYSIS RESULTS =====================

# Load analysis results
df = pd.read_csv("analysis_results.csv")

# Extract ground truth and confidence scores
y_true = df["ground_truth"].astype(int)
y_scores = df["confidence_score"]  # Model’s confidence scores

# ===================== STEP 2: THRESHOLD TUNING =====================

# 🔹 Try different thresholds (adjust these values)
thresholds = [0.6, 0.7, 0.75, 0.8]

# Store benchmark results for each threshold
benchmark_results = []

for threshold in thresholds:
    # Apply threshold to make predictions
    y_pred = (y_scores >= threshold).astype(int)

    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)  # Avoid division errors
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_scores)

    # Store results
    benchmark_results.append({
        "Threshold": threshold,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-score": f1,
        "ROC-AUC": roc_auc
    })

    print(f"\n🔍 Benchmark Results (Threshold={threshold}):")
    print(f"✅ Accuracy: {accuracy:.4f}")
    print(f"🎯 Precision: {precision:.4f}")
    print(f"📈 Recall: {recall:.4f}")
    print(f"⚖️  F1-score: {f1:.4f}")
    print(f"📊 ROC-AUC Score: {roc_auc:.4f}")

# Save threshold tuning results
pd.DataFrame(benchmark_results).to_csv("threshold_tuning_results.csv", index=False)
print("\n✅ Threshold tuning results saved as 'threshold_tuning_results.csv'.")

# ===================== STEP 3: FIND MISCLASSIFIED CASES =====================

# Apply best threshold (adjust if needed based on previous results)
best_threshold = 0.75
df["predicted"] = (df["confidence_score"] >= best_threshold).astype(int)

# 🔹 False Positives: Statements incorrectly flagged as misinformation
false_positives = df[(df["ground_truth"] == 0) & (df["predicted"] == 1)]

# 🔹 False Negatives: Misinformation that was missed
false_negatives = df[(df["ground_truth"] == 1) & (df["predicted"] == 0)]

# Save misclassified cases
false_positives.to_csv("false_positives.csv", index=False)
false_negatives.to_csv("false_negatives.csv", index=False)

print("\n🔍 Misclassified cases saved:")
print("⚠️  False positives saved to 'false_positives.csv'.")
print("⚠️  False negatives saved to 'false_negatives.csv'.")

# ===================== STEP 4: NORMALIZE CONFIDENCE SCORES =====================

# Normalize confidence scores to 0-1 range
scaler = MinMaxScaler()
df["normalized_confidence"] = scaler.fit_transform(df[["confidence_score"]])

# Save updated dataset
df.to_csv("normalized_analysis_results.csv", index=False)
print("\n✅ Confidence scores normalized and saved as 'normalized_analysis_results.csv'.")



🔍 Benchmark Results (Threshold=0.6):
✅ Accuracy: 0.6400
🎯 Precision: 0.6465
📈 Recall: 0.9846
⚖️  F1-score: 0.7805
📊 ROC-AUC Score: 0.6057

🔍 Benchmark Results (Threshold=0.7):
✅ Accuracy: 0.6400
🎯 Precision: 0.6465
📈 Recall: 0.9846
⚖️  F1-score: 0.7805
📊 ROC-AUC Score: 0.6057

🔍 Benchmark Results (Threshold=0.75):
✅ Accuracy: 0.6700
🎯 Precision: 0.6905
📈 Recall: 0.8923
⚖️  F1-score: 0.7785
📊 ROC-AUC Score: 0.6057

🔍 Benchmark Results (Threshold=0.8):
✅ Accuracy: 0.6700
🎯 Precision: 0.6905
📈 Recall: 0.8923
⚖️  F1-score: 0.7785
📊 ROC-AUC Score: 0.6057

✅ Threshold tuning results saved as 'threshold_tuning_results.csv'.

🔍 Misclassified cases saved:
⚠️  False positives saved to 'false_positives.csv'.
⚠️  False negatives saved to 'false_negatives.csv'.

✅ Confidence scores normalized and saved as 'normalized_analysis_results.csv'.
