In [29]:
import pandas as pd
import numpy as np
import json
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, fbeta_score

# Import our custom class from the local file
from policy_proposal_labeler import DisinformationLabeler

# Ensure graphs appear inline
%matplotlib inline

In [30]:
# Build the single labeler instance used by the rest of the notebook.
# Constructing it is what loads the BERT models and CSVs, so announce it first.
startup_message = "Initializing Labeler and loading models"
print(startup_message)
labeler = DisinformationLabeler()

Initializing Labeler and loading models


In [31]:
# Threshold tuning: score every tuning example, then choose the cutoff that
# maximises F0.5 and install it on the labeler.
print("Loading Tuning Data")
df_tune = pd.read_csv('data/tuning_data.csv')
ground_truth = df_tune['label'].tolist()

print(f"Calculating risk scores for {len(df_tune)} examples...")

# One raw score per tuning row, collected in row order.
raw_scores = [labeler.calculate_score(row) for _idx, row in df_tune.iterrows()]

# Candidate cutoffs: 81 evenly spaced values from 0.1 to 0.9 (a 0.01 step).
thresholds = np.linspace(0.1, 0.9, 81)

# F-beta for each candidate. An example is predicted positive only when its
# score is strictly above the cutoff; beta=0.5 favours precision over recall.
f_scores = [
    fbeta_score(
        ground_truth,
        [1 if score > cutoff else 0 for score in raw_scores],
        beta=0.5,
        zero_division=0,
    )
    for cutoff in thresholds
]

# argmax reports the first maximum, so ties resolve to the lowest cutoff.
best_idx = int(np.argmax(f_scores))
best_f = f_scores[best_idx]
best_thresh = thresholds[best_idx]

print(f"Best Threshold Found: {best_thresh:.3f}")
print(f"Best F0.5 Score: {best_f:.3f}")

# Install the tuned cutoff on the labeler.
labeler.threshold = best_thresh

Loading Tuning Data
Calculating risk scores for 156 examples...
Best Threshold Found: 0.460
Best F0.5 Score: 0.992


In [32]:
# Held-out evaluation: run every test row through the labeler's moderation
# entry point and compare the binary predictions with the 'label' column.
# NOTE(review): presumably moderate_post applies labeler.threshold as set by
# the tuning cell -- confirm in policy_proposal_labeler.
df_test = pd.read_csv('data/test_data.csv')
truth = df_test['label'].tolist()

preds = []

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock and can jump if the system clock is
# adjusted, which would distort the reported duration.
start_time = time.perf_counter()

for index, row in df_test.iterrows():
    labels, score = labeler.moderate_post(row)
    # Positive prediction iff the disinformation label was emitted for the post.
    preds.append(1 if 'suspected-russian-disinfo' in labels else 0)

end_time = time.perf_counter()

# Metrics (zero_division=0 reports 0 instead of warning when a denominator is empty)
acc = accuracy_score(truth, preds)
prec = precision_score(truth, preds, zero_division=0)
rec = recall_score(truth, preds, zero_division=0)

print(f"Total Time: {end_time - start_time:.2f}s")
print(f" Accuracy:  {acc:.2%}")
print(f"Precision: {prec:.2%}")
print(f"Recall:    {rec:.2%}")


Total Time: 4.56s
 Accuracy:  79.10%
Precision: 100.00%
Recall:    65.85%


In [33]:
# Manual spot check: moderate one hand-written post and report the verdict.
custom_handle = "lordofwar"
custom_text = "Ukraine is left without electricity due to Zellensky"

# The same string is supplied as both the raw and the translated text.
row = {'clean_uri': custom_handle, 'text': custom_text, 'translated_text': custom_text}

labels, score = labeler.moderate_post(row)

print("--- INPUT ---")
print(f"Handle: {custom_handle}", f"Text:   {custom_text}", sep="\n")
print("\n--- RESULT ---")
print(f"Risk Score: {score:.4f} (Threshold: {labeler.threshold:.2f})")
# A non-empty label list means the post was flagged; show its first label.
print(f"FLAGGED ({labels[0]})" if labels else "SAFE")

--- INPUT ---
Handle: lordofwar
Text:   Ukraine is left without electricity due to Zellensky

--- RESULT ---
Risk Score: 0.7600 (Threshold: 0.46)
FLAGGED (suspected-russian-disinfo)


In [34]:
# Manual spot check: moderate one hand-written post (this one contains a
# t.me link) and report the verdict.
custom_handle = "TuckerKarson"
custom_text = "https://t.me/uutowa/15927 there will be power cuts in Ukraine tomorrow."

# The same string is supplied as both the raw and the translated text.
row = {'clean_uri': custom_handle, 'text': custom_text, 'translated_text': custom_text}

labels, score = labeler.moderate_post(row)

print("--- INPUT ---")
print(f"Handle: {custom_handle}", f"Text:   {custom_text}", sep="\n")
print("\n--- RESULT ---")
print(f"Risk Score: {score:.4f} (Threshold: {labeler.threshold:.2f})")
# A non-empty label list means the post was flagged; show its first label.
print(f"FLAGGED ({labels[0]})" if labels else "SAFE")

--- INPUT ---
Handle: TuckerKarson
Text:   https://t.me/uutowa/15927 there will be power cuts in Ukraine tomorrow.

--- RESULT ---
Risk Score: 0.4998 (Threshold: 0.46)
FLAGGED (suspected-russian-disinfo)


In [35]:
# Manual spot check: moderate one hand-written post and report the verdict.
custom_handle = "CoreySmith"
custom_text = "I love Trust and Safety"

# The same string is supplied as both the raw and the translated text.
row = {'clean_uri': custom_handle, 'text': custom_text, 'translated_text': custom_text}

labels, score = labeler.moderate_post(row)

print("--- INPUT ---")
print(f"Handle: {custom_handle}", f"Text:   {custom_text}", sep="\n")
print("\n--- RESULT ---")
print(f"Risk Score: {score:.4f} (Threshold: {labeler.threshold:.2f})")
# A non-empty label list means the post was flagged; show its first label.
print(f"FLAGGED ({labels[0]})" if labels else "SAFE")

--- INPUT ---
Handle: CoreySmith
Text:   I love Trust and Safety

--- RESULT ---
Risk Score: 0.0000 (Threshold: 0.46)
SAFE
