In [None]:
# Op-Ed Analysis: David Brooks - "Don't Fear AI" (and the other op-eds listed in `documents`)
# This notebook analyzes each op-ed using three components:
# 1. Claim extraction - identifies the N most central claims
# 2. Claim coherence - analyzes how claims affect each other's likelihood
# 3. External fact checking - verifies claims against external sources

In [None]:
# Import modules
from pathlib import Path
import json
from claim_extractor import extract_claims
from claim_coherence import analyze_coherence, format_coherence_matrix
from external_fact_checking import check_facts, get_fact_check_summary

In [None]:
# Load the op-ed documents
# Each entry pairs a short identifier ("id") with the location of the
# op-ed's plain-text file ("path"). Paths are relative, so they resolve
# against the kernel's current working directory.
documents = [
    {
        "id": "david_brooks_dont_fear_ai",
        "path": "data/david_brooks_dont_fear_ai.txt"
    },
    {
        "id": "yud_shut_it_down",
        "path": "data/yud_shut_it_down.txt"
    },
    # Add more documents here as needed
]

# Load document texts
# docs_data is rebuilt from scratch on every run, so re-executing this cell
# never appends duplicates.
docs_data = []
for doc_info in documents:
    doc_path = Path(doc_info["path"])
    # Decode explicitly as UTF-8. Without an encoding, read_text() falls back
    # to the platform locale, which can fail or garble non-ASCII characters
    # (curly quotes, dashes) on some systems.
    # NOTE(review): assumes the .txt files are saved as UTF-8 — confirm.
    doc_text = doc_path.read_text(encoding="utf-8")
    docs_data.append({
        "id": doc_info["id"],
        "text": doc_text
    })
    print(f"Loaded {doc_info['id']}: {len(doc_text)} characters")

print(f"\nTotal documents loaded: {len(docs_data)}")

In [None]:
# Extract Central Claims

In [None]:
# Run claim extraction on every loaded document and pool the results
# into one flat list that the later cells consume.
all_claims = []

for doc_data in docs_data:
    # n=5 is forwarded to extract_claims; presumably the number of central
    # claims to keep per document — confirm against claim_extractor.
    doc_claims = extract_claims(doc_data["text"], doc_data["id"], n=5)
    all_claims += doc_claims
    print(f"Extracted {len(doc_claims)} claims from {doc_data['id']}")

# Overall count, followed by a one-line preview of each claim
print(f"\nTotal claims extracted: {len(all_claims)}", "\nAll claims:", sep="\n")
for claim in all_claims:
    # Preview shows the first 100 characters of the claim text
    print(f"{claim.doc_id}[{claim.claim_idx}]: {claim.claim[:100]}...")

In [None]:
# Analyze Claim Coherence

In [None]:
# Analyze coherence between all claims (across all documents)
# The pooled claim list from every document is passed in a single call.
# Later cells read `delta_prob` and `claim_i_idx` from each returned item.
# NOTE(review): presumably one result per ordered pair of claims — confirm
# against claim_coherence.analyze_coherence.
coherence_results = analyze_coherence(all_claims)

# Report how many relationship records were returned
print(f"Analyzed {len(coherence_results)} claim relationships across all documents")

In [None]:
# Analyze coherence locally: conflict metrics and the most load-bearing claims
def get_conflict_metrics(coherence_results, claims):
    """Summarize how much the claims undermine one another.

    A relationship counts as a conflict when its ``delta_prob`` is negative.

    Args:
        coherence_results: Iterable of objects exposing a numeric
            ``delta_prob`` attribute.
        claims: Sized collection of claims; only its length is used, to
            compute the number of ordered pairs of distinct claims,
            ``len(claims) * (len(claims) - 1)``.

    Returns:
        dict with three keys:
            ``conflict_prevalence``: conflicts divided by the number of
                ordered claim pairs.
            ``avg_conflict_intensity``: mean absolute ``delta_prob`` over the
                conflicts, or 0 when there are none.
            ``max_conflict``: the most negative ``delta_prob``, or 0 when
                there are no conflicts.
        All three are 0 when there are fewer than two claims.
    """
    conflict_deltas = [
        rel.delta_prob for rel in coherence_results if rel.delta_prob < 0
    ]

    n_claims = len(claims)
    possible_pairs = n_claims * (n_claims - 1)

    # Fewer than two claims: no pair exists, so nothing can conflict
    if possible_pairs == 0:
        return {"conflict_prevalence": 0, "avg_conflict_intensity": 0, "max_conflict": 0}

    if conflict_deltas:
        mean_intensity = sum(abs(delta) for delta in conflict_deltas) / len(conflict_deltas)
        strongest = min(conflict_deltas)
    else:
        mean_intensity = 0
        strongest = 0

    return {
        "conflict_prevalence": len(conflict_deltas) / possible_pairs,
        "avg_conflict_intensity": mean_intensity,
        "max_conflict": strongest,
    }

def get_top_load_bearing_claims(coherence_results, claims, n=3):
    """Rank claims by their average absolute effect on other claims.

    Each relationship is attributed to the claim named by its
    ``claim_i_idx``; its contribution is ``abs(delta_prob)``. Claims are
    ordered by the mean contribution (not the total), highest first. Ties
    keep the order in which the claims first appear in
    ``coherence_results``.

    Args:
        coherence_results: Iterable of objects exposing ``claim_i_idx`` and a
            numeric ``delta_prob``.
        claims: Sequence indexed by ``claim_i_idx``; each element exposes
            ``claim``, ``doc_id`` and ``claim_idx``.
        n: Maximum number of claims to return (default 3).

    Returns:
        List of at most ``n`` dicts with keys ``claim``, ``doc_id``,
        ``claim_idx``, ``avg_impact``, ``total_impact`` and
        ``num_relationships``.
    """
    totals = {}
    counts = {}
    for rel in coherence_results:
        source_idx = rel.claim_i_idx
        totals[source_idx] = totals.get(source_idx, 0) + abs(rel.delta_prob)
        counts[source_idx] = counts.get(source_idx, 0) + 1

    means = {idx: totals[idx] / counts[idx] for idx in totals}

    # sorted() is stable, so equal averages stay in first-seen order
    ranked = sorted(means, key=means.get, reverse=True)[:n]

    top_claims = []
    for idx in ranked:
        source_claim = claims[idx]
        top_claims.append({
            "claim": source_claim.claim,
            "doc_id": source_claim.doc_id,
            "claim_idx": source_claim.claim_idx,
            "avg_impact": means[idx],
            "total_impact": totals[idx],
            "num_relationships": counts[idx],
        })
    return top_claims

# Compute both summaries from the pairwise coherence results
conflict_metrics = get_conflict_metrics(coherence_results, all_claims)
top_load_bearing = get_top_load_bearing_claims(coherence_results, all_claims)

# Report title, a blank line, then the conflict section heading
print("COHERENCE ANALYSIS\n", "CONFLICT METRICS:", sep="\n")

# Prevalence is a fraction rendered as a percentage; the others are raw deltas
print(f"  Prevalence: {conflict_metrics['conflict_prevalence']:.1%}")
print(f"  Avg Intensity: {conflict_metrics['avg_conflict_intensity']:.2f}")
print(f"  Max Conflict: {conflict_metrics['max_conflict']:.2f}")

print(f"\nTOP LOAD-BEARING CLAIMS:")
for i, claim_info in enumerate(top_load_bearing, start=1):
    # Rank line, then the first 120 characters of the claim text
    print(f"{i}. {claim_info['doc_id']}[{claim_info['claim_idx']}] (avg impact: {claim_info['avg_impact']:.2f}, total: {claim_info['total_impact']:.1f})")
    print(f"   {claim_info['claim'][:120]}...")

In [None]:
# View coherence as a clean formatted matrix
# format_coherence_matrix receives the coherence results and the pooled
# claims; whatever it returns is printed as-is.
# NOTE(review): presumably a multi-line string — confirm against claim_coherence.
print(format_coherence_matrix(coherence_results, all_claims))

In [None]:
# External Fact Checking

In [None]:
# Check facts for all claims (across all documents)
# The pooled claim list is checked in a single call; the result is kept in
# `fact_checks` for the summary cell that follows.
# NOTE(review): presumably this queries external sources and may be slow or
# need network access — confirm against external_fact_checking.
fact_checks = check_facts(all_claims)

# Report how many fact-check records were returned
print(f"Fact-checked {len(fact_checks)} claims across all documents")

In [None]:
# Condense the individual fact checks into a summary and print it
fact_summary = get_fact_check_summary(fact_checks)

print("FACT CHECK SUMMARY\n")
print(f"Average Veracity: {fact_summary['average_veracity']:.1f}/100\n")

# One row per ranking: (summary key, heading, whether to list sources).
# Sources are listed only for the least accurate claims.
for section_key, heading, show_sources in (
    ("most_accurate_claims", "Most Accurate Claims:", False),
    ("least_accurate_claims", "\nLeast Accurate Claims:", True),
):
    ranked_claims = fact_summary[section_key]
    if not ranked_claims:
        # An empty ranking prints nothing, not even its heading
        continue

    print(heading)
    for i, claim_info in enumerate(ranked_claims, start=1):
        print(f"{i}. Veracity: {claim_info['veracity']}/100")
        # Claim text is cut to 100 characters, the explanation to 150
        print(f"   {claim_info['claim'][:100]}...")
        print(f"   {claim_info['explanation'][:150]}...")
        if show_sources and claim_info.get('sources'):
            # At most the first three sources are listed
            print(f"   Sources: {', '.join(claim_info['sources'][:3])}")