In [None]:
import mwapi
from mwapi.errors import APIError

# This script gets all English Wikipedia pages in Category:Wikipedia controversial topics

def query():
    """Fetch the titles of all pages in Category:Wikipedia controversial topics.

    Sends a `generator=categorymembers` query to the English Wikipedia API and
    follows continuation until every batch has been received.

    Returns:
        list[str]: Page titles in the order the API returned them.

    Raises:
        ValueError: If MediaWiki answers with an API error. The original
            ``APIError`` is attached as ``__cause__``.
    """
    session = mwapi.Session(
        'https://en.wikipedia.org',
        user_agent='mwapi sync demo'
    )
    pages = []
    try:
        # continuation=True makes session.get() yield one response per batch
        for portion in session.get(
            formatversion=2,
            action='query',
            generator='categorymembers',
            gcmtitle='Category:Wikipedia controversial topics',
            gcmlimit=100,  # 100 results per request
            continuation=True
        ):
            if 'query' in portion and 'pages' in portion['query']:
                pages.extend(page['title'] for page in portion['query']['pages'])
            else:
                print("MediaWiki returned empty result batch.")
    except APIError as error:
        # A single formatted message (instead of two positional args, which
        # render as a tuple) and explicit chaining to keep the API traceback.
        raise ValueError(
            "MediaWiki returned an error: {}".format(error)
        ) from error
    print("Fetched {} pages".format(len(pages)))
    return pages

In [None]:
# Raw page titles returned by query(); this performs live requests against the Wikipedia API.
controversial_pages = query()

In [None]:
# Preview only: showing the full list would flood the cell output.
controversial_pages[:20]

In [None]:
import re

# Pages whose title starts with one of these namespace prefixes are dropped.
EXCLUDED_NAMESPACE_PREFIXES = (
    'File talk:', 'User talk:', 'Template:', 'Template talk:',
    'Wikipedia:', 'Wikipedia talk:',
)

controversial_page_set = set()

for p in controversial_pages:
    # Test the prefix only: a title that merely contains e.g. "Wikipedia:"
    # somewhere in the middle is not a page of that namespace.
    if p.startswith(EXCLUDED_NAMESPACE_PREFIXES):
        continue

    # Strip the talk namespace only where it is the leading prefix.
    if p.startswith('Talk:'):
        p = p[len('Talk:'):]

    # Remove "Archive N" together with the subpage slash in front of it, so
    # "Foo/Archive 3" becomes "Foo" rather than "Foo/".
    p = re.sub(r'/?Archive \d+', '', p)

    controversial_page_set.add(p.strip().lower())

In [None]:
# Number of distinct normalized titles kept after namespace filtering.
len(controversial_page_set)

In [None]:
import pickle as pkl

# NOTE: unpickling can execute arbitrary code; only load a cache file that this
# project produced itself.
# The context manager closes the file handle once the cache has been read.
with open('../results/cached_grok_idx.pkl', 'rb') as f:
    grok_idx = pkl.load(f)

In [None]:
# Keep only the normalized titles that also occur as keys of grok_idx.
# NOTE(review): assumes the grok_idx keys are lower-cased titles with spaces — confirm.
controversial_pages_in_grokipedia = controversial_page_set.intersection(grok_idx.keys())

In [None]:
# Persist one normalized title per line.
# encoding='utf-8' matches the readers of this file further down (titles can
# contain non-ASCII characters); sorting makes the file reproducible, since
# set iteration order is arbitrary.
with open('../results/controversial_pages_in_grokipedia.txt', 'w', encoding='utf-8') as f:
    for page in sorted(controversial_pages_in_grokipedia):
        f.write(page + '\n')


## Citation analysis

In [None]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch

In [None]:
# Load the enwiki perennial list CSV, expose its 'source' column under the name
# 'domain', and keep just the two columns used below.
reliability_df = (
    pd.read_csv('../supplemental_data/perennial_sources_enwiki/enwiki_perennial_list.csv')
    .assign(domain=lambda frame: frame['source'])
    [['domain', 'status']]
)

# Similarity table read from parquet; a later cell reads its 'title' and 'similarity' columns.
similarities = pd.read_parquet('../results/embeddings_similarities_pairwise_top1_alignments.parquet')

In [None]:
def find_reliability_shift_maximal_controversial(
    reliability_df, 
    controversial_path='../results/controversial_pages_in_grokipedia.txt',
    result_dir='../results', 
    fsuffix='_domains.json'
):
    """
    Count citations per reliability status for controversial articles in both
    Wikipedia and Grokipedia and score each article with

        maximal = (WP_reliable - WP_unreliable) - (grok_unreliable - grok_reliable)

    which is algebraically (WP_reliable + grok_reliable) - (WP_unreliable + grok_unreliable).

    Args:
        reliability_df: DataFrame with 'domain' and 'status' columns
        controversial_path: Path to file with list of controversial articles
            (one per line, name already normalized: lower case, spaces)
        result_dir: Directory containing the wp/grok domain JSON files
        fsuffix: File name suffix; the files read are
            f'{result_dir}/wp{fsuffix}' and f'{result_dir}/grok{fsuffix}'

    Returns:
        List of tuples
        (title,
         wp_reliable, wp_unreliable, wp_blacklist, wp_no_consensus, wp_deprecated, wp_other, wp_total,
         grok_reliable, grok_unreliable, grok_blacklist, grok_no_consensus, grok_deprecated, grok_other, grok_total,
         maximal),
        sorted by `maximal` in descending order (ties broken by title).
    """

    # Status label -> index of its counter; indices follow the output tuple order.
    status_slots = {
        'Generally reliable': 0,
        'Generally unreliable': 1,
        'Blacklisted': 2,
        'No consensus': 3,
        'Deprecated': 4,
    }
    other_slot = 5  # unknown domain or unrecognized status
    empty_counts = (0, 0, 0, 0, 0, 0, 0)

    # Load controversial articles set (assume already normalized, as written before)
    with open(controversial_path, 'r', encoding='utf-8') as f:
        controversial_set = set(line.strip() for line in f if line.strip())

    # Domain -> status lookup, built once and shared by both input files.
    reliability_lookup = {}
    for _, row in reliability_df.iterrows():
        domain = row['domain']
        if domain:
            reliability_lookup[domain] = row.get('status', None)

    def count_citations_by_status(json_file):
        """Return {article_title: 7-tuple of counts} for controversial articles in json_file."""
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        article_stats = {}
        for article_dict in data:
            for article_title, domains in article_dict.items():
                # Titles in the JSON may use underscores / capitals; compare normalized.
                if article_title.lower().replace('_', ' ') not in controversial_set:
                    continue
                if not isinstance(domains, dict):
                    continue

                counts = [0] * 6
                total_count = 0
                for domain, count in domains.items():
                    total_count += count
                    # Domains absent from the lookup map to None -> 'other'.
                    status = reliability_lookup.get(domain)
                    counts[status_slots.get(status, other_slot)] += count

                # A title occurring more than once keeps its last entry.
                article_stats[article_title] = (*counts, total_count)
        return article_stats

    # Get per-article reliability counts, but only for controversial articles
    wp_dict = count_citations_by_status(f'{result_dir}/wp{fsuffix}')
    grok_dict = count_citations_by_status(f'{result_dir}/grok{fsuffix}')

    print(f"Loaded {len(wp_dict)} controversial articles from Wikipedia")
    print(f"Loaded {len(grok_dict)} controversial articles from Grokipedia\n")

    # NOTE(review): the two files are joined on the raw title string; a title
    # spelled differently in the two files yields two separate rows — confirm
    # that both files use the same spelling.
    all_articles = set(wp_dict.keys()) | set(grok_dict.keys())

    article_maximals = []
    for article in all_articles:
        # Articles missing from one side contribute all-zero counts for it.
        wp_counts = wp_dict.get(article, empty_counts)
        grok_counts = grok_dict.get(article, empty_counts)

        wp_rel, wp_unrel = wp_counts[0], wp_counts[1]
        grok_rel, grok_unrel = grok_counts[0], grok_counts[1]

        # Calculate: (WP_reliable - WP_unreliable) - (grok_unreliable - grok_reliable)
        maximal_value = (wp_rel - wp_unrel) - (grok_unrel - grok_rel)

        article_maximals.append((article, *wp_counts, *grok_counts, maximal_value))

    # Sort as documented; without this the order would follow arbitrary set iteration.
    article_maximals.sort(key=lambda record: (-record[-1], record[0]))
    return article_maximals

# Column names matching, position by position, the tuples produced by
# find_reliability_shift_maximal_controversial: the title, seven counters for
# each site, then the score.
columns = ['title'] + [
    f'{site}_{field}'
    for site in ('wp', 'grok')
    for field in (
        'reliable', 'unreliable', 'blacklist', 'no_consensus',
        'deprecated', 'other', 'total',
    )
] + ['maximal']

In [None]:
# Per-article status counts for controversial articles, as a DataFrame with the
# column names listed in `columns`.
article_maximals = find_reliability_shift_maximal_controversial(reliability_df)
max_reliability_shift_df = pd.DataFrame(article_maximals, columns=columns)

In [None]:
def plot_reliability_charts(filtered_df, fsuffix='', show=True, title=None):
    """
    Plots stacked bar + diagonal comparison charts using the given DataFrame:
    1. Stacked bar and overlay: Proportion of sources in each reliability category for Wikipedia and Grokipedia, with diagonal fills illustrating change.
    2. Bar chart: Percentage of articles containing at least one source of each type (not 'other') for Wikipedia and Grokipedia.

    The figure is saved to '../graphics/overall_grok_wp_cite_composition_{fsuffix}.pdf'.

    Parameters:
        filtered_df (pd.DataFrame): DataFrame, typically filtered on articles of interest.
            Must contain the columns 'wp_<status>' and 'grok_<status>' for each status in
            reliable, unreliable, blacklist, no_consensus, deprecated, other.
        fsuffix (str): Suffix inserted into the output file name.
        show (bool): If True, calls plt.show() at end.
        title (str): Optional title for the whole figure. If None, a default title is used.
    """

    # --- Setup categories, labels, colors ---
    column_order = [
        'reliable', 'unreliable', 'blacklist', 'no_consensus', 'deprecated', 'other'
    ]
    display_names = {
        'reliable': 'Generally reliable',
        'unreliable': 'Generally unreliable',
        'blacklist': 'Blacklisted',
        'no_consensus': 'No consensus',
        'deprecated': 'Deprecated',
        'other': 'Other'
    }
    color_map = {
        'reliable': 'green',
        'unreliable': 'red',
        'blacklist': 'black',
        'no_consensus': 'yellow',
        'deprecated': 'orange',
        'other': 'grey'
    }

    # --- Total citation counts per site and status ---
    # Shape: index=['Wikipedia', 'Grokipedia'], columns=column_order
    prop_df = pd.DataFrame(
        [
            [filtered_df[f'wp_{k}'].sum() for k in column_order],
            [filtered_df[f'grok_{k}'].sum() for k in column_order],
        ],
        columns=column_order,
        index=['Wikipedia', 'Grokipedia']
    )
    # Row-normalize to proportions; a site with zero citations gives 0/0 -> NaN -> 0.
    prop_df_norm = prop_df.div(prop_df.sum(axis=1), axis=0).fillna(0)

    # --- Plotting stacked bars with diagonal overlays ---
    labels = ['Wikipedia', 'Grokipedia']
    x = np.arange(len(labels))
    bar_sep = 0.09  # extra offset pushing each bar away from its tick
    width = 0.18    # bar width

    fig, axs = plt.subplots(1, 2, figsize=(16, 7), gridspec_kw={'width_ratios': [1, 1.7]})

    # Set figure title (for whole figure)
    plot_title = title if title is not None else "Source Reliability Category Proportion: Wikipedia vs Grokipedia"
    fig.suptitle(plot_title, fontsize=18, y=0.98)
    ax = axs[0]

    # Running top of each stack: [Wikipedia, Grokipedia]
    bottoms = [0, 0]

    # For synchronized stacking, process in column order:
    for col in column_order:
        color = color_map.get(col, 'grey')
        wp_prop = prop_df_norm.loc['Wikipedia', col]
        grok_prop = prop_df_norm.loc['Grokipedia', col]
        ax.bar(x[0] - width/2 - bar_sep/2, wp_prop, width=width,
               bottom=bottoms[0], color=color, edgecolor='none', zorder=2, alpha=0.8)
        ax.bar(x[1] + width/2 + bar_sep/2, grok_prop, width=width,
               bottom=bottoms[1], color=color, edgecolor='none', zorder=2, alpha=0.8)

        # Diagonal change fill connecting the two segments (drawn below the bars)
        wp_top = bottoms[0] + wp_prop
        grok_top = bottoms[1] + grok_prop
        ax.fill_between(
            [x[0] - width/2, x[1] + width/2],
            [wp_top, grok_top],
            [bottoms[0], bottoms[1]],
            color=color, alpha=0.25, zorder=1, linewidth=0
        )
        bottoms[0] += wp_prop
        bottoms[1] += grok_prop

    # Set axis ticks and labels
    ax.set_xticks(x)
    ax.set_xticklabels(labels, fontsize=16)
    ax.set_ylabel("Proportion of Citations", fontsize=16)
    ax.set_title("Source Status Proportion: Wikipedia vs Grokipedia", fontsize=16)

    # Make axis tight with bars, remove excess white space.
    # With width=0.18 and bar_sep=0.09 the bar centres are -0.135 and 1.135, so the
    # left bar spans [-0.225, -0.045] and the right bar [1.045, 1.225]; the limits
    # below leave a 0.005 margin on each side.
    ax.set_xlim(-0.23, 1.23)
    ax.set_ylim(bottom=0, top=1.01)
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)

    # Custom legend patch (color by status) - placed inside plot to reduce whitespace
    legend_elements = [Patch(facecolor=color_map.get(col, 'grey'), label=display_names.get(col, col), alpha=0.8) for col in column_order]
    ax.legend(handles=legend_elements, title='Source Status', loc='upper center', framealpha=0.9)

    # Tighten subplot spacing to reduce whitespace
    fig.subplots_adjust(wspace=0.15, left=0.05, right=0.97, top=0.92, bottom=0.1)

    # ---- Second plot: % of articles containing at least 1 in each source type (not 'other') ----

    ax2 = axs[1]
    main_types = ['reliable', 'unreliable', 'blacklist', 'no_consensus', 'deprecated']
    type_labels = [display_names[t] for t in main_types]
    bar_x = np.arange(len(main_types))
    bar_width = 0.36

    n_articles = len(filtered_df)

    def percent_of_articles(column):
        """Percentage of rows with a positive count in `column`; 0 for an empty frame."""
        if n_articles == 0:
            # Avoid 0/0 (NaN bars and labels), consistent with the first panel.
            return 0.0
        return (filtered_df[column] > 0).sum() / n_articles * 100

    percentages = {
        'Wikipedia': [percent_of_articles(f'wp_{t}') for t in main_types],
        'Grokipedia': [percent_of_articles(f'grok_{t}') for t in main_types],
    }

    # Plot as side-by-side bars
    ax2.bar(bar_x - bar_width/2, percentages['Wikipedia'], bar_width,
           label='Wikipedia', color="#4977bc", edgecolor='black', alpha=0.7)
    ax2.bar(bar_x + bar_width/2, percentages['Grokipedia'], bar_width,
           label='Grokipedia', color="#e86b54", edgecolor='black', alpha=0.7)

    # Value labels just above each bar
    for i, (wp, gk) in enumerate(zip(percentages['Wikipedia'], percentages['Grokipedia'])):
        ax2.text(i - bar_width/2, wp + 1, f"{wp:.1f}%", ha='center', va='bottom', fontsize=10, color="#10426b")
        ax2.text(i + bar_width/2, gk + 1, f"{gk:.1f}%", ha='center', va='bottom', fontsize=10, color="#7a230c")

    ax2.set_xticks(bar_x)
    ax2.set_xticklabels(type_labels, rotation=14, fontsize=16)
    ax2.set_ylim(0, 105)
    ax2.set_ylabel("Percent of Articles", fontsize=16)
    ax2.set_title("% of Articles Citing Any Source in Category", fontsize=16)
    ax2.legend(loc='upper right', fontsize=16)
    ax2.grid(axis='y', linestyle=':', alpha=0.4)

    fig.tight_layout()
    fig.savefig(f'../graphics/overall_grok_wp_cite_composition_{fsuffix}.pdf')

    if show:
        plt.show()

In [None]:
# Status-based composition charts for the controversial subset; the figure is
# saved with 'controversial' in its file name.
plot_reliability_charts(max_reliability_shift_df, fsuffix='controversial')

In [None]:
def find_reliability_shift_by_buckets_controversial(
    lin_reliability, 
    controversial_path='../results/controversial_pages_in_grokipedia.txt',
    result_dir='../results', 
    fsuffix='_domains.json'
):
    """
    Find reliability shifts in controversial articles, grouped by reliability score buckets (0.2-sized).
    Uses lin_reliability DataFrame with reliability_score or pc1, but filters to controversial articles only.

    Buckets are [0, 0.2), [0.2, 0.4), [0.4, 0.6), [0.6, 0.8) and [0.8, 1.0]; citations whose
    domain has no score, or a score outside [0, 1], are counted as 'other'.

    Args:
        lin_reliability: DataFrame with 'domain' and 'reliability_score' (or 'pc1') columns
        controversial_path: Path to file with list of controversial articles (already normalized, one per line)
        result_dir: Directory containing the wp/grok domain JSON files
        fsuffix: File name suffix; the files read are
            f'{result_dir}/wp{fsuffix}' and f'{result_dir}/grok{fsuffix}'

    Returns:
        List of tuples, ordered by title:
        (title,
         wp_b0..wp_b4, wp_other, wp_total,
         grok_b0..grok_b4, grok_other, grok_total,
         shift_b0..shift_b4,   # WP count minus Grokipedia count per bucket
         weighted_shift)       # sum of shift_bi * (i + 1)
    """
    import pandas as pd
    import json

    # Load controversial articles set (assume already normalized, as written before)
    with open(controversial_path, 'r', encoding='utf-8') as f:
        controversial_set = set(line.strip() for line in f if line.strip())

    n_buckets = 5
    # Edges are computed as i / 5 rather than i * 0.2: the product gives
    # 0.6000000000000001 for i == 3, which would put a score of exactly 0.6
    # into the 0.4-0.6 bucket.
    bucket_edges = [i / n_buckets for i in range(n_buckets + 1)]
    empty_counts = (0,) * (n_buckets + 2)

    def get_bucket(score):
        """Return the bucket index for score, or None if it is missing or out of range."""
        if pd.isna(score):
            return None
        # The top edge is inclusive: a score of exactly 1.0 belongs to the last bucket.
        if score == bucket_edges[-1]:
            return n_buckets - 1
        for i in range(n_buckets):
            if bucket_edges[i] <= score < bucket_edges[i + 1]:
                return i
        return None

    # Domain -> score lookup, built once and shared by both input files.
    score_col = 'reliability_score' if 'reliability_score' in lin_reliability.columns else 'pc1'
    reliability_lookup = dict()
    for _, row in lin_reliability.iterrows():
        domain = row['domain']
        if domain and pd.notna(row.get(score_col)):
            reliability_lookup[domain] = row[score_col]

    def count_citations_by_bucket(json_file):
        """Return a list of (title, b0..b4, other, total) for controversial articles in json_file."""
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        article_stats = []
        for article_dict in data:
            for article_title, domains in article_dict.items():
                # Only process if article is in the controversial set
                if article_title.lower().replace('_', ' ') not in controversial_set:
                    continue
                if not isinstance(domains, dict):
                    continue

                bucket_counts = [0] * n_buckets
                other_count = 0
                total_count = 0
                for domain, count in domains.items():
                    total_count += count
                    # Unknown domains yield None, which get_bucket maps to None.
                    bucket_idx = get_bucket(reliability_lookup.get(domain))
                    if bucket_idx is None:
                        other_count += count
                    else:
                        bucket_counts[bucket_idx] += count
                article_stats.append((article_title, *bucket_counts, other_count, total_count))
        return article_stats

    # Get per-controversial-article reliability bucket counts for each source
    wp_articles = count_citations_by_bucket(f'{result_dir}/wp{fsuffix}')
    grok_articles = count_citations_by_bucket(f'{result_dir}/grok{fsuffix}')
    print(f"Loaded {len(wp_articles)} controversial articles from Wikipedia")
    print(f"Loaded {len(grok_articles)} controversial articles from Grokipedia\n")

    # Lookup by article (a title occurring more than once keeps its last entry)
    wp_dict = {art[0]: art[1:] for art in wp_articles}
    grok_dict = {art[0]: art[1:] for art in grok_articles}

    # Calculate reliability bucket shifts for controversial subset.
    # Iterating in sorted order keeps the result reproducible across runs.
    article_bucket_shifts = []
    for article in sorted(set(wp_dict.keys()) | set(grok_dict.keys())):
        # Articles missing from one side contribute all-zero counts for it.
        wp_counts = wp_dict.get(article, empty_counts)
        grok_counts = grok_dict.get(article, empty_counts)

        # Shift per bucket: WP_count - Grok_count (positive=WP has more, negative=Grok has more)
        shifts = [wp_counts[i] - grok_counts[i] for i in range(n_buckets)]
        # Weighted sum: higher buckets weighted more
        weighted_shift = sum(shifts[i] * (i + 1) for i in range(n_buckets))

        article_bucket_shifts.append((article, *wp_counts, *grok_counts, *shifts, weighted_shift))

    return article_bucket_shifts


# Column names matching, position by position, the tuples produced by
# find_reliability_shift_by_buckets_controversial: the title, seven counters
# for each site, five per-bucket shifts, then the weighted shift.
bucket_columns_controversial = ['title'] + [
    f'{site}_{field}'
    for site in ('wp', 'grok')
    for field in (
        'bucket_0_0.2', 'bucket_0.2_0.4', 'bucket_0.4_0.6',
        'bucket_0.6_0.8', 'bucket_0.8_1.0', 'other', 'total',
    )
] + [
    f'shift_bucket_{span}'
    for span in ('0_0.2', '0.2_0.4', '0.4_0.6', '0.6_0.8', '0.8_1.0')
] + ['weighted_shift']


In [None]:
# Per-domain reliability scores; the bucket analysis reads the 'domain' column
# and either 'reliability_score' or 'pc1' from this table.
lin_reliability = pd.read_csv('../supplemental_data/news_reliability/LinRating_Join.csv')

In [None]:
# NOTE: this rebinds `article_maximals` and `max_reliability_shift_df`, which an
# earlier cell filled with the status-based table. After running this cell the
# frame has bucket columns instead, so re-running the status chart cell without
# first re-running its own data cell fails on missing columns.
article_maximals = find_reliability_shift_by_buckets_controversial(lin_reliability)
max_reliability_shift_df = pd.DataFrame(article_maximals, columns=bucket_columns_controversial)

In [None]:
def plot_reliability_bucket_charts(filtered_df, fsuffix='', show=True, title=None):
    """
    Plots stacked bar + diagonal comparison charts using reliability score buckets:
    1. Stacked bar and overlay: Proportion of sources in each reliability bucket for Wikipedia and Grokipedia, with diagonal fills illustrating change.
    2. Bar chart: Percentage of articles containing at least one source in each bucket for Wikipedia and Grokipedia.

    The figure is saved to '../graphics/overall_grok_wp_cite_composition_lin_{fsuffix}.pdf'.

    Parameters:
        filtered_df (pd.DataFrame): DataFrame with the columns 'wp_bucket_<range>' and
            'grok_bucket_<range>' for the ranges 0_0.2, 0.2_0.4, 0.4_0.6, 0.6_0.8, 0.8_1.0,
            plus 'wp_other' and 'grok_other'.
        fsuffix (str): Suffix for output filename
        show (bool): If True, calls plt.show() at end.
        title (str): Optional custom title for the whole figure. If None, uses default title.
    """
    # Only the colormap helpers are imported locally; plt, np, pd and Patch come
    # from the notebook's import cell.
    from matplotlib.colors import to_hex, LinearSegmentedColormap

    # --- Setup buckets, labels, colors ---
    bucket_labels = ['0.0-0.2', '0.2-0.4', '0.4-0.6', '0.6-0.8', '0.8-1.0', 'other']
    display_names = {
        '0.0-0.2': '0.0-0.2',
        '0.2-0.4': '0.2-0.4',
        '0.4-0.6': '0.4-0.6',
        '0.6-0.8': '0.6-0.8',
        '0.8-1.0': '0.8-1.0',
        'other': 'No score'
    }

    # Red (0.0) -> yellow (0.5) -> green (1.0) colormap
    spect_cmap = LinearSegmentedColormap.from_list(
        "green_yellow_red", [(0.0, "#D73027"), (0.5, "#FEE08B"), (1.0, "#1A9850")]  # red, yellow, green
    )

    # Each bucket is colored by the colormap value at its midpoint
    bucket_midpoints = {
        '0.0-0.2': 0.1,  # Red end
        '0.2-0.4': 0.3,  # Red-yellow transition
        '0.4-0.6': 0.5,  # Yellow (middle)
        '0.6-0.8': 0.7,  # Yellow-green transition
        '0.8-1.0': 0.9,  # Green end
    }
    color_map = {label: to_hex(spect_cmap(mid)) for label, mid in bucket_midpoints.items()}
    color_map['other'] = 'grey'

    # Map bucket labels to the column name suffixes used in filtered_df
    bucket_to_col = {
        '0.0-0.2': '0_0.2',
        '0.2-0.4': '0.2_0.4',
        '0.4-0.6': '0.4_0.6',
        '0.6-0.8': '0.6_0.8',
        '0.8-1.0': '0.8_1.0'
    }

    def column_for(site, bucket_label):
        """Name of the filtered_df column holding `site`'s counts for `bucket_label`."""
        if bucket_label == 'other':
            return f'{site}_other'
        return f'{site}_bucket_{bucket_to_col[bucket_label]}'

    # --- Total citation counts per site and bucket ---
    # Shape: index=['Wikipedia', 'Grokipedia'], columns=bucket_labels
    prop_df = pd.DataFrame(
        [
            [filtered_df[column_for('wp', k)].sum() for k in bucket_labels],
            [filtered_df[column_for('grok', k)].sum() for k in bucket_labels],
        ],
        columns=bucket_labels,
        index=['Wikipedia', 'Grokipedia']
    )
    # Row-normalize to proportions; a site with zero citations gives 0/0 -> NaN -> 0.
    prop_df_norm = prop_df.div(prop_df.sum(axis=1), axis=0).fillna(0)

    # --- Plotting stacked bars with diagonal overlays ---
    labels = ['Wikipedia', 'Grokipedia']
    x = np.arange(len(labels))
    bar_sep = 0.09
    width = 0.18

    fig, axs = plt.subplots(1, 2, figsize=(16, 7), gridspec_kw={'width_ratios': [1, 1.7]})
    ax = axs[0]

    # Running top of each stack: [Wikipedia, Grokipedia]
    bottoms = [0, 0]

    # For synchronized stacking, process in bucket order:
    for bucket_label in bucket_labels:
        color = color_map.get(bucket_label, 'grey')
        wp_prop = prop_df_norm.loc['Wikipedia', bucket_label]
        grok_prop = prop_df_norm.loc['Grokipedia', bucket_label]
        ax.bar(x[0] - width/2 - bar_sep/2, wp_prop, width=width,
               bottom=bottoms[0], color=color, edgecolor='none', zorder=2)
        ax.bar(x[1] + width/2 + bar_sep/2, grok_prop, width=width,
               bottom=bottoms[1], color=color, edgecolor='none', zorder=2)

        # Diagonal change fill connecting the two segments (drawn below the bars)
        wp_top = bottoms[0] + wp_prop
        grok_top = bottoms[1] + grok_prop
        ax.fill_between(
            [x[0] - width/2, x[1] + width/2],
            [wp_top, grok_top],
            [bottoms[0], bottoms[1]],
            color=color, alpha=0.25, zorder=1, linewidth=0
        )
        bottoms[0] += wp_prop
        bottoms[1] += grok_prop

    # Set axis ticks and labels
    ax.set_xticks(x)
    ax.set_xticklabels(labels, fontsize=16)
    ax.set_ylabel("Proportion of Citations", fontsize=16)
    ax.set_title("Source Reliability Score Proportion: Wikipedia vs Grokipedia", fontsize=16)

    # Make axis tight with bars
    ax.set_xlim(-0.23, 1.23)
    ax.set_ylim(bottom=0, top=1.01)
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)

    # Custom legend patch (color by bucket) - reversed order so the highest bucket is listed first.
    # Swatches are opaque, like the bars they describe.
    legend_elements = [Patch(facecolor=color_map.get(bucket_label, 'grey'), label=display_names.get(bucket_label, bucket_label)) for bucket_label in reversed(bucket_labels)]
    ax.legend(handles=legend_elements, title='Reliability Score', loc='upper center', framealpha=0.9)

    # Set figure title (for whole figure)
    plot_title = title if title is not None else "Source Reliability Score Proportion: Wikipedia vs Grokipedia"
    fig.suptitle(plot_title, fontsize=18, y=0.98)

    # Tighten subplot spacing to reduce whitespace (leave room for suptitle)
    fig.subplots_adjust(wspace=0.15, left=0.05, right=0.97, top=0.88, bottom=0.1)

    # ---- Second plot: % of articles containing at least 1 in each bucket ----
    ax2 = axs[1]
    main_buckets = ['0.0-0.2', '0.2-0.4', '0.4-0.6', '0.6-0.8', '0.8-1.0']
    bucket_display_labels = [display_names[b] for b in main_buckets]
    bar_x = np.arange(len(main_buckets))
    bar_width = 0.36

    n_articles = len(filtered_df)

    def percent_of_articles(column):
        """Percentage of rows with a positive count in `column`; 0 for an empty frame."""
        if n_articles == 0:
            # Avoid 0/0 (NaN bars and labels), consistent with the first panel.
            return 0.0
        return (filtered_df[column] > 0).sum() / n_articles * 100

    percentages = {
        'Wikipedia': [percent_of_articles(column_for('wp', b)) for b in main_buckets],
        'Grokipedia': [percent_of_articles(column_for('grok', b)) for b in main_buckets],
    }

    # Plot as side-by-side bars with blue/red scheme (same as reliability chart)
    wp_color = "#4977bc"  # Blue for Wikipedia
    grok_color = "#e86b54"  # Red for Grokipedia
    ax2.bar(bar_x - bar_width/2, percentages['Wikipedia'], bar_width,
           label='Wikipedia', color=wp_color, edgecolor='black', alpha=0.7)
    ax2.bar(bar_x + bar_width/2, percentages['Grokipedia'], bar_width,
           label='Grokipedia', color=grok_color, edgecolor='black', alpha=0.7)

    # Value labels just above each bar
    for i, (wp, gk) in enumerate(zip(percentages['Wikipedia'], percentages['Grokipedia'])):
        ax2.text(i - bar_width/2, wp + 1, f"{wp:.1f}%", ha='center', va='bottom', fontsize=10, color="#10426b")
        ax2.text(i + bar_width/2, gk + 1, f"{gk:.1f}%", ha='center', va='bottom', fontsize=10, color="#7a230c")

    ax2.set_xticks(bar_x)
    ax2.set_xticklabels(bucket_display_labels, rotation=14, fontsize=16)
    ax2.set_ylim(0, 105)
    ax2.set_ylabel("Percent of Articles", fontsize=16)
    ax2.set_title("% of Articles Citing Any Source in Reliability Bucket", fontsize=16)
    ax2.legend(loc='upper left', fontsize=16)
    ax2.grid(axis='y', linestyle=':', alpha=0.4)

    fig.tight_layout()
    fig.savefig(f'../graphics/overall_grok_wp_cite_composition_lin_{fsuffix}.pdf')

    if show:
        plt.show()


In [None]:
# Score-bucket composition charts for the controversial subset; expects
# max_reliability_shift_df to hold the bucket columns at this point.
plot_reliability_bucket_charts(max_reliability_shift_df, fsuffix='controversial')

In [None]:
# Compare embedding similarities between Wikipedia and Grokipedia for controversial vs. non-controversial articles

def normalize_title(s):
    """Lower-case a title, replace underscores with spaces and trim surrounding whitespace."""
    return s.lower().replace("_", " ").strip()

# Read controversial article titles from file (utf-8, like the other readers of this file)
controversial_titles = set()
with open("../results/controversial_pages_in_grokipedia.txt", encoding="utf-8") as f:
    for line in f:
        title = line.strip()
        if title:
            controversial_titles.add(title.lower())

# Normalize the article titles
similarities['normalized_article'] = similarities['title'].apply(normalize_title)

# Split into controversial/non-controversial sets using a single membership mask
is_controversial = similarities['normalized_article'].isin(controversial_titles)
cont_sim_df = similarities[is_controversial].copy()
noncont_sim_df = similarities[~is_controversial].copy()

print(f"Number of controversial articles in similarity df: {len(cont_sim_df)}")
print(f"Number of non-controversial articles in similarity df: {len(noncont_sim_df)}")

cont_mean = cont_sim_df["similarity"].mean()
noncont_mean = noncont_sim_df["similarity"].mean()

# Plot histogram overlays, y axis as percentage (not density)
fig, ax = plt.subplots(figsize=(10, 6))
bins = 100

# Get bin counts (not density), then scale to percent of each group.
# Values outside [0, 1] fall outside the bins but still count in the denominators.
cont_counts, bin_edges = np.histogram(cont_sim_df["similarity"], bins=bins, range=(0,1))
noncont_counts, _ = np.histogram(noncont_sim_df["similarity"], bins=bin_edges)

cont_percents = cont_counts / cont_sim_df.shape[0] * 100 if cont_sim_df.shape[0] > 0 else np.zeros_like(cont_counts)
noncont_percents = noncont_counts / noncont_sim_df.shape[0] * 100 if noncont_sim_df.shape[0] > 0 else np.zeros_like(noncont_counts)
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_width = bin_edges[1] - bin_edges[0]

ax.bar(bin_centers, cont_percents, width=bin_width,
       color="tab:orange", alpha=0.75, label="Controversial Articles", edgecolor=None)
ax.bar(bin_centers, noncont_percents, width=bin_width,
       color="tab:blue", alpha=0.55, label="Non-controversial Articles", edgecolor=None)

# Group means as vertical reference lines
ax.axvline(x=cont_mean, color="black", linestyle=":", linewidth=2, label=f"Controversial Mean={cont_mean:.2f}")
ax.axvline(x=noncont_mean, color="black", linestyle="--", linewidth=2, label=f"Non-controversial Mean={noncont_mean:.2f}")

ax.set_xlabel("Similarity", fontsize=16)
ax.set_ylabel("Percent of Articles", fontsize=16)
ax.set_title("Embedding Similarity for Controversial vs. Non-controversial Articles", fontsize=18)
ax.legend(fontsize=16)
fig.tight_layout()
fig.savefig("../graphics/similarity_distribution_controversial_vs_noncontroversial_percent.pdf")
plt.show()


In [None]:
def read_title_set(path):
    """Return the set of non-empty lines of a text file, stripped and lower-cased."""
    with open(path, 'r', encoding='utf-8') as f:
        return {line.strip().lower() for line in f if line.strip()}


# NOTE: `controversial_pages` is rebound here to a set of normalized titles;
# earlier in the notebook the same name holds the raw title list from query().
controversial_pages = read_title_set('../results/controversial_pages_in_grokipedia.txt')
licensed_pages = read_title_set('../results/grokipedia_w_license.txt')
non_licensed_pages = read_title_set('../results/grokipedia_wo_license.txt')

In [None]:
# Controversial titles that also appear in grokipedia_w_license.txt.
len(controversial_pages.intersection(licensed_pages))

In [None]:
# Controversial titles that also appear in grokipedia_wo_license.txt.
len(controversial_pages.intersection(non_licensed_pages))

In [None]:
# Total number of controversial titles, for comparison with the two counts above.
len(controversial_pages)