In [None]:
import pandas as pd
import json
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
import numpy as np
import math


In [None]:
# Wikidata query exports, one CSV per legislature.
us = pd.read_csv('../supplemental_data/wikidata_queries/us_members_of_congress.csv')
uk = pd.read_csv('../supplemental_data/wikidata_queries/uk_members_of_parliament.csv')
jp = pd.read_csv('../supplemental_data/wikidata_queries/jp_members_of_diet.csv')
de = pd.read_csv('../supplemental_data/wikidata_queries/de_members_of_bundestag.csv')

# Derive the join key used by the merges below: the person label with
# spaces swapped for underscores.
for _members in (us, uk, jp, de):
    _members['title'] = _members['personLabel'].str.replace(' ', '_')

In [None]:
# Result table that is left-joined onto each member table via its `title` column.
reliability_cite_diff = pd.read_csv('../results/reliability_citation_diff.csv')
# Similarity table (parquet); also joined on `title`, and its `similarity` column is plotted later.
similarity = pd.read_parquet('../results/embeddings_similarities_pairwise_top1_alignments.parquet')

In [None]:
def _with_citation_diff(members):
    """Left-join the citation-diff table on ``title``, then drop every row containing any missing value."""
    # NOTE(review): dropna() has no `subset`, so a NaN in *any* column removes the row.
    return members.merge(reliability_cite_diff, on="title", how="left").dropna()


def _with_similarity(members):
    """Left-join the similarity table on ``title``; members without a match are kept."""
    return members.merge(similarity, on="title", how="left")


# NOTE(review): these assignments overwrite the loaded tables, so re-running
# this cell without re-running the load cell merges a second time.
us = _with_citation_diff(us)
uk = _with_citation_diff(uk)
jp = _with_citation_diff(jp)
de = _with_citation_diff(de)

us_similarity = _with_similarity(us)
uk_similarity = _with_similarity(uk)
jp_similarity = _with_similarity(jp)
de_similarity = _with_similarity(de)

In [None]:
# Combine US and UK party similarity histograms into one subplot with outlined (not filled) histograms

# --- Categorize US parties
def party_category(x):
    """Collapse a US party label to 'Democratic Party', 'Republican Party' or 'Other'."""
    for known in ("Democratic Party", "Republican Party"):
        if x == known:
            return known
    # Anything that is not exactly one of the two major-party labels.
    return "Other"

# Legend labels and line colours, in plotting order.
us_labels = ["Democratic Party", "Republican Party", "Other"]
us_colors = ["tab:blue", "tab:red", "tab:gray"]

# Tag each US row with its coarse party bucket, then collect the non-missing
# similarity values of every bucket (same order as `us_labels`).
us_similarity['party_category'] = us_similarity['parliamentaryGroupLabel'].apply(party_category)
us_hist_data = [
    us_similarity.loc[us_similarity['party_category'] == label, 'similarity'].dropna()
    for label in us_labels
]
us_dem, us_rep, us_other = us_hist_data

# --- Categorize UK parties
def canonical_party(x):
    """Map a UK parliamentary-group label to one of four plotting categories.

    Matching is case-insensitive and substring based:
      * contains "labour"                                  -> "Labour Party"
      * contains "conservative" but not "unionist"         -> "Conservative Party"
      * contains "reform uk"                               -> "Reform UK"
      * anything else, including missing/non-string labels -> "Other"

    Parameters:
        x: the group label; normally a str, but NaN/None are tolerated.

    Returns:
        str: one of "Labour Party", "Conservative Party", "Reform UK", "Other".
    """
    # Missing labels arrive as float NaN (or None); they have no .lower().
    if not isinstance(x, str):
        return "Other"
    xl = x.lower()
    # NOTE(review): the substring test also matches any other label that
    # contains "labour" — confirm that is intended.
    if "labour" in xl:
        return "Labour Party"
    # NOTE(review): labels mentioning "unionist" are deliberately sent to
    # "Other" here — confirm against the labels present in the data.
    if "conservative" in xl and "unionist" not in xl:
        return "Conservative Party"
    if "reform uk" in xl:
        return "Reform UK"
    return "Other"

# Line colour per UK plotting category.
party_colors = {
    "Labour Party": "tab:red",
    "Conservative Party": "tab:blue",
    "Reform UK": "tab:gray",
    "Other": "tab:green"
}
uk_labels = ["Labour Party", "Conservative Party", "Reform UK", "Other"]

# Tag each UK row with its plotting category, then collect the non-missing
# similarity values of every category (same order as `uk_labels`).
uk_similarity['party_hist_cat'] = uk_similarity['parliamentaryGroupLabel'].map(canonical_party)
uk_hist_data = [
    uk_similarity.loc[uk_similarity['party_hist_cat'] == label, 'similarity'].dropna()
    for label in uk_labels
]
uk_labour, uk_cons, uk_reform, uk_other = uk_hist_data
uk_colors = [party_colors[label] for label in uk_labels]

# --- Plot subplots
def _plot_party_similarity(ax, hist_data, labels, colors, mean_groups, title, ylabel=None):
    """Draw outlined per-party similarity histograms on ``ax`` plus dashed mean lines.

    Parameters:
        ax: matplotlib Axes to draw on.
        hist_data (list): one Series of similarity values per party, aligned with
            ``labels`` and ``colors``.
        labels (list): legend label for each histogram.
        colors (list): line colour for each histogram.
        mean_groups (dict): party name -> (Series, colour) for the parties that
            should get a vertical mean line.
        title (str): axes title.
        ylabel (str): optional y-axis label (the panels share their y axis, so
            only the left one needs it).

    Returns:
        dict: party name -> mean similarity (NaN for a party with no values).
    """
    ax.hist(
        hist_data,
        bins=50,
        color=colors,
        label=labels,
        alpha=1,
        histtype="step",  # outlined, not filled
        linewidth=1.7,
    )
    ax.set_xlabel("Per Chunk Similarity", fontsize=16)
    if ylabel is not None:
        ax.set_ylabel(ylabel, fontsize=16)
    ax.set_title(title, fontsize=18)

    means = {}
    for pname, (values, vcolor) in mean_groups.items():
        means[pname] = values.mean()
        # A party without any rows has a NaN mean: no line would be visible,
        # yet a legend entry would still be added, so skip it entirely.
        if pd.isna(means[pname]):
            continue
        ax.axvline(means[pname], color=vcolor, linestyle="--", linewidth=2, label=f"{pname} mean")

    # Build the legend once, after all artists exist; keying by label drops
    # any repeated label.
    handles, legend_labels = ax.get_legend_handles_labels()
    unique = dict(zip(legend_labels, handles))
    ax.legend(unique.values(), unique.keys(), fontsize=16)
    return means


fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharey=True)

# US panel: mean lines for Democrats and Republicans (not "Other")
us_vline_colors = {"Democratic Party": "tab:blue", "Republican Party": "tab:red"}
us_party_means = _plot_party_similarity(
    axes[0],
    us_hist_data,
    us_labels,
    us_colors,
    mean_groups={
        "Democratic Party": (us_dem, us_vline_colors["Democratic Party"]),
        "Republican Party": (us_rep, us_vline_colors["Republican Party"]),
    },
    title="US Congress: Similarity by Party",
    ylabel="Count",
)

# UK panel: mean lines for Labour, Conservative and Reform UK (not "Other")
uk_vline_colors = {
    "Labour Party": party_colors["Labour Party"],
    "Conservative Party": party_colors["Conservative Party"],
    "Reform UK": party_colors["Reform UK"]
}
uk_party_means = _plot_party_similarity(
    axes[1],
    uk_hist_data,
    uk_labels,
    uk_colors,
    mean_groups={
        "Labour Party": (uk_labour, uk_vline_colors["Labour Party"]),
        "Conservative Party": (uk_cons, uk_vline_colors["Conservative Party"]),
        "Reform UK": (uk_reform, uk_vline_colors["Reform UK"]),
    },
    title="UK Parliament: Similarity by Parliamentary Group",
)

plt.tight_layout()
plt.savefig('../graphics/mps/us_uk_party_similarity_subplot.pdf')
plt.show()

In [None]:
# Read both files explicitly as UTF-8: open() otherwise falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text on systems
# whose default is not UTF-8.
with open('../results/grok_domains.json', encoding='utf-8') as f:
    grok_domains = json.load(f)

with open('../results/wp_domains.json', encoding='utf-8') as f:
    wp_domains = json.load(f)

In [None]:
# Every member title across the four legislatures.
all_mps = set(
    us['title'].tolist() +
    uk['title'].tolist() +
    jp['title'].tolist() +
    de['title'].tolist()
)

_DOMAIN_COLUMNS = ['title', 'domain', 'count']


def _domain_records(pages, titles):
    """Flatten per-page domain counts into long-form records.

    Parameters:
        pages (iterable): dicts, each mapping a page title to a
            ``{domain: count}`` dict.
        titles (set): only pages whose title is in this set are kept.

    Returns:
        list of dicts with keys 'title', 'domain', 'count'.
    """
    return [
        {'title': page_title, 'domain': domain, 'count': count}
        for page in pages
        for page_title, domain_counts in page.items()
        if page_title in titles
        for domain, count in domain_counts.items()
    ]


# Passing `columns` keeps the three columns present even when no page matches,
# so the later merge on ['title', 'domain'] cannot fail on an empty frame.
grok_domains_mps = _domain_records(grok_domains, all_mps)
grok_domains_mps_df = pd.DataFrame(grok_domains_mps, columns=_DOMAIN_COLUMNS)

wp_domains_mps = _domain_records(wp_domains, all_mps)
wp_domains_mps_df = pd.DataFrame(wp_domains_mps, columns=_DOMAIN_COLUMNS)

In [None]:
# Outer join keeps (title, domain) pairs cited by only one of the two sources;
# the missing side's count is filled with 0.
mp_domains_compare_df = grok_domains_mps_df.merge(
    wp_domains_mps_df,
    on=['title', 'domain'],
    suffixes=('_grok', '_wp'),
    how='outer',
).fillna(0)

In [None]:
# Domain-level reliability ratings; merged onto the comparison table via `domain`.
# NOTE(review): presumably the source of the `pc1` score column used further down — confirm.
reliability_scores = pd.read_csv('../supplemental_data/news_reliability/LinRating_Join.csv')

In [None]:
# Stack the four per-country member tables into one long frame, tagging every
# row with the country it came from.
us_mp = us.assign(country='us')
uk_mp = uk.assign(country='uk')
jp_mp = jp.assign(country='jp')
de_mp = de.assign(country='de')

all_mp_df = (
    pd.concat([us_mp, uk_mp, jp_mp, de_mp], ignore_index=True)
    # keep a single row per (title, country) pair
    .drop_duplicates(subset=['title', 'country'])
)

# Attach the member metadata to every (title, domain) row; the table stays in
# long form so it is easy to filter by country or party afterwards.
mp_domains_compare_df = mp_domains_compare_df.merge(all_mp_df, on='title', how='left')


In [None]:
# Add the per-domain reliability ratings; domains without a rating are kept.
mp_domains_compare_df = mp_domains_compare_df.merge(
    reliability_scores,
    on='domain',
    how='left',
)

In [None]:
# Join the perennial-sources list (domain -> status) onto the comparison table.
reliability_df = pd.read_csv('../supplemental_data/perennial_sources_enwiki/enwiki_perennial_list.csv')
display_df = mp_domains_compare_df.merge(reliability_df, left_on='domain', right_on='source', how='left')

# Where no parliamentary group is recorded, fall back to the party label.
mask = display_df['parliamentaryGroupLabel'].isna() & display_df['partyLabel'].notna()
display_df.loc[mask, 'parliamentaryGroupLabel'] = display_df.loc[mask, 'partyLabel']

# Keep only the columns the status plots need.
_status_columns = ['title', 'domain', 'status', 'count_grok', 'count_wp', 'country', 'parliamentaryGroupLabel']
display_df = display_df[_status_columns]

In [None]:
def plot_mp_reliability_by_party(df, party_order=None, title=None, figsize=(14, 8), show=True, savepath=None):
    """
    Compare the Wikipedia and Grokipedia citation mix of each party.

    For every party two stacked bars are drawn side by side (Wikipedia on the
    left, Grokipedia on the right). Each segment is the share of that party's
    citations in one source-status category, and a faint band links matching
    segments so the shift between the two bars is visible.

    Parameters:
        df (pd.DataFrame): must provide 'parliamentaryGroupLabel', 'status', 'count_grok', 'count_wp'
        party_order (list): parties to show, in this order; every other group is summed into "Other"
        title (str): plot title; a default is used when None
        figsize (tuple): figure size (width, height)
        show (bool): call plt.show() once the figure is built
        savepath (str): if given, the figure is written to this path
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib.patches import Patch

    # Raw status label -> internal key
    status_mapping = {
        'Generally reliable': 'reliable',
        'Generally unreliable': 'unreliable',
        'Blacklisted': 'blacklist',
        'No consensus': 'no_consensus',
        'Deprecated': 'deprecated',
        'other': 'other'
    }
    # Stacking order (bottom to top), legend captions and colours per key
    column_order = ['reliable', 'unreliable', 'blacklist', 'no_consensus', 'deprecated', 'other']
    display_names = {
        'reliable': 'Generally reliable',
        'unreliable': 'Generally unreliable',
        'blacklist': 'Blacklisted',
        'no_consensus': 'No consensus',
        'deprecated': 'Deprecated',
        'other': 'Other'
    }
    color_map = {
        'reliable': 'green',
        'unreliable': 'red',
        'blacklist': 'black',
        'no_consensus': 'yellow',
        'deprecated': 'orange',
        'other': 'grey'
    }

    # Missing statuses become 'other'; labels outside the mapping are kept lower-cased
    work = df.copy()
    work['status'] = work['status'].fillna('other')
    mapped_status = work['status'].map(status_mapping)
    work['status_normalized'] = mapped_status.fillna(work['status'].str.lower())

    # Total citation counts per (party, status)
    totals = work.groupby(['parliamentaryGroupLabel', 'status_normalized']).agg({
        'count_grok': 'sum',
        'count_wp': 'sum'
    }).reset_index()

    def _counts_table(value_col):
        # party x status table of summed counts, 0 where a combination is absent
        return totals.pivot(
            index='parliamentaryGroupLabel', columns='status_normalized', values=value_col
        ).fillna(0)

    grok_counts = _counts_table('count_grok')
    wp_counts = _counts_table('count_wp')

    # Restrict to known categories that occur in either table, in stacking order
    columns_to_plot = [
        col for col in column_order
        if col in grok_counts.columns or col in wp_counts.columns
    ]
    grok_counts = grok_counts.reindex(columns=columns_to_plot, fill_value=0)
    wp_counts = wp_counts.reindex(columns=columns_to_plot, fill_value=0)

    # Requested parties first (in the given order); the rest collapse into "Other"
    if party_order:
        kept = [party for party in party_order if party in grok_counts.index]
        leftover = list(set(grok_counts.index) - set(kept))
        grok_rows = grok_counts.loc[kept]
        wp_rows = wp_counts.loc[kept]
        if leftover:
            grok_rest = grok_counts.loc[leftover].sum().to_frame().T
            grok_rest.index = ['Other']
            wp_rest = wp_counts.loc[leftover].sum().to_frame().T
            wp_rest.index = ['Other']
            grok_rows = pd.concat([grok_rows, grok_rest])
            wp_rows = pd.concat([wp_rows, wp_rest])
    else:
        grok_rows = grok_counts
        wp_rows = wp_counts

    # Row-wise shares; a party with zero citations ends up with all-zero shares
    grok_share = grok_rows.div(grok_rows.sum(axis=1), axis=0).fillna(0)
    wp_share = wp_rows.div(wp_rows.sum(axis=1), axis=0).fillna(0)

    labels = grok_share.index.tolist()
    x = np.arange(len(labels))

    bar_sep = 0.25   # gap between the two bars of a pair, leaves room for the band
    width = 0.28     # bar width

    fig, ax = plt.subplots(figsize=figsize)

    for center, party in zip(x, labels):
        wp_x = center - width/2 - bar_sep/2
        grok_x = center + width/2 + bar_sep/2

        wp_base = 0
        grok_base = 0

        for col in columns_to_plot:
            shade = color_map.get(col, 'grey')
            wp_height = wp_share.loc[party, col]
            grok_height = grok_share.loc[party, col]

            # Wikipedia segment (left bar), then Grokipedia segment (right bar)
            ax.bar(wp_x, wp_height, width=width, bottom=wp_base,
                   color=shade, edgecolor='none', zorder=2, alpha=0.8)
            ax.bar(grok_x, grok_height, width=width, bottom=grok_base,
                   color=shade, edgecolor='none', zorder=2, alpha=0.8)

            # Band joining the inner edges of the two segments
            ax.fill_between([wp_x + width/2, grok_x - width/2],
                            [wp_base + wp_height, grok_base + grok_height],
                            [wp_base, grok_base],
                            color=shade, alpha=0.14, zorder=1, linewidth=0)

            # Move the running bottoms up for the next segment
            wp_base += wp_height
            grok_base += grok_height

    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.set_ylabel("Proportion of Citations")
    ax.set_title("Source Status by Party" if title is None else title)

    # Extra room below zero for the per-bar captions
    ax.set_ylim(bottom=-0.11, top=1.01)
    for side in ("right", "top"):
        ax.spines[side].set_visible(False)

    # Caption each bar of every pair just below the axis baseline
    caption_y = 0 - 0.04
    for center in x:
        wp_x = center - width/2 - bar_sep/2
        grok_x = center + width/2 + bar_sep/2
        ax.text(wp_x, caption_y, "WP", ha='center', va='top', fontsize=10, color='black')
        ax.text(grok_x, caption_y, "Grok", ha='center', va='top', fontsize=10, color='black')

    # Legend lists the categories top-down, i.e. in reverse stacking order
    legend_elements = []
    for col in reversed(columns_to_plot):
        legend_elements.append(
            Patch(facecolor=color_map.get(col, 'grey'), label=display_names.get(col, col))
        )
    ax.legend(handles=legend_elements, title='Source Status', bbox_to_anchor=(1, 1), loc='upper left', framealpha=0.9)

    plt.tight_layout()

    if savepath:
        plt.savefig(savepath)

    if show:
        plt.show()

# Prepare US MP dataframe and plot
us_df = display_df.loc[display_df['country'] == 'us'].copy()
plot_mp_reliability_by_party(
    us_df,
    title="US Congress: Source Category Proportions by Party",
    party_order=['Democratic Party', 'Republican Party'],
    savepath='../graphics/mps/us_party_reliability.pdf',
)

In [None]:
# UK rows only, plotted with the three named parties and everything else as "Other"
uk_df = display_df.loc[display_df['country'] == 'uk'].copy()
plot_mp_reliability_by_party(
    uk_df,
    title="UK Parliament: Source Status by Party",
    party_order=['Labour Party', 'Conservative Party', 'Reform UK'],
    savepath='../graphics/mps/uk_party_reliability.pdf',
)

In [None]:
# Rebuild the display table, this time keeping the `pc1` score column as well.
reliability_df = pd.read_csv('../supplemental_data/perennial_sources_enwiki/enwiki_perennial_list.csv')
display_df = mp_domains_compare_df.merge(reliability_df, left_on='domain', right_on='source', how='left')

# Where no parliamentary group is recorded, fall back to the party label.
mask = display_df['parliamentaryGroupLabel'].isna() & display_df['partyLabel'].notna()
display_df.loc[mask, 'parliamentaryGroupLabel'] = display_df.loc[mask, 'partyLabel']

_score_columns = [
    'title', 'domain', 'status', 'count_grok', 'count_wp', 'country', 'parliamentaryGroupLabel', 'pc1'
]
display_df = display_df[_score_columns]

def pc1_bucket(x):
    """Return the bucket label for a reliability score.

    The score is rounded to the nearest multiple of 0.2 and returned as a
    string such as '0.0', '0.2', ..., '1.0'. Python's round() resolves exact
    ties to the even integer, so 0.5 lands in '0.4'.

    Parameters:
        x: the score; anything float() accepts, or None/NaN for "missing".

    Returns:
        str: the bucket label, or 'No score' when x is None, NaN, infinite,
        or cannot be converted to a float.
    """
    if x is None:
        return 'No score'
    try:
        score = float(x)
    except (ValueError, TypeError):
        return 'No score'
    # round() raises on NaN (ValueError) and on infinities (OverflowError);
    # treat every non-finite score as missing instead.
    if not math.isfinite(score):
        return 'No score'
    return str(round(score * 5) / 5)

# Label every row with its reliability-score bucket ('No score' when pc1 is missing).
display_df['pc1_bucket'] = display_df['pc1'].apply(pc1_bucket)

In [None]:
def plot_mp_reliability_buckets_by_party(df, party_order=None, title=None, figsize=(14, 8), show=True, savepath=None):
    """
    Plots stacked bar + diagonal comparison charts for MP/politician data grouped by party using reliability score buckets:
    Shows proportion of sources in each reliability bucket for Wikipedia and Grokipedia,
    with diagonal fills illustrating the change between the two bars of each party.

    Bucket labels are expected to be the strings produced by rounding the score
    to the nearest 0.2 ('0.0', '0.2', ..., '1.0') plus 'No score'; the legend
    shows the score range that rounds into each bucket. Bucket colours are a
    fixed function of the bucket value (0.0 red, 0.5 yellow, 1.0 green), so the
    same bucket has the same colour in every figure.

    Parameters:
        df (pd.DataFrame): DataFrame with columns: 'parliamentaryGroupLabel', 'count_grok', 'count_wp'
            and either 'pc1_bucket' or 'pc1' (from which the bucket is derived)
        party_order (list): Optional list of party names to display in order. Others will be grouped as "Other"
        title (str): Optional custom title for the plot. If None, uses default.
        figsize (tuple): Figure size (width, height)
        show (bool): If True, calls plt.show() at end
        savepath (str): Optional path to save the figure

    Raises:
        ValueError: if df has neither a 'pc1_bucket' nor a 'pc1' column.
    """
    import math

    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib.colors import to_hex, LinearSegmentedColormap
    from matplotlib.patches import Patch

    # Prepare dataframe - create pc1_bucket if it doesn't exist
    df_plot = df.copy()
    if 'pc1_bucket' not in df_plot.columns:
        if 'pc1' in df_plot.columns:
            def pc1_bucket(x):
                # Round to the nearest 0.2; missing, non-numeric and non-finite scores get 'No score'
                if x is None:
                    return 'No score'
                try:
                    score = float(x)
                except (ValueError, TypeError):
                    return 'No score'
                if not math.isfinite(score):
                    return 'No score'
                return str(round(score * 5) / 5)
            df_plot['pc1_bucket'] = df_plot['pc1'].apply(pc1_bucket)
        else:
            raise ValueError("DataFrame must have either 'pc1_bucket' or 'pc1' column")

    # Aggregate sums of counts by party and pc1_bucket (reliability score)
    agg = df_plot.groupby(['parliamentaryGroupLabel', 'pc1_bucket']).agg({
        'count_grok': 'sum',
        'count_wp': 'sum'
    }).reset_index()

    # Stacking order: numeric buckets ascending, 'No score' last if present
    bucket_vals = [b for b in agg['pc1_bucket'].unique() if b != 'No score']
    try:
        bucket_vals_sorted = sorted([float(x) for x in bucket_vals])
    except (ValueError, TypeError):
        # A non-numeric bucket label was supplied: as before, no scored bucket is plotted
        bucket_vals_sorted = []
    columns_to_plot = [str(v) for v in bucket_vals_sorted]
    if 'No score' in agg['pc1_bucket'].unique():
        columns_to_plot.append('No score')

    # Legend captions: the score range that rounds into each bucket. The first
    # and last buckets are half as wide because scores are rounded to the
    # nearest 0.2 rather than binned into equal 0.2-wide intervals.
    legend_map = {
        '0.0': '0.0-0.1',
        '0.2': '0.1-0.3',
        '0.4': '0.3-0.5',
        '0.6': '0.5-0.7',
        '0.8': '0.7-0.9',
        '1.0': '0.9-1.0',
        'No score': 'No score'
    }
    display_names = {str(b): legend_map.get(str(b), str(b)) for b in columns_to_plot}

    # Red (0.0) -> yellow (0.5) -> green (1.0); 'No score' is gray
    spect_cmap = LinearSegmentedColormap.from_list(
        "green_yellow_red", [(0.0, "#D73027"), (0.5, "#FEE08B"), (1.0, "#1A9850")]  # red, yellow, green
    )
    # Colour by the bucket value itself (clipped to [0, 1]) rather than by its
    # rank among the buckets present, so colours are stable across figures.
    bucket_values = [float(x) for x in columns_to_plot if x != 'No score']
    color_map = {
        str(v): to_hex(spect_cmap(min(max(v, 0.0), 1.0)))
        for v in bucket_values
    }
    color_map['No score'] = 'gray'

    # Pivot for 'grok' and 'wp'
    pivot_grok = agg.pivot(index='parliamentaryGroupLabel', columns='pc1_bucket', values='count_grok').fillna(0)
    pivot_wp = agg.pivot(index='parliamentaryGroupLabel', columns='pc1_bucket', values='count_wp').fillna(0)

    # Ensure all columns to plot are present in pivots (even if zero)
    pivot_grok = pivot_grok.reindex(columns=columns_to_plot, fill_value=0)
    pivot_wp = pivot_wp.reindex(columns=columns_to_plot, fill_value=0)

    # Handle party ordering
    if party_order:
        party_labels = [lab for lab in party_order if lab in pivot_grok.index]
        others = list(set(pivot_grok.index) - set(party_labels))
        if others:
            # Sum all others into a new "Other" row
            pivot_grok_other = pivot_grok.loc[others].sum().to_frame().T
            pivot_grok_other.index = ['Other']
            pivot_wp_other = pivot_wp.loc[others].sum().to_frame().T
            pivot_wp_other.index = ['Other']
            # Concatenate with ordered parties
            pivot_grok_plot = pd.concat([pivot_grok.loc[party_labels], pivot_grok_other])
            pivot_wp_plot = pd.concat([pivot_wp.loc[party_labels], pivot_wp_other])
        else:
            pivot_grok_plot = pivot_grok.loc[party_labels]
            pivot_wp_plot = pivot_wp.loc[party_labels]
    else:
        pivot_grok_plot = pivot_grok
        pivot_wp_plot = pivot_wp

    # Calculate proportions for each parliamentary group for grok and wp
    # (a group with zero citations yields all-zero proportions)
    pivot_prop_grok = pivot_grok_plot.div(pivot_grok_plot.sum(axis=1), axis=0).fillna(0)
    pivot_prop_wp = pivot_wp_plot.div(pivot_wp_plot.sum(axis=1), axis=0).fillna(0)

    # Order labels and X axis
    labels = pivot_prop_grok.index.tolist()
    x = np.arange(len(labels))

    bar_sep = 0.25   # separate more for visible diagonal/area
    width = 0.28     # a little narrower bars so there's actual spacing

    fig, ax = plt.subplots(figsize=figsize)

    for i, party in enumerate(labels):
        left = x[i] - width/2 - bar_sep/2
        right = x[i] + width/2 + bar_sep/2

        bottom_wp = 0
        bottom_grok = 0

        for col in columns_to_plot:
            seg_color = color_map.get(col, 'grey')
            share_wp = pivot_prop_wp.loc[party, col]
            share_grok = pivot_prop_grok.loc[party, col]

            # WP bar (left)
            ax.bar(left, share_wp, width=width, bottom=bottom_wp,
                   color=seg_color, edgecolor='none', zorder=2)
            # Grok bar (right)
            ax.bar(right, share_grok, width=width, bottom=bottom_grok,
                   color=seg_color, edgecolor='none', zorder=2)

            # Diagonal change fill between the inner edges of the two segments
            ax.fill_between([left + width/2, right - width/2],
                            [bottom_wp + share_wp, bottom_grok + share_grok],
                            [bottom_wp, bottom_grok],
                            color=seg_color, alpha=0.14, zorder=1, linewidth=0)

            # Stack
            bottom_wp += share_wp
            bottom_grok += share_grok

    # Set axis ticks and labels
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.set_ylabel("Proportion of Citations")
    plot_title = title if title is not None else "Source Reliability Score by Party"
    ax.set_title(plot_title)

    # Leave room below zero for the per-bar captions
    ax.set_ylim(bottom=-0.11, top=1.01)
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)

    # Caption each bar of every pair ("WP" = Wikipedia, "Grok" = Grokipedia)
    for i, party in enumerate(labels):
        left = x[i] - width/2 - bar_sep/2
        right = x[i] + width/2 + bar_sep/2
        y_min = 0
        ax.text(left, y_min - 0.04, "WP", ha='center', va='top', fontsize=10, color='black')
        ax.text(right, y_min - 0.04, "Grok", ha='center', va='top', fontsize=10, color='black')

    # Custom legend: 'No score' first, then buckets from highest to lowest
    rev_legend_cols = list(reversed([col for col in columns_to_plot if col != 'No score']))
    if 'No score' in columns_to_plot:
        rev_legend_cols.insert(0, 'No score')

    legend_elements = [
        Patch(
            facecolor=color_map.get(col, 'grey'),
            label=display_names.get(col, col)
        )
        for col in rev_legend_cols
    ]
    ax.legend(handles=legend_elements, title='Reliability Score\n(Lin et al. 2023)',
              bbox_to_anchor=(1, 1), loc='upper left', framealpha=0.9)

    plt.tight_layout()

    if savepath:
        plt.savefig(savepath)

    if show:
        plt.show()


In [None]:
# US rows only: reliability-score buckets for Democrats, Republicans and "Other"
us_df = display_df.loc[display_df['country'] == 'us'].copy()
plot_mp_reliability_buckets_by_party(
    us_df,
    title="US Congress: Source Reliability Score by Party",
    party_order=['Democratic Party', 'Republican Party'],
    savepath='../graphics/mps/us_party_reliability_buckets.pdf',
)

In [None]:
# UK rows only: reliability-score buckets for Labour, Conservative, Reform UK and "Other"
uk_df = display_df.loc[display_df['country'] == 'uk'].copy()
plot_mp_reliability_buckets_by_party(
    uk_df,
    title="UK Parliament: Source Reliability Score by Party",
    party_order=['Labour Party', 'Conservative Party', 'Reform UK'],
    savepath='../graphics/mps/uk_party_reliability_buckets.pdf',
)