# MP Demographics vs Tweet Activity

Analysis of how the age and gender of German MPs correlate with their Twitter/X activity.
Includes a comparison of all MPs with the subset of MPs who are on X.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from pathlib import Path
import yaml
from datetime import datetime, timedelta

# Database connection
import sys
sys.path.insert(0, str(Path.cwd().parent / 'src'))
from xminer.io.db import engine
from sqlalchemy import text

print('Libraries imported successfully')

In [None]:
# Configuration
# Notebook-wide settings: reporting month, tweet window, chart subtitle strings
# and the output folder. Later cells read these module-level names directly.
PARAMS_FILE = Path("../src/xminer/config/parameters.yml")

with PARAMS_FILE.open("r", encoding="utf-8") as f:
    params = yaml.safe_load(f) or {}  # an empty YAML document loads as None -> use {}

YEAR = int(params.get("year", 2025))
MONTH = int(params.get("month", 12))
YM = f"{YEAR:04d}{MONTH:02d}"  # zero-padded "YYYYMM"; used as an output sub-folder name

# Tweet window: the 30 days ending at END_DATE (a trailing window, not a calendar month).
# NOTE: END_DATE is a hard-coded snapshot date; it is not read from the clock or
# from parameters.yml, so it has to be edited by hand when the analysis is re-run.
END_DATE = datetime(2026, 1, 11)  # fixed snapshot date
START_DATE = END_DATE - timedelta(days=30)
START_DATE_STR = START_DATE.strftime('%Y-%m-%d')
END_DATE_STR = END_DATE.strftime('%Y-%m-%d')

# Bilingual date strings shown in chart subtitles
# NOTE(review): %B renders the month name according to the process locale —
# confirm it produces English month names where this notebook runs.
STAND_TEXT_DE = f"Zeitraum: {START_DATE.strftime('%d.%m.%Y')} - {END_DATE.strftime('%d.%m.%Y')}"
STAND_TEXT_EN = f"Period: {START_DATE.strftime('%B %d, %Y')} - {END_DATE.strftime('%B %d, %Y')}"

# Graphics directory: <graphics_base_dir>/<YYYYMM>/graphics/demographics
GRAPHICS_BASE_DIR = Path(params.get("graphics_base_dir", "../outputs"))
GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics" / "demographics"
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)  # created on import of this cell; no error if present

# Mobile-friendly dimensions (Instagram portrait)
MOBILE_WIDTH = 1080
MOBILE_HEIGHT = 1350

print(f"Period: {START_DATE_STR} to {END_DATE_STR}")
print(f"Output: {GRAPHICS_DIR}")

In [None]:
# Colour palettes shared by all charts in this notebook.

# Party key (as produced by normalize_party) -> hex colour.
PARTY_COLORS = dict([
    ("CDU/CSU", "#1a1a1a"),
    ("SPD", "#E3000F"),
    ("GRÜNE", "#1AA64A"),
    ("DIE LINKE.", "#BE3075"),
    ("FDP", "#FFED00"),
    ("AFD", "#009EE0"),
    ("BSW", "#009688"),
])

# Gender colours: every spelling of a gender (raw database value, English label,
# German plural label) maps to the same hex colour.
_MALE_HEX = "#4A90D9"
_FEMALE_HEX = "#E84393"
GENDER_COLORS = {}
for _male_key, _female_key in (
    ("männlich", "weiblich"),
    ("Male", "Female"),
    ("Männer", "Frauen"),
):
    GENDER_COLORS[_male_key] = _MALE_HEX
    GENDER_COLORS[_female_key] = _FEMALE_HEX

# One colour per age group, in ascending age-group order.
AGE_COLORS = [
    '#00d2d3',
    '#54a0ff',
    '#5f27cd',
    '#ff9f43',
    '#ee5a24',
    '#eb2f06',
    '#b71540',
]

def normalize_party(p: str) -> str:
    """Return the canonical party key for a raw party label.

    The label is stringified, stripped and upper-cased. CDU and CSU are merged
    into "CDU/CSU", the Green party spellings collapse to "GRÜNE", and the Left
    party spellings collapse to "DIE LINKE.". Any other label is returned in
    its cleaned upper-case form. ``None`` yields an empty string.
    """
    if p is None:
        return ""

    label = str(p).strip().upper()

    if label in ("CDU", "CSU"):
        return "CDU/CSU"

    green_markers = ("GRUENE", "B90", "BÜNDNIS")
    if label.startswith("GRÜN") or any(marker in label for marker in green_markers):
        return "GRÜNE"

    if label in ("LINKE", "DIE LINKE", "DIE LINKE."):
        return "DIE LINKE."

    return label

def get_party_color(party: str) -> str:
    """Return the hex colour for *party*, or grey ("#888888") if it has no entry.

    The label is passed through normalize_party before the PARTY_COLORS lookup.
    """
    fallback_grey = "#888888"
    return PARTY_COLORS.get(normalize_party(party), fallback_grey)

def translate_gender(gender: str, lang: str = 'en') -> str:
    """Turn the raw German gender value into a display label.

    "männlich"/"weiblich" become "Male"/"Female" when ``lang == 'en'`` and
    "Männer"/"Frauen" for any other ``lang``. Values other than those two are
    returned unchanged.
    """
    male_label, female_label = ("Male", "Female") if lang == 'en' else ("Männer", "Frauen")
    if gender == "männlich":
        return male_label
    if gender == "weiblich":
        return female_label
    return gender

# Common axis styling for readability
# Shared axis settings: white tick/title fonts and a dark-grey grid colour.
# NOTE(review): `titlefont` is a legacy axis attribute that newer Plotly releases
# replace with `title.font` — confirm the installed Plotly version still accepts it.
AXIS_STYLE = dict(
    tickfont=dict(size=16, color='white'),
    titlefont=dict(size=18, color='white'),
    gridcolor='#333333',
)

# Instagram-optimized layout
def get_base_layout(title_de, title_en, lang='de', show_period=True):
    """Return the shared layout settings (title, colours, fonts, margins) as a dict.

    Args:
        title_de: Chart title used when ``lang == 'de'``.
        title_en: Chart title used for any other ``lang``.
        lang: Language selector; 'de' picks the German title and period text.
        show_period: When true, the period text is appended to the title as a
            smaller grey second line.

    Returns:
        A dict with the keys title, plot_bgcolor, paper_bgcolor, font and margin.
    """
    german = lang == 'de'
    heading = title_de if german else title_en
    period_line = STAND_TEXT_DE if german else STAND_TEXT_EN

    title_html = f"<b>{heading}</b>"
    if show_period:
        title_html = f"<b>{heading}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{period_line}</span>"

    background = '#0d0d0d'
    layout = {
        'title': {
            'text': title_html,
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 28, 'color': 'white', 'family': 'Arial Black'},
        },
        'plot_bgcolor': background,
        'paper_bgcolor': background,
        'font': {'color': 'white', 'size': 16, 'family': 'Arial'},
        'margin': {'l': 80, 'r': 60, 't': 160, 'b': 100},
    }
    return layout

# Add branding/watermark
def add_branding(fig, lang='de'):
    """Add the centred "PoliMetrics | source" footer annotation to *fig*.

    The source text is German when ``lang == 'de'`` and English otherwise.
    The annotation is placed in paper coordinates just below the plot area.
    Returns the same figure object that was passed in.
    """
    source_by_lang = {'de': "Quelle: Bundestag, X/Twitter"}
    source = source_by_lang.get(lang, "Source: Bundestag, X/Twitter")

    footer = dict(
        text=f"<b>PoliMetrics</b> | {source}",
        xref="paper",
        yref="paper",
        x=0.5,
        y=-0.06,
        showarrow=False,
        font=dict(size=12, color='#666666'),
        xanchor='center',
    )
    fig.add_annotation(**footer)
    return fig

## Query ALL MPs Demographics (Including Non-X Users)

In [None]:
# Query ALL MPs (including those without X accounts)
# Using DISTINCT ON to remove duplicates (Friedrich Merz and Johann Wadephul appear twice)
# Note: 1 MP (Mayra Vriesema) has no birth date - included for gender stats, excluded from age stats
# The ORDER BY makes DISTINCT ON keep, per id, a row that has a username over one
# that has none (NULL usernames sort last).
# Rows without a gender are filtered out by the WHERE clause.
query_all_mps = """
SELECT DISTINCT ON (id)
    p.id,
    p.vorname,
    p.nachname,
    p.partei_kurz AS party,
    p.geschlecht AS gender,
    p.geburtsdatum AS birth_date,
    CASE 
        WHEN p.geburtsdatum IS NOT NULL 
        THEN EXTRACT(YEAR FROM AGE(CURRENT_DATE, p.geburtsdatum))::INT 
        ELSE NULL 
    END AS age,
    p.username,
    CASE WHEN p.username IS NOT NULL THEN true ELSE false END AS has_x_account
FROM politicians_12_2025 p
WHERE p.geschlecht IS NOT NULL
ORDER BY id, username DESC NULLS LAST
"""

print("Querying ALL MPs demographics (deduplicated)...")

with engine.connect() as conn:
    df_all_mps = pd.read_sql(text(query_all_mps), conn)

# Normalize party names and add English/German gender display labels
df_all_mps['party_norm'] = df_all_mps['party'].apply(normalize_party)
df_all_mps['gender_en'] = df_all_mps['gender'].apply(lambda x: translate_gender(x, 'en'))
df_all_mps['gender_de'] = df_all_mps['gender'].apply(lambda x: translate_gender(x, 'de'))

# Create age groups (only for MPs with known age)
# right=False makes each bin closed on the left: [20, 30) -> '20-29', ..., [80, 100) -> '80+'.
# `bins` and `labels` are module-level and reused when the X-user frame is binned.
bins = [20, 30, 40, 50, 60, 70, 80, 100]
labels = ['20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80+']
df_all_mps['age_group'] = pd.cut(df_all_mps['age'], bins=bins, labels=labels, right=False)

# Stats
mps_with_age = df_all_mps['age'].notna().sum()
mps_without_age = df_all_mps['age'].isna().sum()

print(f"\nTotal MPs (deduplicated): {len(df_all_mps)}")
print(f"  With known age: {mps_with_age}")
print(f"  Without birth date: {mps_without_age} (excluded from age charts)")
print(f"  With X account: {df_all_mps['has_x_account'].sum()}")
print(f"  Without X account: {(~df_all_mps['has_x_account']).sum()}")
print(f"\nGender distribution (all {len(df_all_mps)} MPs):")
# Percentages below are relative to all rows returned by the query.
gender_counts = df_all_mps['gender_en'].value_counts()
print(f"  Male: {gender_counts.get('Male', 0)} ({gender_counts.get('Male', 0)/len(df_all_mps)*100:.1f}%)")
print(f"  Female: {gender_counts.get('Female', 0)} ({gender_counts.get('Female', 0)/len(df_all_mps)*100:.1f}%)")

## Query X Users with Tweet Activity

In [None]:
# Query MPs with X accounts and their tweet activity in the configured window.
#
# Duplicate handling (Friedrich Merz and Johann Wadephul appear twice in the
# politicians table):
#   1. The `mps` CTE removes exact duplicate politician rows BEFORE the tweet
#      join. Without it, each tweet is joined once per duplicate row and the
#      GROUP BY then folds those rows into one group, inflating COUNT/SUM.
#   2. DISTINCT ON (p.id) still picks a single row per politician id when the
#      duplicates differ in one of the selected columns; the ORDER BY keeps the
#      variant with the most tweets.
#
# Only MPs with a username, a birth date and a gender are included.
# NOTE(review): the date bounds are date-only literals; if created_at is a
# timestamp column the upper bound is the very start of END_DATE — confirm
# that excluding the rest of that day is intended.
query_x_users = f"""
WITH mps AS (
    SELECT DISTINCT
        id,
        username,
        vorname,
        nachname,
        partei_kurz,
        geschlecht,
        geburtsdatum
    FROM politicians_12_2025
    WHERE username IS NOT NULL
        AND geburtsdatum IS NOT NULL
        AND geschlecht IS NOT NULL
)
SELECT DISTINCT ON (p.id)
    p.username,
    p.vorname,
    p.nachname,
    p.partei_kurz AS party,
    p.geschlecht AS gender,
    p.geburtsdatum AS birth_date,
    EXTRACT(YEAR FROM AGE(CURRENT_DATE, p.geburtsdatum))::INT AS age,
    COUNT(t.tweet_id) AS tweet_count,
    COALESCE(SUM(t.like_count), 0) AS total_likes,
    COALESCE(SUM(t.retweet_count), 0) AS total_retweets,
    COALESCE(SUM(t.impression_count), 0) AS total_impressions,
    COALESCE(AVG(t.like_count), 0) AS avg_likes,
    COALESCE(AVG(t.impression_count), 0) AS avg_impressions
FROM mps p
LEFT JOIN public.tweets t ON t.username = p.username
    AND t.created_at >= '{START_DATE_STR}'
    AND t.created_at <= '{END_DATE_STR}'
GROUP BY p.id, p.username, p.vorname, p.nachname, p.partei_kurz, p.geschlecht, p.geburtsdatum
ORDER BY p.id, COUNT(t.tweet_id) DESC
"""

print("Querying MPs on X with tweet activity (deduplicated)...")
print(f"Period: {START_DATE_STR} to {END_DATE_STR}\n")

with engine.connect() as conn:
    df_x_users = pd.read_sql(text(query_x_users), conn)

# Normalize party names, translate gender and bin ages with the same
# `bins`/`labels` used for the all-MPs frame so both frames are comparable.
df_x_users['party_norm'] = df_x_users['party'].apply(normalize_party)
df_x_users['gender_en'] = df_x_users['gender'].apply(lambda x: translate_gender(x, 'en'))
df_x_users['gender_de'] = df_x_users['gender'].apply(lambda x: translate_gender(x, 'de'))
df_x_users['age_group'] = pd.cut(df_x_users['age'], bins=bins, labels=labels, right=False)

print(f"MPs with X accounts (deduplicated): {len(df_x_users)}")
print(f"  Total tweets in period: {df_x_users['tweet_count'].sum():,}")
print(f"  MPs who tweeted: {(df_x_users['tweet_count'] > 0).sum()}")
print("\nGender distribution (X users):")
# `x_gender` stays module-level because a later cell prints it again.
x_gender = df_x_users['gender_en'].value_counts()
print(f"  Male: {x_gender.get('Male', 0)} ({x_gender.get('Male', 0)/len(df_x_users)*100:.1f}%)")
print(f"  Female: {x_gender.get('Female', 0)} ({x_gender.get('Female', 0)/len(df_x_users)*100:.1f}%)")

## 1. Compare All MPs vs X Users: Gender Distribution

In [None]:
def create_gender_comparison_donut(lang='de'):
    """Create side-by-side donut charts comparing gender: All MPs vs X Users.

    Reads the module-level frames ``df_all_mps`` and ``df_x_users`` and counts
    their ``gender_en`` values. Each slice is labelled with the gender word and
    its head count on separate lines, plus the percentage.

    Args:
        lang: 'de' for German texts; any other value produces English texts.

    Returns:
        A plotly figure with one pie trace per subplot, an insight annotation
        and the branding footer.
    """
    from plotly.subplots import make_subplots

    # Calculate counts
    all_gender = df_all_mps['gender_en'].value_counts()
    x_gender = df_x_users['gender_en'].value_counts()

    if lang == 'de':
        title = 'Wer ist auf X?'
        subtitle = 'Geschlechterverteilung im Vergleich'
        male_word, female_word = 'Männer', 'Frauen'
        all_label = f'Alle MdBs<br><b>{len(df_all_mps)}</b>'
        x_label = f'MdBs auf X<br><b>{len(df_x_users)}</b>'
    else:
        title = 'Who is on X?'
        subtitle = 'Gender Distribution Comparison'
        male_word, female_word = 'Male', 'Female'
        all_label = f'All MPs<br><b>{len(df_all_mps)}</b>'
        x_label = f'MPs on X<br><b>{len(df_x_users)}</b>'

    colors = [GENDER_COLORS['Male'], GENDER_COLORS['Female']]

    def _donut(counts):
        """Build one donut trace (male slice first) from a gender value_counts Series."""
        male_n = counts.get('Male', 0)
        female_n = counts.get('Female', 0)
        return go.Pie(
            values=[male_n, female_n],
            # Plotly text ignores "\n" (it is rendered as a space); "<br>" is
            # required to put the head count on its own line.
            labels=[f"{male_word}<br>{male_n}", f"{female_word}<br>{female_n}"],
            hole=0.55,
            marker=dict(colors=colors, line=dict(color='#0d0d0d', width=3)),
            textinfo='label+percent',
            textfont=dict(size=14, color='white'),
            textposition='inside',
            hovertemplate="<b>%{label}</b><br>%{percent}<extra></extra>",
        )

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{'type': 'pie'}, {'type': 'pie'}]],
        subplot_titles=[all_label, x_label]
    )

    # Left: all MPs, right: MPs on X
    fig.add_trace(_donut(all_gender), row=1, col=1)
    fig.add_trace(_donut(x_gender), row=1, col=2)

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.6em; color:#aaaaaa;'>{subtitle}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=16),
        height=MOBILE_HEIGHT,
        showlegend=False,
        margin=dict(t=180, b=120, l=40, r=40),
    )

    # Style subplot titles (the only annotations present at this point)
    for annotation in fig.layout.annotations:
        annotation.font = dict(size=18, color='white')

    # Add insight annotation
    # NOTE(review): this conclusion is a fixed string and is not derived from
    # the counts above — re-check it against the data whenever the input changes.
    insight_de = "Gleiches Verhältnis: X-Nutzung ist geschlechterunabhängig"
    insight_en = "Same ratio: X adoption is gender-neutral among MPs"
    insight = insight_de if lang == 'de' else insight_en

    fig.add_annotation(
        x=0.5, y=-0.02, xref='paper', yref='paper',
        text=f"<i>{insight}</i>",
        showarrow=False,
        font=dict(size=13, color='#aaaaaa'),
        xanchor='center'
    )

    fig = add_branding(fig, lang)
    return fig

# Create and save both versions
fig_de = create_gender_comparison_donut('de')
fig_en = create_gender_comparison_donut('en')

fig_de.write_image(GRAPHICS_DIR / "01_gender_comparison_de.png", width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2)
fig_en.write_image(GRAPHICS_DIR / "01_gender_comparison_en.png", width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2)

print("Saved: 01_gender_comparison_de.png")
print("Saved: 01_gender_comparison_en.png")

# Print the actual numbers
# The counts are recomputed here: `all_gender` only exists as a local variable
# inside create_gender_comparison_donut, so using it at cell level raised
# NameError. `x_gender` is recomputed too so this cell does not depend on
# which earlier cells have been run.
all_gender = df_all_mps['gender_en'].value_counts()
x_gender = df_x_users['gender_en'].value_counts()

print("\nActual numbers:")
print(f"  All MPs: {all_gender.get('Male', 0)} Male ({all_gender.get('Male', 0)/len(df_all_mps)*100:.1f}%), {all_gender.get('Female', 0)} Female ({all_gender.get('Female', 0)/len(df_all_mps)*100:.1f}%)")
print(f"  X Users: {x_gender.get('Male', 0)} Male ({x_gender.get('Male', 0)/len(df_x_users)*100:.1f}%), {x_gender.get('Female', 0)} Female ({x_gender.get('Female', 0)/len(df_x_users)*100:.1f}%)")

fig_en.show()

## 2. Compare All MPs vs X Users: Age Distribution

In [None]:
def create_age_comparison_chart(lang='de'):
    """Create comparison of age distribution: All MPs vs X Users - mirrored pyramid style.

    Reads the module-level frames ``df_all_mps`` and ``df_x_users`` and plots
    the percentage share of each age group. All MPs are drawn to the left
    (negative x values), MPs on X to the right.

    Args:
        lang: 'de' for German texts; any other value produces English texts.

    Returns:
        A plotly figure with two horizontal bar traces, side labels and the
        branding footer.
    """
    # Calculate percentages by age group (rows without an age group are not counted)
    all_age = df_all_mps['age_group'].value_counts(normalize=True).sort_index() * 100
    x_age = df_x_users['age_group'].value_counts(normalize=True).sort_index() * 100

    age_groups = [str(g) for g in all_age.index]

    if lang == 'de':
        title = 'Altersverteilung'
        subtitle = 'Alle MdBs vs. MdBs auf X'
        all_label = 'Alle MdBs'
        x_label = 'MdBs auf X'
    else:
        title = 'Age Distribution'
        subtitle = 'All MPs vs MPs on X'
        all_label = 'All MPs'
        x_label = 'MPs on X'

    fig = go.Figure()

    # All MPs (left side - negative values for visual)
    fig.add_trace(go.Bar(
        name=all_label,
        y=age_groups,
        x=[-all_age.get(g, 0) for g in all_age.index],
        orientation='h',
        marker_color='#6c5ce7',
        # The label shows the positive share even though the bar is drawn negative.
        text=[f"{all_age.get(g, 0):.0f}%" for g in all_age.index],
        textposition='inside',
        textfont=dict(size=14, color='white'),
        hovertemplate=f"<b>{all_label}</b><br>%{{y}}: %{{text}}<extra></extra>",
    ))

    # X Users (right side - positive values)
    fig.add_trace(go.Bar(
        name=x_label,
        y=age_groups,
        x=[x_age.get(g, 0) for g in all_age.index],
        orientation='h',
        marker_color='#00cec9',
        text=[f"{x_age.get(g, 0):.0f}%" for g in all_age.index],
        textposition='inside',
        textfont=dict(size=14, color='white'),
        hovertemplate=f"<b>{x_label}</b><br>%{{y}}: %{{text}}<extra></extra>",
    ))

    # Symmetric x range with 5 percentage points of headroom on each side
    max_val = max(all_age.max(), x_age.max()) + 5

    # Derive the tick marks from the axis range instead of fixing them at +/-30,
    # so a group share above 35% still gets labelled ticks. For a range between
    # 30 and 40 this yields exactly the previous -30..30 ticks.
    tick_step = 10
    if np.isfinite(max_val):
        tick_limit = max(int(max_val // tick_step) * tick_step, tick_step)
    else:
        tick_limit = 30  # no usable data: fall back to the former fixed ticks
    tick_vals = list(range(-tick_limit, tick_limit + 1, tick_step))
    tick_text = ['0' if v == 0 else f"{abs(v)}%" for v in tick_vals]

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.6em; color:#aaaaaa;'>{subtitle}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        barmode='overlay',
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=16),
        height=MOBILE_HEIGHT,
        xaxis=dict(
            range=[-max_val, max_val],
            tickvals=tick_vals,
            ticktext=tick_text,
            gridcolor='#333333',
            zeroline=True, zerolinecolor='#555555', zerolinewidth=2
        ),
        yaxis=dict(gridcolor='#333333', tickfont=dict(size=16)),
        legend=dict(
            orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5,
            bgcolor='rgba(0,0,0,0)', font=dict(size=16)
        ),
        margin=dict(l=100, r=60, t=180, b=100),
    )

    # Add labels for sides (x in data coordinates, y in paper coordinates)
    fig.add_annotation(x=-max_val/2, y=1.12, xref='x', yref='paper',
                       text=f"<b>{all_label}</b>", showarrow=False,
                       font=dict(size=14, color='#6c5ce7'))
    fig.add_annotation(x=max_val/2, y=1.12, xref='x', yref='paper',
                       text=f"<b>{x_label}</b>", showarrow=False,
                       font=dict(size=14, color='#00cec9'))

    fig = add_branding(fig, lang)
    return fig

# Render the German and English age pyramids, export both as PNG, show the English one.
fig_de, fig_en = create_age_comparison_chart('de'), create_age_comparison_chart('en')

_exports = {
    "02_age_comparison_pyramid_de.png": fig_de,
    "02_age_comparison_pyramid_en.png": fig_en,
}
for _filename, _figure in _exports.items():
    _figure.write_image(GRAPHICS_DIR / _filename, width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2)

for _filename in _exports:
    print(f"Saved: {_filename}")

fig_en.show()

## 3. Tweet Activity by Gender

In [None]:
def create_tweets_by_gender_chart(lang='de'):
    """Bar chart of average tweets per MP by gender, with large value labels.

    Reads the module-level frame ``df_x_users``. Each bar shows the mean
    ``tweet_count`` of one gender; annotations add the number of MPs in the
    group and the group's total tweet count.

    Args:
        lang: 'de' for German texts; any other value produces English texts.

    Returns:
        A plotly figure with one bar trace, per-bar annotations and the
        branding footer.
    """
    gender_col = 'gender_de' if lang == 'de' else 'gender_en'

    # One row per gender: total and mean tweets (rounded to 1 decimal) and MP count.
    gender_agg = (
        df_x_users.groupby(gender_col)
        .agg(
            Total=('tweet_count', 'sum'),
            Avg=('tweet_count', 'mean'),
            Count=('username', 'count'),
        )
        .round(1)
        .reset_index()
        .rename(columns={gender_col: 'Gender'})
    )

    # All visible words are localised, including the annotation units
    # (previously "MdBs" and "total" were shown in both languages).
    if lang == 'de':
        title = 'Wer tweetet mehr?'
        subtitle = 'Durchschnittliche Tweets pro MdB'
        avg_label = 'Ø Tweets pro MdB'
        mp_unit = 'MdBs'
        total_word = 'gesamt'
    else:
        title = 'Who tweets more?'
        subtitle = 'Average tweets per MP'
        avg_label = 'Avg Tweets per MP'
        mp_unit = 'MPs'
        total_word = 'total'

    fig = go.Figure()

    # Large centered bars with big numbers
    fig.add_trace(go.Bar(
        x=gender_agg['Gender'],
        y=gender_agg['Avg'],
        marker=dict(
            color=[GENDER_COLORS.get(g, '#888888') for g in gender_agg['Gender']],
            line=dict(width=0),
        ),
        text=[f"<b>{v:.1f}</b>" for v in gender_agg['Avg']],
        textposition='outside',
        textfont=dict(color='white', size=48),
        width=0.6,
    ))

    for row in gender_agg.itertuples(index=False):
        # Group size just below the baseline (y is in data coordinates)
        fig.add_annotation(
            x=row.Gender, y=-3,
            text=f"n = {row.Count} {mp_unit}",
            showarrow=False, font=dict(color='#888888', size=14)
        )
        # Total tweets, placed at half the bar height
        fig.add_annotation(
            x=row.Gender, y=row.Avg * 0.5,
            text=f"{int(row.Total):,} {total_word}",
            showarrow=False, font=dict(color='white', size=14)
        )

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{subtitle} | {STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        yaxis=dict(
            title=dict(text=avg_label, font=dict(size=18, color='white')),
            tickfont=dict(size=16, color='white'),
            gridcolor='#333333',
            showgrid=True
        ),
        xaxis=dict(
            tickfont=dict(size=24, color='white'),
            gridcolor='#333333'
        ),
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=16),
        height=MOBILE_HEIGHT,
        margin=dict(l=100, r=60, t=180, b=120),
        showlegend=False,
    )

    fig = add_branding(fig, lang)
    return fig

# Render the German and English gender charts, export both as PNG, show the English one.
fig_de, fig_en = create_tweets_by_gender_chart('de'), create_tweets_by_gender_chart('en')

_exports = {
    "03_tweets_by_gender_de.png": fig_de,
    "03_tweets_by_gender_en.png": fig_en,
}
for _filename, _figure in _exports.items():
    _figure.write_image(GRAPHICS_DIR / _filename, width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2)

for _filename in _exports:
    print(f"Saved: {_filename}")

fig_en.show()

## 4. Tweet Activity by Age Group

In [None]:
def create_tweets_by_age_chart(lang='de'):
    """Bar chart of the mean tweet count per age group, one colour per group.

    Reads the module-level frame ``df_x_users``; only age groups that actually
    occur are plotted. The number of MPs in each group is annotated below the
    baseline. ``lang == 'de'`` selects German texts, anything else English.
    Returns the finished plotly figure including the branding footer.
    """
    per_group = (
        df_x_users
        .groupby('age_group', observed=True)
        .agg({'tweet_count': 'mean', 'username': 'count'})
        .reset_index()
    )
    per_group.columns = ['Age Group', 'Avg Tweets', 'MP Count']

    german = lang == 'de'
    if german:
        title, subtitle = 'Wer tweetet am meisten?', 'Durchschnittliche Tweets nach Altersgruppe'
        yaxis_title, xaxis_title = 'Ø Tweets pro MdB', 'Altersgruppe'
    else:
        title, subtitle = 'Who tweets the most?', 'Average tweets by age group'
        yaxis_title, xaxis_title = 'Avg Tweets per MP', 'Age Group'

    group_names = per_group['Age Group'].astype(str)
    averages = per_group['Avg Tweets']

    bars = go.Bar(
        x=group_names,
        y=averages,
        marker=dict(
            color=AGE_COLORS[:len(per_group)],
            line=dict(width=0),
        ),
        text=[f"<b>{avg:.1f}</b>" for avg in averages],
        textposition='outside',
        textfont=dict(color='white', size=20),
        width=0.7,
    )
    fig = go.Figure()
    fig.add_trace(bars)

    # Group sizes, written just below the baseline (y is in data coordinates)
    for group_name, mp_count in zip(group_names, per_group['MP Count']):
        fig.add_annotation(
            x=group_name, y=-2,
            text=f"n={mp_count}",
            showarrow=False, font=dict(color='#888888', size=13)
        )

    white_16 = dict(size=16, color='white')
    white_18 = dict(size=18, color='white')
    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{subtitle} | {STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        yaxis=dict(
            title=dict(text=yaxis_title, font=white_18),
            tickfont=white_16,
            gridcolor='#333333'
        ),
        xaxis=dict(
            title=dict(text=xaxis_title, font=white_18),
            tickfont=white_18,
            gridcolor='#333333'
        ),
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=16),
        height=MOBILE_HEIGHT,
        margin=dict(l=100, r=60, t=180, b=120),
        showlegend=False,
    )

    return add_branding(fig, lang)

# Render the German and English age-group charts, export both as PNG, show the English one.
fig_de, fig_en = create_tweets_by_age_chart('de'), create_tweets_by_age_chart('en')

_exports = {
    "04_tweets_by_age_de.png": fig_de,
    "04_tweets_by_age_en.png": fig_en,
}
for _filename, _figure in _exports.items():
    _figure.write_image(GRAPHICS_DIR / _filename, width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2)

for _filename in _exports:
    print(f"Saved: {_filename}")

fig_en.show()

## 5. Age vs Tweet Activity Scatter Plot

In [None]:
def create_age_tweets_scatter(lang='de'):
    """Scatter plot of age against tweet count with a linear trend line.

    Reads the module-level frame ``df_x_users`` and keeps only MPs with at
    least one tweet. One marker trace is drawn per party, followed by a dashed
    least-squares line and an annotation showing the correlation between age
    and tweet count. ``lang == 'de'`` selects German texts, anything else
    English. Returns the finished plotly figure including the branding footer.
    """
    active = df_x_users.loc[df_x_users['tweet_count'] > 0].copy()
    german = lang == 'de'

    if german:
        title, subtitle = 'Alter vs. Tweet-Aktivität', 'Jeder Punkt ist ein MdB'
        xaxis_title, yaxis_title = 'Alter (Jahre)', 'Anzahl Tweets'
    else:
        title, subtitle = 'Age vs Tweet Activity', 'Each dot is an MP'
        xaxis_title, yaxis_title = 'Age (years)', 'Number of Tweets'

    # The hover text is identical for every party trace, so build it once.
    hover = (
        "<b>%{customdata[0]} %{customdata[1]}</b><br>"
        f"{'Alter' if lang == 'de' else 'Age'}: %{{x}}<br>"
        "Tweets: %{y}<br>"
        f"{'Partei' if lang == 'de' else 'Party'}: %{{customdata[2]}}<br>"
        "<extra></extra>"
    )

    fig = go.Figure()

    # One marker trace per party, in sorted party order
    for party_name in sorted(active['party_norm'].unique()):
        members = active[active['party_norm'] == party_name]
        marker_style = dict(
            size=14,
            color=get_party_color(party_name),
            opacity=0.8,
            line=dict(width=2, color='white')
        )
        fig.add_trace(go.Scatter(
            x=members['age'],
            y=members['tweet_count'],
            mode='markers',
            name=party_name,
            marker=marker_style,
            hovertemplate=hover,
            customdata=members[['vorname', 'nachname', 'party_norm']].values
        ))

    # Degree-1 least-squares fit, evaluated on 100 points across the age range
    ages = active['age']
    tweets = active['tweet_count']
    coefficients = np.polyfit(ages, tweets, 1)
    trend_x = np.linspace(ages.min(), ages.max(), 100)
    trend_y = np.polyval(coefficients, trend_x)

    fig.add_trace(go.Scatter(
        x=trend_x, y=trend_y,
        mode='lines', name='Trend',
        line=dict(color='#ff6b6b', width=3, dash='dash'),
        hoverinfo='skip'
    ))

    # Correlation shown in the top-right corner
    corr_text = f"r = {ages.corr(tweets):.2f}"

    axis_tick_font = dict(size=16, color='white')
    axis_title_font = dict(size=18, color='white')
    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{subtitle} | {STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        xaxis=dict(
            title=dict(text=xaxis_title, font=axis_title_font),
            tickfont=axis_tick_font,
            gridcolor='#333333'
        ),
        yaxis=dict(
            title=dict(text=yaxis_title, font=axis_title_font),
            tickfont=axis_tick_font,
            gridcolor='#333333'
        ),
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=14),
        height=MOBILE_HEIGHT,
        margin=dict(l=100, r=60, t=180, b=120),
        legend=dict(
            bgcolor='rgba(0,0,0,0.7)', bordercolor='#444444', borderwidth=1,
            font=dict(size=12), orientation='h', yanchor='top', y=-0.12, xanchor='center', x=0.5
        )
    )

    fig.add_annotation(
        x=0.95, y=0.95, xref='paper', yref='paper',
        text=f"<b>{corr_text}</b>",
        showarrow=False,
        font=dict(size=18, color='#ff6b6b'),
        bgcolor='rgba(0,0,0,0.5)',
        borderpad=8
    )

    return add_branding(fig, lang)

# Render the German and English scatter plots, export both as PNG, show the English one.
fig_de, fig_en = create_age_tweets_scatter('de'), create_age_tweets_scatter('en')

_exports = {
    "05_age_vs_tweets_scatter_de.png": fig_de,
    "05_age_vs_tweets_scatter_en.png": fig_en,
}
for _filename, _figure in _exports.items():
    _figure.write_image(GRAPHICS_DIR / _filename, width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2)

# Report the correlation for MPs with at least one tweet (same subset as the chart)
df_active = df_x_users[df_x_users['tweet_count'] > 0]
_age_tweet_corr = df_active['age'].corr(df_active['tweet_count'])
print(f"Correlation between age and tweets: {_age_tweet_corr:.3f}")

print("\nSaved: 05_age_vs_tweets_scatter_de.png")
print("Saved: 05_age_vs_tweets_scatter_en.png")

fig_en.show()

## 6. Tweet Activity by Gender and Age Group

In [None]:
def create_gender_age_heatmap(lang='de'):
    """Heatmap showing tweet activity by gender and age group.

    Reads the module-level frame ``df_x_users`` and shows the mean
    ``tweet_count`` (rounded to one decimal) for every gender / age-group
    combination. Cells without data are left unlabelled.

    Args:
        lang: 'de' for German texts; any other value produces English texts.

    Returns:
        A plotly figure with one heatmap trace and the branding footer.
    """
    gender_col = 'gender_de' if lang == 'de' else 'gender_en'

    # Create pivot table: rows = gender, columns = age group, values = mean tweets
    pivot = df_x_users.pivot_table(
        values='tweet_count',
        index=gender_col,
        columns='age_group',
        aggfunc='mean'
    ).round(1)

    # Hover text is localised as well, consistent with the scatter chart
    # (it was previously English in both language versions).
    if lang == 'de':
        title = 'Tweet-Aktivität'
        subtitle = 'Nach Geschlecht und Alter (Ø Tweets)'
        genders = ['Männer', 'Frauen']
        hover = "<b>%{y}</b> | Alter %{x}<br>Ø: %{z:.1f} Tweets<extra></extra>"
    else:
        title = 'Tweet Activity'
        subtitle = 'By Gender and Age (Avg Tweets)'
        genders = ['Male', 'Female']
        hover = "<b>%{y}</b> | Age %{x}<br>Avg: %{z:.1f} tweets<extra></extra>"

    # Fix the row order; a gender missing from the data becomes an all-NaN row
    pivot = pivot.reindex(genders)

    fig = go.Figure(data=go.Heatmap(
        z=pivot.values,
        x=[str(c) for c in pivot.columns],
        y=pivot.index,
        colorscale=[
            [0, '#0d0d0d'],
            [0.25, '#1e3a5f'],
            [0.5, '#3d7ea6'],
            [0.75, '#5dade2'],
            [1, '#00ff88']
        ],
        # Empty string for NaN cells so no "nan" text is drawn
        text=[[f"{v:.1f}" if not np.isnan(v) else "" for v in row] for row in pivot.values],
        texttemplate="%{text}",
        textfont=dict(size=18, color='white'),
        hovertemplate=hover,
        colorbar=dict(
            title=dict(text='Ø Tweets' if lang == 'de' else 'Avg Tweets', font=dict(color='white')),
            tickfont=dict(color='white')
        )
    ))

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{subtitle} | {STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        xaxis=dict(title='Alter' if lang == 'de' else 'Age', tickfont=dict(size=14, color='white')),
        yaxis=dict(tickfont=dict(size=18, color='white')),
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=14),
        height=MOBILE_HEIGHT,
        margin=dict(l=120, r=80, t=180, b=100),
    )

    fig = add_branding(fig, lang)
    return fig

# Render the German and English heatmaps, export both as PNG, show the English one.
# The output files carry the prefix "07_".
fig_de, fig_en = create_gender_age_heatmap('de'), create_gender_age_heatmap('en')

_exports = {
    "07_gender_age_heatmap_de.png": fig_de,
    "07_gender_age_heatmap_en.png": fig_en,
}
for _filename, _figure in _exports.items():
    _figure.write_image(GRAPHICS_DIR / _filename, width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2)

for _filename in _exports:
    print(f"Saved: {_filename}")

fig_en.show()

## 7. Top 10 Most Active Tweeters

In [None]:
def create_top_tweeters_chart(lang='de'):
    """Horizontal bar chart of the ten MPs with the highest tweet counts.

    The ten rows of ``df_x_users`` with the largest ``tweet_count`` are drawn
    as horizontal bars coloured via ``get_party_color``. The tweet count is
    annotated inside the right end of each bar and a "party | age | gender"
    tag inside the left end, so no text is cut off at the figure edge.

    Args:
        lang: 'de' selects the German title, subtitle and date text; any other
            value selects the English ones.

    Returns:
        The figure returned by ``add_branding``.
    """
    top_10 = df_x_users.nlargest(10, 'tweet_count').copy()
    top_10['name'] = top_10['vorname'] + ' ' + top_10['nachname']
    top_10 = top_10.sort_values('tweet_count', ascending=True)

    if lang == 'de':
        title = 'Die Top 10 Twitterer'
        subtitle = 'Aktivste MdBs auf X'
    else:
        title = 'Top 10 Tweeters'
        subtitle = 'Most Active MPs on X'

    fig = go.Figure()

    # Longest bar; label offsets below are expressed as a fraction of it.
    max_val = top_10['tweet_count'].max()

    fig.add_trace(go.Bar(
        y=top_10['name'],
        x=top_10['tweet_count'],
        orientation='h',
        marker=dict(
            color=[get_party_color(p) for p in top_10['party_norm']],
            line=dict(width=0)
        ),
        width=0.7,
        hovertemplate="<b>%{y}</b><br>Tweets: %{x}<extra></extra>",
    ))

    # Add value labels INSIDE bars (right-aligned) to avoid cutoff
    for _, row in top_10.iterrows():
        # NOTE(review): every value other than 'Male' gets the female icon -
        # confirm that gender_en only ever holds 'Male' / 'Female'.
        gender_icon = "♂" if row['gender_en'] == 'Male' else "♀"

        # Value label inside bar (right side), inset by 2% of the longest bar
        fig.add_annotation(
            x=row['tweet_count'] - max_val * 0.02,
            y=row['name'],
            text=f"<b>{row['tweet_count']}</b>",
            showarrow=False,
            font=dict(color='white', size=16),
            xanchor='right'
        )

        # Party and demographics label inside bar (left side)
        # NOTE(review): the 'J' age suffix looks German ("Jahre") and is also
        # used when lang != 'de' - confirm whether that is intended.
        label = f"{row['party_norm']} | {row['age']}J {gender_icon}"
        fig.add_annotation(
            x=max_val * 0.02,
            y=row['name'],
            text=label,
            showarrow=False,
            font=dict(color='white', size=11),
            xanchor='left'
        )

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{subtitle} | {STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        xaxis=dict(
            gridcolor='#333333',
            showticklabels=False,  # values are already printed inside the bars
        ),
        yaxis=dict(
            gridcolor='#333333',
            tickfont=dict(size=16, color='white')
        ),
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=14),
        height=MOBILE_HEIGHT,
        margin=dict(l=200, r=40, t=180, b=100),
        showlegend=False,
    )

    fig = add_branding(fig, lang)
    return fig

# Render the German and English charts (German first)
fig_de, fig_en = (create_top_tweeters_chart(code) for code in ('de', 'en'))

# Export each variant as a PNG at the mobile portrait dimensions
fig_de.write_image(
    GRAPHICS_DIR / "06_top_tweeters_de.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)
fig_en.write_image(
    GRAPHICS_DIR / "06_top_tweeters_en.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)

# Report both file names, one per line
print(
    "Saved: 06_top_tweeters_de.png",
    "Saved: 06_top_tweeters_en.png",
    sep="\n",
)

# Console listing of the 15 MPs with the highest tweet counts
top_15 = df_x_users.nlargest(15, 'tweet_count')
print(f"\nTop 15 Tweeters ({START_DATE_STR} to {END_DATE_STR}):\n")
for _, row in top_15.iterrows():
    print(f"{row['vorname']} {row['nachname']:20} | {row['party_norm']:10} | {row['gender_en']:6} | Age {row['age']:2} | {row['tweet_count']:3} tweets")

# Preview the English variant
fig_en.show()

## 8. Top 10 MPs by Reach (Impressions)

In [None]:
def format_number(val: float) -> str:
    """Abbreviate a count with a K (thousands) or M (millions) suffix.

    Values of one million or more are shown with one decimal and "M";
    values of one thousand or more are shown as whole thousands and "K";
    smaller values are truncated to an integer.

    Args:
        val: The number to format.

    Returns:
        The abbreviated text, e.g. "999", "12K" or "1.5M".
    """
    if val >= 1_000_000:
        return f"{val/1_000_000:.1f}M"
    if val >= 1_000:
        thousands = f"{val/1_000:.0f}"
        # Values just under one million round up to "1000"; promote them to
        # the M form instead of emitting the misleading "1000K".
        if thousands == "1000":
            return f"{val/1_000_000:.1f}M"
        return f"{thousands}K"
    return f"{int(val)}"

def create_top_reach_chart(lang='de'):
    """Horizontal bar chart ranking the ten MPs with the most impressions.

    Only rows of ``df_x_users`` with ``total_impressions > 0`` are ranked.
    Bars use party colours; the abbreviated impression total is written inside
    the right end of each bar and a party/age/gender tag inside the left end.
    ``lang == 'de'`` gives German headings, any other value English ones.
    """
    # Ten largest impression totals among MPs with any reach, ordered ascending
    ranked = (
        df_x_users[df_x_users['total_impressions'] > 0]
        .nlargest(10, 'total_impressions')
        .assign(name=lambda frame: frame['vorname'] + ' ' + frame['nachname'])
        .sort_values('total_impressions')
    )

    title, subtitle = (
        ('Die Top 10 nach Reichweite', 'MdBs mit den meisten Impressionen')
        if lang == 'de'
        else ('Top 10 by Reach', 'MPs with the Most Impressions')
    )

    # Labels are inset from the bar ends by 2% of the longest bar
    inset = ranked['total_impressions'].max() * 0.02

    bars = go.Bar(
        x=ranked['total_impressions'],
        y=ranked['name'],
        orientation='h',
        width=0.7,
        marker={
            'color': [get_party_color(party) for party in ranked['party_norm']],
            'line': {'width': 0},
        },
        hovertemplate="<b>%{y}</b><br>Impressions: %{x:,.0f}<extra></extra>",
    )
    fig = go.Figure()
    fig.add_trace(bars)

    for _, row in ranked.iterrows():
        gender_icon = "♂" if row['gender_en'] == 'Male' else "♀"

        # Abbreviated total, right-aligned inside the bar
        fig.add_annotation(
            text=f"<b>{format_number(row['total_impressions'])}</b>",
            x=row['total_impressions'] - inset,
            y=row['name'],
            xanchor='right',
            showarrow=False,
            font={'color': 'white', 'size': 16},
        )

        # Party / age / gender tag, left-aligned inside the bar
        label = f"{row['party_norm']} | {row['age']}J {gender_icon}"
        fig.add_annotation(
            text=label,
            x=inset,
            y=row['name'],
            xanchor='left',
            showarrow=False,
            font={'color': 'white', 'size': 11},
        )

    fig.update_layout(
        title={
            'text': f"<b>{title}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{subtitle} | {STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN}</span>",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 32, 'color': 'white', 'family': 'Arial Black'},
        },
        xaxis={'gridcolor': '#333333', 'showticklabels': False},
        yaxis={'gridcolor': '#333333', 'tickfont': {'size': 16, 'color': 'white'}},
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font={'color': 'white', 'size': 14},
        height=MOBILE_HEIGHT,
        margin={'l': 200, 'r': 40, 't': 180, 'b': 100},
        showlegend=False,
    )

    return add_branding(fig, lang)

# Render the German and English charts (German first)
fig_de, fig_en = (create_top_reach_chart(code) for code in ('de', 'en'))

# Export each variant as a PNG at the mobile portrait dimensions
fig_de.write_image(
    GRAPHICS_DIR / "06b_top_reach_de.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)
fig_en.write_image(
    GRAPHICS_DIR / "06b_top_reach_en.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)

# Report both file names, one per line
print(
    "Saved: 06b_top_reach_de.png",
    "Saved: 06b_top_reach_en.png",
    sep="\n",
)

# Console listing of the 15 MPs with the most impressions (zero-reach rows excluded)
df_with_reach = df_x_users.loc[df_x_users['total_impressions'] > 0]
top_15_reach = df_with_reach.nlargest(15, 'total_impressions')
print(f"\nTop 15 by Reach ({START_DATE_STR} to {END_DATE_STR}):\n")
for _, row in top_15_reach.iterrows():
    print(f"{row['vorname']} {row['nachname']:20} | {row['party_norm']:10} | {row['gender_en']:6} | Age {row['age']:2} | {format_number(row['total_impressions']):>6} impressions | {row['tweet_count']:3} tweets")

# Preview the English variant
fig_en.show()

## 8b. Top 10 MPs by Likes

In [None]:
def create_top_likes_chart(lang='de'):
    """Horizontal bar chart ranking the ten MPs with the most likes.

    Only rows of ``df_x_users`` with ``total_likes > 0`` are ranked. Bars use
    party colours; the abbreviated like total is written inside the right end
    of each bar and a party/age/gender tag inside the left end.
    ``lang == 'de'`` gives German headings, any other value English ones.
    """
    # Ten largest like totals among MPs with any likes, ordered ascending
    ranked = (
        df_x_users[df_x_users['total_likes'] > 0]
        .nlargest(10, 'total_likes')
        .assign(name=lambda frame: frame['vorname'] + ' ' + frame['nachname'])
        .sort_values('total_likes')
    )

    title, subtitle = (
        ('Die Top 10 nach Likes', 'MdBs mit den meisten Likes')
        if lang == 'de'
        else ('Top 10 by Likes', 'MPs with the Most Likes')
    )

    # Labels are inset from the bar ends by 2% of the longest bar
    inset = ranked['total_likes'].max() * 0.02

    bars = go.Bar(
        x=ranked['total_likes'],
        y=ranked['name'],
        orientation='h',
        width=0.7,
        marker={
            'color': [get_party_color(party) for party in ranked['party_norm']],
            'line': {'width': 0},
        },
        hovertemplate="<b>%{y}</b><br>Likes: %{x:,.0f}<extra></extra>",
    )
    fig = go.Figure()
    fig.add_trace(bars)

    for _, row in ranked.iterrows():
        gender_icon = "♂" if row['gender_en'] == 'Male' else "♀"

        # Abbreviated total, right-aligned inside the bar
        fig.add_annotation(
            text=f"<b>{format_number(row['total_likes'])}</b>",
            x=row['total_likes'] - inset,
            y=row['name'],
            xanchor='right',
            showarrow=False,
            font={'color': 'white', 'size': 16},
        )

        # Party / age / gender tag, left-aligned inside the bar
        label = f"{row['party_norm']} | {row['age']}J {gender_icon}"
        fig.add_annotation(
            text=label,
            x=inset,
            y=row['name'],
            xanchor='left',
            showarrow=False,
            font={'color': 'white', 'size': 11},
        )

    fig.update_layout(
        title={
            'text': f"<b>{title}</b><br><span style='font-size:0.55em; color:#aaaaaa;'>{subtitle} | {STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN}</span>",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 32, 'color': 'white', 'family': 'Arial Black'},
        },
        xaxis={'gridcolor': '#333333', 'showticklabels': False},
        yaxis={'gridcolor': '#333333', 'tickfont': {'size': 16, 'color': 'white'}},
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font={'color': 'white', 'size': 14},
        height=MOBILE_HEIGHT,
        margin={'l': 200, 'r': 40, 't': 180, 'b': 100},
        showlegend=False,
    )

    return add_branding(fig, lang)

# Render the German and English charts (German first)
fig_de, fig_en = (create_top_likes_chart(code) for code in ('de', 'en'))

# Export each variant as a PNG at the mobile portrait dimensions
fig_de.write_image(
    GRAPHICS_DIR / "06c_top_likes_de.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)
fig_en.write_image(
    GRAPHICS_DIR / "06c_top_likes_en.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)

# Report both file names, one per line
print(
    "Saved: 06c_top_likes_de.png",
    "Saved: 06c_top_likes_en.png",
    sep="\n",
)

# Console listing of the 15 MPs with the most likes (zero-like rows excluded)
df_with_likes = df_x_users.loc[df_x_users['total_likes'] > 0]
top_15_likes = df_with_likes.nlargest(15, 'total_likes')
print(f"\nTop 15 by Likes ({START_DATE_STR} to {END_DATE_STR}):\n")
for _, row in top_15_likes.iterrows():
    print(f"{row['vorname']} {row['nachname']:20} | {row['party_norm']:10} | {row['gender_en']:6} | Age {row['age']:2} | {format_number(row['total_likes']):>6} likes | {row['tweet_count']:3} tweets")

# Preview the English variant
fig_en.show()

## 9. X Adoption Rate by Gender and Age

In [None]:
def create_x_adoption_chart(lang='de'):
    """Bar chart of the share of MPs with an X account per age group.

    ``df_all_mps`` is grouped by ``age_group``; for each group the sum of
    ``has_x_account`` is divided by the row count and expressed as a percentage
    rounded to one decimal. The percentage is printed above each bar and the
    underlying "with account / total" counts beneath it.

    Args:
        lang: 'de' selects the German title, subtitle and axis labels; any
            other value selects the English ones.

    Returns:
        The figure returned by ``add_branding``.
    """
    # Calculate adoption rates by age group
    adoption_by_age = df_all_mps.groupby('age_group', observed=True).agg({
        'has_x_account': ['sum', 'count']
    })
    adoption_by_age.columns = ['with_x', 'total']
    adoption_by_age['rate'] = (adoption_by_age['with_x'] / adoption_by_age['total'] * 100).round(1)
    adoption_by_age = adoption_by_age.reset_index()

    if lang == 'de':
        title = 'Wer ist auf X?'
        subtitle = 'X-Nutzung nach Altersgruppe'
        yaxis_title = 'Anteil mit X-Account'
    else:
        title = 'Who is on X?'
        subtitle = 'X adoption by age group'
        yaxis_title = '% with X Account'

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=adoption_by_age['age_group'].astype(str),
        y=adoption_by_age['rate'],
        marker=dict(
            color=AGE_COLORS[:len(adoption_by_age)],
            line=dict(width=0),
        ),
        text=[f"<b>{v:.0f}%</b>" for v in adoption_by_age['rate']],
        textposition='outside',
        textfont=dict(color='white', size=20),
        width=0.7,
    ))

    # Add ratio annotations just below the zero line of each bar
    for _, row in adoption_by_age.iterrows():
        fig.add_annotation(
            x=str(row['age_group']), y=-5,
            text=f"{int(row['with_x'])}/{int(row['total'])}",
            showarrow=False, font=dict(color='#666666', size=12)
        )

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.6em; color:#aaaaaa;'>{subtitle}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color='white', family='Arial Black')
        ),
        # The range starts below zero so the ratio annotations anchored at
        # y=-5 fall inside the axis range; Plotly does not draw data-referenced
        # annotations whose anchor point lies outside the axis range.
        yaxis=dict(title=yaxis_title, gridcolor='#333333', range=[-10, 110]),
        xaxis=dict(title='Alter' if lang == 'de' else 'Age', gridcolor='#333333', tickfont=dict(size=14)),
        plot_bgcolor='#0d0d0d',
        paper_bgcolor='#0d0d0d',
        font=dict(color='white', size=16),
        height=MOBILE_HEIGHT,
        margin=dict(l=80, r=60, t=180, b=120),
        showlegend=False,
    )

    fig = add_branding(fig, lang)
    return fig

# Render the German and English charts (German first)
fig_de, fig_en = (create_x_adoption_chart(code) for code in ('de', 'en'))

# Export each variant as a PNG at the mobile portrait dimensions
fig_de.write_image(
    GRAPHICS_DIR / "08_x_adoption_by_age_de.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)
fig_en.write_image(
    GRAPHICS_DIR / "08_x_adoption_by_age_en.png",
    width=MOBILE_WIDTH,
    height=MOBILE_HEIGHT,
    scale=2,
)

# Report both file names, one per line
print(
    "Saved: 08_x_adoption_by_age_de.png",
    "Saved: 08_x_adoption_by_age_en.png",
    sep="\n",
)

# Preview the English variant
fig_en.show()

In [None]:
import os

from IPython.display import Image, display

# Console summary: headline statistics, an inventory of the exported PNG
# charts per language, and an inline preview of the English charts.
print(f"\n{'='*80}")
print("MP DEMOGRAPHICS vs TWEET ACTIVITY - INSTAGRAM READY")
print(f"{'='*80}\n")

print(f"Tweet Period: {START_DATE_STR} to {END_DATE_STR}")
print(f"Chart Size: {MOBILE_WIDTH}x{MOBILE_HEIGHT}px (Instagram Portrait)\n")

print("KEY FINDINGS:")
print("-" * 40)

# X adoption
x_rate = df_all_mps['has_x_account'].mean() * 100
print(f"  X Adoption: {x_rate:.0f}% of MPs have an X account")

# Gender comparison (shares in percent)
all_gender = df_all_mps['gender_en'].value_counts(normalize=True) * 100
x_gender = df_x_users['gender_en'].value_counts(normalize=True) * 100
print(f"  Gender (All MPs): {all_gender.get('Male', 0):.0f}% Male, {all_gender.get('Female', 0):.0f}% Female")
print(f"  Gender (X Users): {x_gender.get('Male', 0):.0f}% Male, {x_gender.get('Female', 0):.0f}% Female")

# Age comparison
print(f"  Avg Age (All MPs): {df_all_mps['age'].mean():.1f} years")
print(f"  Avg Age (X Users): {df_x_users['age'].mean():.1f} years")

# Tweet activity
male_avg = df_x_users[df_x_users['gender_en'] == 'Male']['tweet_count'].mean()
female_avg = df_x_users[df_x_users['gender_en'] == 'Female']['tweet_count'].mean()
print(f"  Avg Tweets (Male): {male_avg:.1f}")
print(f"  Avg Tweets (Female): {female_avg:.1f}")

# Correlation between age and tweet count, restricted to MPs who tweeted
df_active = df_x_users[df_x_users['tweet_count'] > 0]
correlation = df_active['age'].corr(df_active['tweet_count'])
print(f"  Age-Tweet Correlation: {correlation:.2f}")

print(f"\n{'='*80}")
print("GENERATED CHARTS:")
print(f"{'='*80}")

# Split the exported PNGs by their language suffix. endswith() is used so a
# "_de.png" / "_en.png" fragment elsewhere in a file name cannot match.
png_files = sorted(GRAPHICS_DIR.glob("*.png"))
de_files = [f for f in png_files if f.name.endswith('_de.png')]
en_files = [f for f in png_files if f.name.endswith('_en.png')]

print(f"\nGerman versions ({len(de_files)} charts):")
for f in de_files:
    size = os.path.getsize(f) / 1024
    print(f"  {f.name} ({size:.0f} KB)")

print(f"\nEnglish versions ({len(en_files)} charts):")
for f in en_files:
    size = os.path.getsize(f) / 1024
    print(f"  {f.name} ({size:.0f} KB)")

print(f"\nTotal: {len(png_files)} charts ready for Instagram!")

# Display all generated charts (English versions)
print("Instagram-Ready Charts (English versions):\n")
print("=" * 60)

for png_file in en_files:
    # Drop only the trailing "_en" language suffix; str.replace would also
    # remove any "_en" that occurs inside the chart name itself.
    print(f"\n{png_file.stem[:-len('_en')].upper()}")
    display(Image(filename=str(png_file), width=350))
    print()

In [None]:
import os

# Plain-text report: MP totals, gender and age comparison, tweet activity,
# and the list of exported chart files.
print("\n" + "=" * 80)
print("MP DEMOGRAPHICS vs TWEET ACTIVITY ANALYSIS")
print("=" * 80 + "\n")

print(f"Tweet Period: {START_DATE_STR} to {END_DATE_STR}\n")

# Headcount and X account coverage
print("ALL MPs STATISTICS:")
print(f"  Total MPs in Bundestag: {len(df_all_mps)}")
print(f"  MPs with X account: {df_all_mps['has_x_account'].sum()} ({df_all_mps['has_x_account'].mean()*100:.1f}%)")
print(f"  MPs without X account: {(~df_all_mps['has_x_account']).sum()} ({(~df_all_mps['has_x_account']).mean()*100:.1f}%)")

# Absolute gender counts, with the share of each population in brackets
print("\nGENDER COMPARISON:")
all_gender = df_all_mps['gender_en'].value_counts()
x_gender = df_x_users['gender_en'].value_counts()
print(f"  All MPs - Male: {all_gender.get('Male', 0)} ({all_gender.get('Male', 0)/len(df_all_mps)*100:.1f}%), Female: {all_gender.get('Female', 0)} ({all_gender.get('Female', 0)/len(df_all_mps)*100:.1f}%)")
print(f"  X Users - Male: {x_gender.get('Male', 0)} ({x_gender.get('Male', 0)/len(df_x_users)*100:.1f}%), Female: {x_gender.get('Female', 0)} ({x_gender.get('Female', 0)/len(df_x_users)*100:.1f}%)")

print("\nAGE COMPARISON:")
print(f"  All MPs - Average age: {df_all_mps['age'].mean():.1f} years")
print(f"  X Users - Average age: {df_x_users['age'].mean():.1f} years")

print("\nTWEET ACTIVITY (X Users):")
print(f"  Total tweets: {df_x_users['tweet_count'].sum():,}")
print(f"  MPs who tweeted: {(df_x_users['tweet_count'] > 0).sum()} ({(df_x_users['tweet_count'] > 0).mean()*100:.1f}%)")
print(f"  Average tweets per MP: {df_x_users['tweet_count'].mean():.1f}")

print("\nGENDER BREAKDOWN (Tweet Activity):")
for gender in ('Male', 'Female'):
    gender_df = df_x_users.loc[df_x_users['gender_en'] == gender]
    print(f"  {gender}: {len(gender_df)} MPs, {gender_df['tweet_count'].mean():.1f} avg tweets")

# Age-tweet correlation, restricted to MPs with at least one tweet
df_active = df_x_users.loc[df_x_users['tweet_count'] > 0]
correlation = df_active['age'].corr(df_active['tweet_count'])
print(f"\nAGE-TWEET CORRELATION: {correlation:.3f}")

print("\n" + "=" * 80)
print(f"GENERATED CHARTS ({MOBILE_WIDTH}x{MOBILE_HEIGHT}px - Mobile/Instagram Portrait):")
print("=" * 80)

# Every exported PNG with its size on disk in KB
png_files = sorted(GRAPHICS_DIR.glob("*.png"))
for png_file in png_files:
    size = os.path.getsize(png_file) / 1024
    print(f"  {png_file.name} ({size:.1f} KB)")

print(f"\nTotal charts: {len(png_files)}")
print(f"  German versions: {sum(1 for f in png_files if '_de.png' in f.name)}")
print(f"  English versions: {sum(1 for f in png_files if '_en.png' in f.name)}")

print("\nAnalysis complete!")

In [None]:
# Inline preview of every exported English chart, in file-name order
from IPython.display import Image, display

print("Generated Charts (English versions):\n")

en_files = sorted(GRAPHICS_DIR.glob("*_en.png"))
for png_file in en_files:
    # File name first, then the image scaled to 400 px wide, then a blank line
    print(png_file.name)
    display(Image(filename=str(png_file), width=400))
    print()