# Politicians vs Election Results Analysis

Analyze how politicians' social media activity relates to their party's 2025 election performance.

In [None]:
# Import libraries
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
import yaml
from datetime import datetime

# Database connection
import sys
# Prepend <cwd>/../src to the module search path, presumably so that the
# in-repo `xminer` package imported below can be found. This depends on the
# working directory the notebook is started from.
sys.path.insert(0, str(Path.cwd().parent / 'src'))
from xminer.io.db import engine
from sqlalchemy import text

print('✅ Libraries imported successfully')

In [None]:
# Configuration
# Relative path: resolved against the current working directory at run time.
PARAMS_FILE = Path("../src/xminer/config/parameters.yml")

with PARAMS_FILE.open("r", encoding="utf-8") as f:
    # Fall back to an empty dict when the file yields a falsy document
    # (e.g. an empty file), so the .get() lookups below still work.
    params = yaml.safe_load(f) or {}

# Year and month are only used here to build the YYYYMM folder name; they
# default to 2025 / 12 when the keys are missing from the parameters file.
YEAR = int(params.get("year", 2025))
MONTH = int(params.get("month", 12))
YM = f"{YEAR:04d}{MONTH:02d}"

# Graphics directory
GRAPHICS_BASE_DIR = Path(params.get("graphics_base_dir", "../outputs"))
GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics" / "election_analysis"
# Side effect: creates the whole directory chain on disk if it is missing.
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)

print(f"Output: {GRAPHICS_DIR}")

In [None]:
# Party colors (standard German party colors)
PARTY_COLORS = {
    "CDU": "#000000",
    "CSU": "#0080C9",
    "SPD": "#E3000F",
    "GRÜNE": "#1AA64A",
    "BÜNDNIS 90/DIE GRÜNEN": "#1AA64A",
    "FDP": "#FFED00",
    "AFD": "#009EE0",
    "AfD": "#009EE0",
    "DIE LINKE.": "#BE3075",
    "Die Linke": "#BE3075",
    "BSW": "#009688",
    "SSW": "#003C8F",
}

# Colour returned for parties that are not listed in PARTY_COLORS.
_DEFAULT_PARTY_COLOR = "#888888"

# Case-folded copy of PARTY_COLORS, used as a fallback so that spelling
# variants such as "afd" or "Grüne" resolve to the same colour as the listed
# spellings.
_PARTY_COLORS_FOLDED = {
    name.casefold(): color for name, color in PARTY_COLORS.items()
}


def get_party_color(party: str) -> str:
    """Return the hex colour for *party*.

    The exact spelling is tried first. If it is not listed, the name is
    stripped of surrounding whitespace and compared case-insensitively.

    Args:
        party: Party short name, e.g. ``"SPD"`` or ``"AfD"``.

    Returns:
        The colour from ``PARTY_COLORS``, or ``"#888888"`` (grey) when the
        party is unknown or *party* is not a string (e.g. ``None`` / NaN).
    """
    if not isinstance(party, str):
        return _DEFAULT_PARTY_COLOR

    exact = PARTY_COLORS.get(party)
    if exact is not None:
        return exact

    return _PARTY_COLORS_FOLDED.get(party.strip().casefold(), _DEFAULT_PARTY_COLOR)

## 1. Load Data from Analysis Views

In [None]:
# Get party-level tweet activity vs results
# Selects every column of `politicians_tweets_vs_results`, drops rows whose
# `party_change_pkt` is NULL and orders by that column, highest value first.
query_party_activity = """
SELECT *
FROM politicians_tweets_vs_results
WHERE party_change_pkt IS NOT NULL
ORDER BY party_change_pkt DESC
"""

# Run the query on a connection that is closed again when the block exits.
with engine.connect() as conn:
    df_party_activity = pd.read_sql(text(query_party_activity), conn)

print(f"Loaded {len(df_party_activity)} parties")
# Last expression of the cell: presumably rendered as the notebook cell output.
df_party_activity.head(10)

In [None]:
# Get top active politicians
# NOTE: deliberately no LIMIT. The rows are ordered by party vote change (best
# first), so a row cap cuts off the END of that ordering - exactly the
# politicians of the worst-performing parties that the "losers" chart filters
# for (party_change_pkt < 0). The `rank_in_party <= 10` predicate already
# restricts which rows are fetched.
query_top_politicians = """
SELECT *
FROM top_politicians_by_performance
WHERE rank_in_party <= 10
ORDER BY party_change_pkt DESC NULLS LAST, tweets_2025 DESC
"""

# Run the query on a connection that is closed again when the block exits.
with engine.connect() as conn:
    df_top_politicians = pd.read_sql(text(query_top_politicians), conn)

print(f"Loaded {len(df_top_politicians)} top politicians")
df_top_politicians.head(15)

In [None]:
# Get performance category aggregation
# One result row per performance category: number of distinct parties, summed
# tweets, and the rounded means of the per-party tweet and like averages.
query_category = """
SELECT
    performance_category,
    COUNT(DISTINCT partei_kurz) as parties,
    SUM(total_tweets) as total_tweets,
    ROUND(AVG(avg_tweets_per_politician), 1) as avg_tweets_per_pol,
    ROUND(AVG(avg_likes_per_tweet), 1) as avg_likes
FROM politicians_tweets_vs_results
WHERE performance_category IN ('Big Winner (>5%)', 'Winner', 'Loser', 'Big Loser (<-5%)')
GROUP BY performance_category
"""

with engine.connect() as db_conn:
    df_category = pd.read_sql(text(query_category), db_conn)

# Order categories
# Display order runs from best to worst; a label missing from the list gets
# rank 999 and therefore sorts to the end.
category_order = ['Big Winner (>5%)', 'Winner', 'Loser', 'Big Loser (<-5%)']
_category_rank = {label: position for position, label in enumerate(category_order)}
df_category['category_order'] = [
    _category_rank.get(label, 999) for label in df_category['performance_category']
]
df_category = df_category.sort_values('category_order')

print(f"Loaded {len(df_category)} performance categories")
df_category

## 2. Visualization 1: Tweet Activity vs Election Performance

In [None]:
# Create scatter plot: Tweet activity vs election change
def create_tweet_vs_performance_plot(df, language='de'):
    """Build a scatter plot of tweet activity against the party's vote change.

    One marker is drawn per row of ``df``: x is the average number of tweets
    per politician, y is the party's vote change, the marker size grows with
    the number of politicians and the colour comes from ``get_party_color``.

    Args:
        df: DataFrame with the columns ``partei_kurz``, ``party_change_pkt``,
            ``politician_count``, ``avg_tweets_per_politician`` and
            ``avg_likes_per_tweet``.
        language: ``'de'`` selects German texts (title, axis titles, hover
            labels); any other value selects English texts.

    Returns:
        A dark-themed ``go.Figure`` sized 1080x1350.
    """
    if language == 'de':
        title = "<b>Social Media Aktivität vs. Wahlergebnis 2025</b><br><sub>Tweets pro Politiker vs. Veränderung zum 2021 Ergebnis</sub>"
        xaxis_title = "<b>Durchschnittliche Tweets pro Politiker (2025)</b>"
        yaxis_title = "<b>Veränderung Zweitstimmen (%)</b>"
        hover_change, hover_politicians, hover_likes = "Veränderung", "Politiker", "Ø Likes"
    else:
        title = "<b>Social Media Activity vs. Election Results 2025</b><br><sub>Tweets per Politician vs. Change from 2021</sub>"
        xaxis_title = "<b>Average Tweets per Politician (2025)</b>"
        yaxis_title = "<b>Change in Party List Vote (%)</b>"
        # Hover labels follow the requested language instead of always being German.
        hover_change, hover_politicians, hover_likes = "Change", "Politicians", "Avg. likes"

    fig = go.Figure()

    # Add scatter points (one trace per party so each keeps its own colour/size)
    for _, row in df.iterrows():
        party_color = get_party_color(row['partei_kurz'])

        hover_text = (
            f"<b>{row['partei_kurz']}</b><br>"
            f"{hover_change}: {row['party_change_pkt']:+.1f}%<br>"
            f"{hover_politicians}: {row['politician_count']}<br>"
            f"Tweets/Pol: {row['avg_tweets_per_politician']:.1f}<br>"
            f"{hover_likes}: {row['avg_likes_per_tweet']:.1f}"
        )

        fig.add_trace(go.Scatter(
            x=[row['avg_tweets_per_politician']],
            y=[row['party_change_pkt']],
            mode='markers+text',
            marker=dict(
                size=row['politician_count'] * 2,  # Size by number of politicians
                color=party_color,
                line=dict(color='white', width=2)
            ),
            text=row['partei_kurz'],
            textposition='top center',
            textfont=dict(color='white', size=14, family='Arial Black'),
            name=row['partei_kurz'],
            hovertext=hover_text,
            hoverinfo='text',
            showlegend=False
        ))

    # Add horizontal line at y=0
    fig.add_hline(y=0, line_dash="dash", line_color="#666666", line_width=1)

    xaxis = dict(
        title=xaxis_title,
        gridcolor='#333333',
        title_font=dict(size=18),
        tickfont=dict(size=14),
    )
    # Fixed x-range: a little room left of zero and 10% headroom on the right.
    # Skipped when there is no maximum (empty / all-NaN column) so the axis is
    # left to autorange instead of receiving a NaN bound. float() keeps the
    # scaling working when the column holds non-float numerics (e.g. Decimal).
    x_max = df['avg_tweets_per_politician'].max()
    if not pd.isna(x_max):
        xaxis['range'] = [-10, float(x_max) * 1.1]

    fig.update_layout(
        title=dict(
            text=title,
            x=0.5,
            xanchor='center',
            font=dict(size=28, color='white')
        ),
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white', size=14, family='Arial'),
        height=1350,  # Instagram portrait
        width=1080,
        margin=dict(b=80, t=140, l=100, r=100),
        xaxis=xaxis,
        yaxis=dict(
            title=yaxis_title,
            gridcolor='#333333',
            title_font=dict(size=18),
            tickfont=dict(size=14),
            zeroline=True,
            zerolinecolor='#666666'
        )
    )

    return fig

# Create and save both versions
fig_de = create_tweet_vs_performance_plot(df_party_activity, 'de')
fig_en = create_tweet_vs_performance_plot(df_party_activity, 'en')

output_de = GRAPHICS_DIR / "tweet_activity_vs_performance_de.png"
output_en = GRAPHICS_DIR / "tweet_activity_vs_performance_en.png"

# Static PNG export; width/height repeat the figure's own 1080x1350 layout.
# NOTE(review): write_image needs a static-image export backend (e.g. kaleido)
# to be installed - confirm for the target environment.
fig_de.write_image(output_de, width=1080, height=1350, scale=2)
fig_en.write_image(output_en, width=1080, height=1350, scale=2)

print(f"✅ Saved: {output_de}")
print(f"✅ Saved: {output_en}")

# Only the German variant is displayed inline; the English one is only saved.
fig_de.show()

## 3. Visualization 2: Winners vs Losers - Tweet Activity Comparison

In [None]:
# Create bar chart comparing winners vs losers
def create_winners_vs_losers_plot(df, language='de'):
    """Build a bar chart of average tweets per politician per performance category.

    Args:
        df: DataFrame providing the columns ``performance_category`` (bar
            labels) and ``avg_tweets_per_pol`` (bar heights).
        language: ``'de'`` selects the German title and axis label; every
            other value selects the English ones.

    Returns:
        A dark-themed 1080x1080 ``go.Figure`` holding a single bar trace.
    """
    german = language == 'de'
    title = (
        "<b>Gewinner vs. Verlierer: Social Media Aktivität</b><br><sub>Durchschnittliche Tweets pro Politiker (2025)</sub>"
        if german else
        "<b>Winners vs. Losers: Social Media Activity</b><br><sub>Average Tweets per Politician (2025)</sub>"
    )
    yaxis_title = "<b>Tweets pro Politiker</b>" if german else "<b>Tweets per Politician</b>"

    # Bar colour per category; a category missing from this table is drawn grey.
    palette = {
        'Big Winner (>5%)': '#00AA00',
        'Winner': '#66CC66',
        'Loser': '#FF9966',
        'Big Loser (<-5%)': '#CC0000',
    }

    categories = df['performance_category']
    tweet_rates = df['avg_tweets_per_pol']

    bars = go.Bar(
        x=categories,
        y=tweet_rates,
        marker_color=[palette.get(name, '#888888') for name in categories],
        # Value label printed above each bar, one decimal place.
        text=[f"<b>{rate:.1f}</b>" for rate in tweet_rates],
        textposition='outside',
        textfont={'color': 'white', 'size': 18},
        hovertemplate="<b>%{x}</b><br>Tweets/Pol: %{y:.1f}<br><extra></extra>",
    )

    fig = go.Figure(data=[bars])

    title_layout = {
        'text': title,
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 28, 'color': 'white'},
    }
    xaxis_layout = {
        'gridcolor': '#333333',
        'tickfont': {'size': 13},
        'tickangle': -20,
    }
    yaxis_layout = {
        'title': yaxis_title,
        'gridcolor': '#333333',
        'title_font': {'size': 18},
        'tickfont': {'size': 14},
    }

    fig.update_layout(
        title=title_layout,
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font={'color': 'white', 'size': 14, 'family': 'Arial'},
        height=1080,  # Instagram square
        width=1080,
        showlegend=False,
        margin={'b': 120, 't': 140, 'l': 100, 'r': 100},
        xaxis=xaxis_layout,
        yaxis=yaxis_layout,
    )

    return fig

# Create and save
fig_de = create_winners_vs_losers_plot(df_category, 'de')
fig_en = create_winners_vs_losers_plot(df_category, 'en')

output_de = GRAPHICS_DIR / "winners_vs_losers_de.png"
output_en = GRAPHICS_DIR / "winners_vs_losers_en.png"

# Static PNG export; width/height repeat the figure's own 1080x1080 layout.
# NOTE(review): write_image needs a static-image export backend (e.g. kaleido)
# to be installed - confirm for the target environment.
fig_de.write_image(output_de, width=1080, height=1080, scale=2)
fig_en.write_image(output_en, width=1080, height=1080, scale=2)

print(f"✅ Saved: {output_de}")
print(f"✅ Saved: {output_en}")

# Only the German variant is displayed inline; the English one is only saved.
fig_de.show()

## 4. Visualization 3: Top Active Politicians by Party Performance

In [None]:
# Most active politicians in winning / losing parties
def create_top_politicians_plot(df, category, language='de'):
    """Build a horizontal bar chart of the 15 most active politicians.

    The rows of ``df`` are first restricted to parties that gained
    (``category == 'winners'``) or lost (any other ``category`` value) votes.
    From those, the 15 politicians with the most tweets are plotted.

    Args:
        df: DataFrame with the columns ``party_change_pkt``, ``tweets_2025``,
            ``full_name``, ``partei_kurz``, ``avg_likes_2025`` and
            ``bundestag_votes_participated``.
        category: ``'winners'`` keeps rows with ``party_change_pkt > 0``;
            every other value keeps rows with ``party_change_pkt < 0``.
        language: ``'de'`` selects German texts; any other value selects
            English texts.

    Returns:
        A dark-themed 1080x1350 ``go.Figure``, or ``None`` (after printing a
        message) when no row matches the category.
    """
    german = language == 'de'

    if category == 'winners':
        in_group = df['party_change_pkt'] > 0
        if german:
            title = "<b>Aktivste Politiker in Gewinner-Parteien</b><br><sub>Top 15 nach Tweets (2025)</sub>"
        else:
            title = "<b>Most Active Politicians in Winning Parties</b><br><sub>Top 15 by Tweets (2025)</sub>"
    else:
        in_group = df['party_change_pkt'] < 0
        if german:
            title = "<b>Aktivste Politiker in Verlierer-Parteien</b><br><sub>Top 15 nach Tweets (2025)</sub>"
        else:
            title = "<b>Most Active Politicians in Losing Parties</b><br><sub>Top 15 by Tweets (2025)</sub>"

    df_filtered = df[in_group]

    if df_filtered.empty:
        print(f"No data for {category}")
        return None

    # Pick the top 15 BY TWEET COUNT. Taking head(15) of the incoming frame
    # would depend on whatever order the caller's rows happen to be in and
    # would not match the "Top 15 by Tweets" title.
    top_15 = df_filtered.sort_values('tweets_2025', ascending=False).head(15)

    # Reverse so the most active politician comes last: the first bar of a
    # horizontal bar chart is drawn at the bottom, the last one at the top.
    df_plot = top_15.iloc[::-1]

    # Hover labels follow the requested language.
    if german:
        lbl_party, lbl_likes, lbl_votes = "Partei", "Ø Likes", "Bundestag Abstimmungen"
    else:
        lbl_party, lbl_likes, lbl_votes = "Party", "Avg. likes", "Bundestag votes"

    # Create axis labels, bar colours and hover texts in a single pass
    labels, colors, hover_texts = [], [], []
    for _, row in df_plot.iterrows():
        labels.append(f"{row['full_name']} ({row['partei_kurz']})")
        colors.append(get_party_color(row['partei_kurz']))
        hover_texts.append(
            f"<b>{row['full_name']}</b><br>"
            f"{lbl_party}: {row['partei_kurz']} ({row['party_change_pkt']:+.1f}%)<br>"
            f"Tweets: {row['tweets_2025']:,}<br>"
            f"{lbl_likes}: {row['avg_likes_2025']:.1f}<br>"
            f"{lbl_votes}: {row['bundestag_votes_participated']}"
        )

    fig = go.Figure()

    fig.add_trace(go.Bar(
        y=labels,
        x=df_plot['tweets_2025'],
        orientation='h',
        marker_color=colors,
        # The pre-rendered hover HTML travels as customdata and is echoed as-is.
        customdata=hover_texts,
        hovertemplate="%{customdata}<extra></extra>"
    ))

    xaxis_title = "<b>Anzahl Tweets</b>" if german else "<b>Number of Tweets</b>"

    fig.update_layout(
        title=dict(
            text=title,
            x=0.5,
            xanchor='center',
            font=dict(size=28, color='white')
        ),
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white', size=14, family='Arial'),
        height=1350,  # Instagram portrait
        width=1080,
        showlegend=False,
        # Wide left margin leaves room for the "Name (Party)" tick labels.
        margin=dict(b=80, t=140, l=350, r=100),
        xaxis=dict(
            title=xaxis_title,
            gridcolor='#333333',
            title_font=dict(size=18),
            tickfont=dict(size=14)
        ),
        yaxis=dict(
            gridcolor='#333333',
            tickfont=dict(size=12)
        )
    )

    return fig

# Create for winners
fig_winners_de = create_top_politicians_plot(df_top_politicians, 'winners', 'de')
fig_winners_en = create_top_politicians_plot(df_top_politicians, 'winners', 'en')

# create_top_politicians_plot returns None when no row matches the category.
# Test for None explicitly (instead of relying on the truthiness of a Figure
# object) and for both language variants, since both are written below.
if fig_winners_de is not None and fig_winners_en is not None:
    output_de = GRAPHICS_DIR / "top_politicians_winners_de.png"
    output_en = GRAPHICS_DIR / "top_politicians_winners_en.png"

    fig_winners_de.write_image(output_de, width=1080, height=1350, scale=2)
    fig_winners_en.write_image(output_en, width=1080, height=1350, scale=2)

    print(f"✅ Saved: {output_de}")
    print(f"✅ Saved: {output_en}")

    fig_winners_de.show()

# Create for losers
fig_losers_de = create_top_politicians_plot(df_top_politicians, 'losers', 'de')
fig_losers_en = create_top_politicians_plot(df_top_politicians, 'losers', 'en')

# Same None check as for the winners figures above.
if fig_losers_de is not None and fig_losers_en is not None:
    output_de = GRAPHICS_DIR / "top_politicians_losers_de.png"
    output_en = GRAPHICS_DIR / "top_politicians_losers_en.png"

    fig_losers_de.write_image(output_de, width=1080, height=1350, scale=2)
    fig_losers_en.write_image(output_en, width=1080, height=1350, scale=2)

    print(f"✅ Saved: {output_de}")
    print(f"✅ Saved: {output_en}")

    fig_losers_de.show()

## 5. Visualization 4: Party Comparison - Multiple Metrics

In [None]:
# Create grouped bar chart with multiple metrics
def create_party_metrics_plot(df, language='de'):
    """Build two side-by-side bar charts for a fixed list of main parties.

    The left chart shows the average tweets per politician, the right chart
    the party's vote change. Parties are ordered by vote change, highest first.

    Args:
        df: DataFrame with the columns ``partei_kurz``, ``party_change_pkt``
            and ``avg_tweets_per_politician``.
        language: ``'de'`` selects German titles; any other value selects
            English titles.

    Returns:
        A dark-themed 1080x1080 ``go.Figure`` with one row and two columns.
    """
    german = language == 'de'

    # Filter to main parties only - use exact names from database
    main_parties = ['AfD', 'CDU', 'SPD', 'CSU', 'BÜNDNIS 90/DIE GRÜNEN', 'DIE LINKE.']
    selected = df.loc[df['partei_kurz'].isin(main_parties)].copy()
    selected = selected.sort_values('party_change_pkt', ascending=False)

    title = (
        "<b>Partei-Metriken: Social Media & Wahlergebnis</b><br><sub>Top 6 Parteien</sub>"
        if german else
        "<b>Party Metrics: Social Media & Election Results</b><br><sub>Top 6 Parties</sub>"
    )
    left_title = 'Tweets pro Politiker' if german else 'Tweets per Politician'
    right_title = 'Wahlveränderung (%)' if german else 'Election Change (%)'

    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=(left_title, right_title),
        specs=[[{'type': 'bar'}, {'type': 'bar'}]],
    )

    parties = selected['partei_kurz']
    tweet_rates = selected['avg_tweets_per_politician']
    vote_changes = selected['party_change_pkt']

    # Shorten party names for display; names without an entry are shown as-is.
    short_names = {
        'BÜNDNIS 90/DIE GRÜNEN': 'GRÜNE',
        'DIE LINKE.': 'LINKE',
    }
    display_names = [short_names.get(party, party) for party in parties]

    label_font = {'color': 'white', 'size': 14}

    # Left chart: Tweets per politician, coloured with the party colour
    tweets_bar = go.Bar(
        x=display_names,
        y=tweet_rates,
        marker_color=[get_party_color(party) for party in parties],
        text=[f"{rate:.0f}" for rate in tweet_rates],
        textposition='outside',
        textfont=label_font,
        showlegend=False,
    )
    fig.add_trace(tweets_bar, row=1, col=1)

    # Right chart: Election change, green for a gain and red otherwise
    change_bar = go.Bar(
        x=display_names,
        y=vote_changes,
        marker_color=['#00AA00' if delta > 0 else '#CC0000' for delta in vote_changes],
        text=[f"{delta:+.1f}%" for delta in vote_changes],
        textposition='outside',
        textfont=label_font,
        showlegend=False,
    )
    fig.add_trace(change_bar, row=1, col=2)

    fig.update_layout(
        title={
            'text': title,
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 28, 'color': 'white'},
        },
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font={'color': 'white', 'size': 14, 'family': 'Arial'},
        height=1080,
        width=1080,
        margin={'b': 80, 't': 140, 'l': 80, 'r': 80},
    )

    # Update all axes (applies to both subplots)
    fig.update_xaxes(gridcolor='#333333', tickfont={'size': 14})
    fig.update_yaxes(gridcolor='#333333', tickfont={'size': 12})

    # Add horizontal line at y=0 for second chart
    fig.add_hline(y=0, line_dash="dash", line_color="#666666", line_width=1, row=1, col=2)

    return fig

# Create and save
fig_de = create_party_metrics_plot(df_party_activity, 'de')
fig_en = create_party_metrics_plot(df_party_activity, 'en')

output_de = GRAPHICS_DIR / "party_metrics_comparison_de.png"
output_en = GRAPHICS_DIR / "party_metrics_comparison_en.png"

# Static PNG export; width/height repeat the figure's own 1080x1080 layout.
# NOTE(review): write_image needs a static-image export backend (e.g. kaleido)
# to be installed - confirm for the target environment.
fig_de.write_image(output_de, width=1080, height=1080, scale=2)
fig_en.write_image(output_en, width=1080, height=1080, scale=2)

print(f"✅ Saved: {output_de}")
print(f"✅ Saved: {output_en}")

# Only the German variant is displayed inline; the English one is only saved.
fig_de.show()

## 6. Summary Statistics

In [None]:
# Plain-text recap of the three data frames loaded above.
print("="*80)
print("SUMMARY STATISTICS")
print("="*80)

# df_party_activity is ordered by vote change, so this lists the ten parties
# with the highest change first.
print("\n1. Party-Level Metrics:")
print("-"*80)
for _, row in df_party_activity.head(10).iterrows():
    print(f"{row['partei_kurz']:10} | Change: {row['party_change_pkt']:+6.2f}% | "
          f"Tweets/Pol: {row['avg_tweets_per_politician']:>6.1f} | "
          f"Avg Likes: {row['avg_likes_per_tweet']:>7.1f}")

print("\n2. Performance Category Analysis:")
print("-"*80)
for _, row in df_category.iterrows():
    print(f"{row['performance_category']:20} | "
          f"Parties: {row['parties']:>2} | "
          f"Tweets/Pol: {row['avg_tweets_per_pol']:>6.1f} | "
          f"Avg Likes: {row['avg_likes']:>7.1f}")

print("\n3. Top 10 Most Active Politicians:")
print("-"*80)
# df_top_politicians is ordered by party vote change first, so its first ten
# rows are not the ten most active politicians. Rank by tweet count here so
# the listing matches its heading.
most_active = df_top_politicians.sort_values('tweets_2025', ascending=False).head(10)
for _, row in most_active.iterrows():
    print(f"{row['full_name']:30} ({row['partei_kurz']:5}) | "
          f"Tweets: {row['tweets_2025']:>5} | "
          f"Avg Likes: {row['avg_likes_2025']:>8.1f} | "
          f"Party: {row['party_change_pkt']:+6.2f}%")

print("\n" + "="*80)
print(f"✅ All visualizations saved to: {GRAPHICS_DIR}")
print("="*80)