# Greenland / USA / Denmark Topic Analysis

Analysis of German MPs' tweets about the Greenland controversy: President Trump's push to acquire Greenland from Denmark.

**Context:** In January 2026, President Trump announced tariffs on 8 NATO allies over Greenland, threatening to take action "whether they like it or not." This sparked massive protests in Denmark and Greenland, with 85% of Greenlanders opposing joining the US.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from pathlib import Path
import yaml
from datetime import datetime, timedelta

# Database connection
import sys
sys.path.insert(0, str(Path.cwd().parent / 'src'))
from xminer.io.db import engine
from sqlalchemy import text

print('Libraries imported successfully')

In [None]:
# Configuration
# Project parameter file, resolved relative to the notebook's working directory.
PARAMS_FILE = Path("../src/xminer/config/parameters.yml")

# An empty YAML file parses to None, hence the `or {}` fallback.
with PARAMS_FILE.open("r", encoding="utf-8") as f:
    params = yaml.safe_load(f) or {}

# Analysis period: January 2026
START_DATE = datetime(2026, 1, 1)
# NOTE(review): the SQL queries in this notebook filter with `created_at < END_DATE`,
# so tweets from END_DATE itself are excluded, while the caption strings below
# display END_DATE as the last day of the period - confirm which is intended.
END_DATE = datetime(2026, 1, 18)  # Date the analysis was run
START_DATE_STR = START_DATE.strftime('%Y-%m-%d')
END_DATE_STR = END_DATE.strftime('%Y-%m-%d')

# Politicians table (joined to tweets by username in the queries)
POLITICIANS_TABLE = "politicians_12_2025"

# Bilingual date strings used as the period caption in chart titles
STAND_TEXT_DE = f"Januar 2026 ({START_DATE.strftime('%d.%m.')} - {END_DATE.strftime('%d.%m.%Y')})"
STAND_TEXT_EN = f"January 2026 ({START_DATE.strftime('%b %d')} - {END_DATE.strftime('%b %d, %Y')})"

# Graphics directory: <graphics_base_dir>/202601/graphics/greenland, created if missing
GRAPHICS_BASE_DIR = Path(params.get("graphics_base_dir", "../outputs"))
GRAPHICS_DIR = GRAPHICS_BASE_DIR / "202601" / "graphics" / "greenland"
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)

# Mobile-friendly dimensions (Instagram portrait), in pixels before export scaling
MOBILE_WIDTH = 1080
MOBILE_HEIGHT = 1350

print(f"Analysis period: {START_DATE_STR} to {END_DATE_STR}")
print(f"Output: {GRAPHICS_DIR}")

In [None]:
# Party colors and styling
# Keys are the canonical party labels produced by normalize_party().
PARTY_COLORS = {
    "CDU/CSU": "#1a1a1a",
    "SPD": "#E3000F",
    "GRÜNE": "#1AA64A",
    "DIE LINKE.": "#BE3075",
    "FDP": "#d4a800",  # Slightly darker yellow for white bg
    "AFD": "#009EE0",
    "BSW": "#009688",
}

# Topic colors - more distinctive
# Keys are the topic names used for the chart traces.
TOPIC_COLORS = {
    'Greenland': '#1e88e5',  # Blue - ice/arctic
    'Trump/USA': '#e63946',  # Red - USA
    'Denmark': '#7b1fa2',    # Purple - distinct from Greenland
    'All Topics': '#e9c46a',
}

# Background colors (white theme)
BG_COLOR = '#ffffff'        # paper (outer) background
PLOT_BG_COLOR = '#f8f9fa'   # plotting-area background
GRID_COLOR = '#e0e0e0'      # axis grid lines
TEXT_COLOR = '#1a1a1a'      # titles, ticks and labels
SUBTITLE_COLOR = '#666666'  # subtitle / period line inside chart titles

# Logo path (PNG, relative to the notebook's working directory)
LOGO_PATH = Path("../data/logo/Polimetric_logo.png")

def normalize_party(p: str) -> str:
    """Map a raw party label to the canonical label used for grouping and colours.

    The label is stripped and upper-cased, then merged as follows:

    * ``CDU`` and ``CSU``                      -> ``"CDU/CSU"``
    * Greens (``GRÜN…``, ``GRUENE``, ``B90``,
      ``BÜNDNIS`` anywhere in the label)       -> ``"GRÜNE"``
    * ``LINKE`` / ``DIE LINKE`` / ``DIE LINKE.`` -> ``"DIE LINKE."``

    Any other label is returned stripped and upper-cased. Missing values
    (``None`` or a float NaN, which is how pandas represents missing data)
    yield an empty string instead of the bogus label ``"NAN"``.
    """
    # NaN is the only float that is not equal to itself.
    if p is None or (isinstance(p, float) and p != p):
        return ""

    key = str(p).strip().upper()
    if key in {"CDU", "CSU"}:
        return "CDU/CSU"
    if key.startswith("GRÜN") or any(tag in key for tag in ("GRUENE", "B90", "BÜNDNIS")):
        return "GRÜNE"
    if key in {"LINKE", "DIE LINKE", "DIE LINKE."}:
        return "DIE LINKE."
    return key

def get_party_color(party: str) -> str:
    """Return the hex colour for *party*, or neutral grey for unknown parties.

    The label is canonicalised with normalize_party() before the lookup in
    PARTY_COLORS.
    """
    canonical = normalize_party(party)
    if canonical in PARTY_COLORS:
        return PARTY_COLORS[canonical]
    return "#888888"

# Shared Instagram layout (white theme) with enlarged fonts for mobile screens
def get_base_layout(title_de, title_en, lang='de', show_period=True):
    """Build the common layout keyword arguments for a chart.

    Args:
        title_de: German chart title.
        title_en: English chart title.
        lang: ``'de'`` selects the German title and period text; any other
            value selects the English ones.
        show_period: When true, the analysis-period caption is appended to
            the title as a smaller second line.

    Returns:
        dict with ``title``, background colours, base ``font`` and ``margin``.
    """
    german = lang == 'de'
    heading = title_de if german else title_en

    title_text = f"<b>{heading}</b>"
    if show_period:
        period = STAND_TEXT_DE if german else STAND_TEXT_EN
        title_text += f"<br><span style='font-size:0.55em; color:{SUBTITLE_COLOR};'>{period}</span>"

    title_block = {
        'text': title_text,
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 32, 'color': TEXT_COLOR, 'family': 'Arial Black'},
    }
    return {
        'title': title_block,
        'plot_bgcolor': PLOT_BG_COLOR,
        'paper_bgcolor': BG_COLOR,
        'font': {'color': TEXT_COLOR, 'size': 18, 'family': 'Arial'},
        'margin': {'l': 80, 'r': 60, 't': 160, 'b': 120},
    }

def add_branding(fig, lang='de'):
    """Add the PoliMetrics name, the data-source note and the logo to *fig*.

    All elements are placed in paper coordinates below the plotting area.

    Args:
        fig: Plotly figure to decorate; it is modified in place.
        lang: ``'de'`` for the German source note, anything else for English.

    Returns:
        The same figure object, to allow ``fig = add_branding(fig, lang)``.
    """
    source = "Quelle: Bundestag, X" if lang == 'de' else "Source: Bundestag, X"

    # Brand name next to the logo - positioned low enough to clear the x-axis
    fig.add_annotation(
        text="<b>PoliMetrics</b>",
        xref="paper", yref="paper",
        x=0.14, y=-0.08,
        showarrow=False,
        font=dict(size=18, color=TEXT_COLOR),
        xanchor='left'
    )

    # Source note, right-aligned on the same baseline
    fig.add_annotation(
        text=source,
        xref="paper", yref="paper",
        x=0.98, y=-0.08,
        showarrow=False,
        font=dict(size=14, color='#888888'),
        xanchor='right'
    )

    # get_logo_base64() returns "" when the logo file is missing; embedding an
    # empty data URI would add a broken image, so the logo is skipped instead.
    logo_b64 = get_logo_base64()
    if logo_b64:
        fig.add_layout_image(
            dict(
                source=f"data:image/png;base64,{logo_b64}",
                xref="paper", yref="paper",
                x=0.01, y=-0.065,
                sizex=0.15, sizey=0.15,
                xanchor="left", yanchor="middle",
                opacity=1,
            )
        )

    return fig

def get_logo_base64(logo_path=None):
    """Return the logo image as a base64-encoded ASCII string.

    Args:
        logo_path: Optional path to the image file. Defaults to the
            module-level ``LOGO_PATH`` so the location is defined in one place.

    Returns:
        The base64 text of the file's bytes, or ``""`` when the path does not
        point to an existing file.
    """
    import base64

    path = LOGO_PATH if logo_path is None else Path(logo_path)
    if not path.is_file():
        return ""
    return base64.b64encode(path.read_bytes()).decode()

def format_number(val):
    """Format a count compactly: ``950`` -> ``"950"``, ``12400`` -> ``"12K"``,
    ``2540000`` -> ``"2.5M"``.

    Values below 1,000 are truncated to an integer, thousands are rounded to
    whole ``K`` and millions to one decimal ``M``.
    """
    if val >= 1_000_000:
        return f"{val / 1_000_000:.1f}M"
    if val >= 1_000:
        thousands = f"{val / 1_000:.0f}"
        # Values just below one million round up to "1000"; show them as
        # millions rather than the awkward "1000K".
        if thousands == "1000":
            return f"{val / 1_000_000:.1f}M"
        return f"{thousands}K"
    return f"{int(val)}"

## Define Topic Keywords

In [None]:
# Topic keywords - Greenland controversy
# Each keyword is later wrapped as `ILIKE '%keyword%'`, i.e. matched as a
# case-insensitive substring of the tweet text.
GREENLAND_KEYWORDS = ['Grönland', 'Greenland', 'Groenland']
DENMARK_KEYWORDS = ['Dänemark', 'Denmark', 'Kopenhagen', 'Copenhagen']
# NOTE(review): as a case-insensitive substring, 'USA' also matches unrelated
# words such as "zusammen"; 'Amerikaner' is already covered by 'Amerika'.
USA_KEYWORDS = ['Trump', 'USA', 'Amerika', 'American', 'Amerikaner']

# Combined topic - tweets mentioning Greenland with USA/Trump or Denmark context
TOPIC_NAME = "Greenland Crisis"

print("Topic Keywords:")
print(f"  Greenland: {GREENLAND_KEYWORDS}")
print(f"  Denmark: {DENMARK_KEYWORDS}")
print(f"  USA/Trump: {USA_KEYWORDS}")

## Query Greenland Topic Tweets

In [None]:
# Build keyword conditions: one case-insensitive substring test per keyword,
# OR-ed together per topic.
# NOTE(review): substring matching means '%USA%' also hits words such as
# "zusammen", which inflates the USA/Trump counts - consider word-boundary matching.
greenland_cond = " OR ".join(f"t.text ILIKE '%{kw}%'" for kw in GREENLAND_KEYWORDS)
denmark_cond = " OR ".join(f"t.text ILIKE '%{kw}%'" for kw in DENMARK_KEYWORDS)
usa_cond = " OR ".join(f"t.text ILIKE '%{kw}%'" for kw in USA_KEYWORDS)

# Query all tweets in the analysis window that match at least one topic, with
# one boolean flag per topic. END_DATE itself is excluded (`created_at < END_DATE`).
query_greenland = f"""
SELECT 
    t.tweet_id,
    t.username,
    t.text,
    t.created_at,
    t.like_count,
    t.reply_count,
    t.retweet_count,
    t.quote_count,
    t.impression_count,
    p.partei_kurz AS party,
    p.vorname || ' ' || p.nachname AS full_name,
    CASE WHEN ({greenland_cond}) THEN true ELSE false END AS mentions_greenland,
    CASE WHEN ({denmark_cond}) THEN true ELSE false END AS mentions_denmark,
    CASE WHEN ({usa_cond}) THEN true ELSE false END AS mentions_usa
FROM tweets t
JOIN {POLITICIANS_TABLE} p ON LOWER(t.username) = LOWER(p.username)
WHERE t.created_at >= '{START_DATE_STR}'
  AND t.created_at < '{END_DATE_STR}'
  AND (({greenland_cond}) OR ({denmark_cond}) OR ({usa_cond}))
ORDER BY t.created_at
"""

print("Querying Greenland topic tweets...")
with engine.connect() as conn:
    df_topic = pd.read_sql(text(query_greenland), conn)

# Normalize party
df_topic['party_norm'] = df_topic['party'].apply(normalize_party)

# Create date column
df_topic['date'] = pd.to_datetime(df_topic['created_at']).dt.date

# Engagement = likes + replies + retweets + quotes.
df_topic['engagement'] = df_topic['like_count'] + df_topic['reply_count'] + df_topic['retweet_count'] + df_topic['quote_count']

# Engagement rate in percent of impressions; 0 where impressions are missing or zero.
# Computed column-wise: a row-wise apply(axis=1) on an empty result returns a
# DataFrame instead of a Series, which cannot be assigned to a single column.
_impressions = df_topic['impression_count']
df_topic['engagement_rate'] = (df_topic['engagement'] / _impressions * 100).where(_impressions > 0, 0)

# Likes per reply; falls back to the raw like count when there are no replies.
_replies = df_topic['reply_count']
df_topic['likes_to_replies'] = (df_topic['like_count'] / _replies).where(_replies > 0, df_topic['like_count'])

print(f"\nFound {len(df_topic)} topic-related tweets in January 2026")
print(f"  Mentions Greenland: {df_topic['mentions_greenland'].sum()}")
print(f"  Mentions Denmark: {df_topic['mentions_denmark'].sum()}")
print(f"  Mentions USA/Trump: {df_topic['mentions_usa'].sum()}")

In [None]:
# Query ALL tweets in the analysis window as the comparison baseline.
# END_DATE itself is excluded (`created_at < END_DATE`).
query_all = f"""
SELECT 
    t.tweet_id,
    t.created_at,
    t.like_count,
    t.reply_count,
    t.retweet_count,
    t.quote_count,
    t.impression_count,
    p.partei_kurz AS party
FROM tweets t
JOIN {POLITICIANS_TABLE} p ON LOWER(t.username) = LOWER(p.username)
WHERE t.created_at >= '{START_DATE_STR}'
  AND t.created_at < '{END_DATE_STR}'
ORDER BY t.created_at
"""

print("Querying all tweets for comparison...")
with engine.connect() as conn:
    df_all = pd.read_sql(text(query_all), conn)

df_all['party_norm'] = df_all['party'].apply(normalize_party)
df_all['date'] = pd.to_datetime(df_all['created_at']).dt.date

# Engagement = likes + replies + retweets + quotes.
df_all['engagement'] = df_all['like_count'] + df_all['reply_count'] + df_all['retweet_count'] + df_all['quote_count']

# Engagement rate in percent of impressions; 0 where impressions are missing or zero.
# Computed column-wise: a row-wise apply(axis=1) on an empty result returns a
# DataFrame instead of a Series, which cannot be assigned to a single column.
_impressions = df_all['impression_count']
df_all['engagement_rate'] = (df_all['engagement'] / _impressions * 100).where(_impressions > 0, 0)

# Likes per reply; falls back to the raw like count when there are no replies.
_replies = df_all['reply_count']
df_all['likes_to_replies'] = (df_all['like_count'] / _replies).where(_replies > 0, df_all['like_count'])

print(f"\nTotal tweets in January 2026: {len(df_all)}")
# Guard against an empty baseline so the share does not divide by zero.
_topic_share = len(df_topic) / len(df_all) * 100 if len(df_all) else 0.0
print(f"Topic tweets represent {_topic_share:.1f}% of all tweets")

## 1. Tweet Volume Progression Over Time

In [None]:
def create_tweet_progression_chart(lang='de'):
    """Create the timeline of daily tweet counts for the three topic flags.

    One filled line per topic (Greenland, Trump/USA, Denmark) is drawn from
    ``df_topic``. Days without tweets are plotted as 0 so the lines do not
    bridge gaps. Key events are marked with dashed vertical lines and labels.

    Args:
        lang: ``'de'`` for German texts, anything else for English.

    Returns:
        The Plotly figure, including branding.
    """
    # Every calendar day of the queried window; the queries exclude END_DATE itself.
    window_days = [
        ts.date()
        for ts in pd.date_range(START_DATE, END_DATE - timedelta(days=1), freq='D')
    ]

    def _daily_counts(flag_column):
        """Tweets per day for one boolean flag column, zero-filled over the window."""
        counts = df_topic[df_topic[flag_column]].groupby('date').size()
        if counts.empty:
            return pd.DataFrame({'date': [], 'count': []})
        # Keep any observed day outside the nominal window rather than dropping data.
        all_days = sorted(set(window_days) | set(counts.index))
        counts = counts.reindex(all_days, fill_value=0)
        return pd.DataFrame({'date': all_days, 'count': counts.to_numpy()})

    greenland_daily = _daily_counts('mentions_greenland')
    usa_daily = _daily_counts('mentions_usa')
    denmark_daily = _daily_counts('mentions_denmark')

    if lang == 'de':
        title = 'Grönland-Krise: Tweet-Verlauf'
        subtitle = 'Tägliche Tweets von MdBs zu Grönland, USA/Trump und Dänemark'
        yaxis_title = 'Anzahl Tweets'
    else:
        title = 'Greenland Crisis: Tweet Timeline'
        subtitle = 'Daily tweets from MPs about Greenland, USA/Trump and Denmark'
        yaxis_title = 'Number of Tweets'

    fig = go.Figure()

    # Add traces for each topic
    topics = [
        ('Greenland', greenland_daily, TOPIC_COLORS['Greenland']),
        ('Trump/USA', usa_daily, TOPIC_COLORS['Trump/USA']),
        ('Denmark', denmark_daily, TOPIC_COLORS['Denmark']),
    ]

    for topic_name, df_daily, color in topics:
        if len(df_daily) > 0:
            # Convert '#rrggbb' into its RGB components for a translucent fill.
            red, green, blue = (int(color[i:i + 2], 16) for i in (1, 3, 5))
            fig.add_trace(go.Scatter(
                x=df_daily['date'],
                y=df_daily['count'],
                mode='lines+markers',
                name=topic_name,
                line=dict(color=color, width=3),
                marker=dict(size=10),
                fill='tozeroy',
                fillcolor=f'rgba({red}, {green}, {blue}, 0.15)',
            ))

    # Key events. Plotly text uses <br> for line breaks; "\n" is not rendered as one.
    events = [
        ('2026-01-03', 'Maduro<br>Captured' if lang == 'en' else 'Maduro<br>Festnahme'),
        ('2026-01-04', 'Trump:<br>"We need<br>Greenland"' if lang == 'en' else 'Trump:<br>"Wir brauchen<br>Grönland"'),
        ('2026-01-09', 'Trump:<br>"Hard Way"'),
        ('2026-01-17', 'Tariffs<br>Announced' if lang == 'en' else 'Zölle<br>angekündigt'),
    ]

    for date_str, event_text in events:
        event_date = datetime.strptime(date_str, '%Y-%m-%d').date()
        fig.add_vline(x=event_date, line_dash="dash", line_color="#999999", line_width=2)
        # Labels sit near the top of the plot area (paper coordinates on y).
        fig.add_annotation(
            x=event_date, y=0.92, yref='paper',
            text=f"<b>{event_text}</b>", showarrow=False,
            font=dict(size=16, color='#333333'),
            textangle=-45
        )

    stand_text = STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:{SUBTITLE_COLOR};'>{subtitle} | {stand_text}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color=TEXT_COLOR, family='Arial Black')
        ),
        xaxis=dict(
            title='',
            gridcolor=GRID_COLOR,
            tickformat='%d.%m' if lang == 'de' else '%b %d',
            tickfont=dict(size=18, color=TEXT_COLOR),
        ),
        yaxis=dict(
            title=dict(text=yaxis_title, font=dict(size=18, color=TEXT_COLOR)),
            gridcolor=GRID_COLOR,
            tickfont=dict(size=16, color=TEXT_COLOR),
        ),
        plot_bgcolor=PLOT_BG_COLOR,
        paper_bgcolor=BG_COLOR,
        font=dict(color=TEXT_COLOR, size=16),
        height=MOBILE_HEIGHT,
        legend=dict(
            orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5,
            bgcolor='rgba(255,255,255,0.8)', font=dict(size=18, color=TEXT_COLOR)
        ),
        margin=dict(l=80, r=40, t=220, b=120),
    )

    fig = add_branding(fig, lang)
    return fig

# Build the German and English variants, export both as PNG, preview the English one
fig_de, fig_en = (create_tweet_progression_chart(code) for code in ('de', 'en'))

for suffix, figure in (('de', fig_de), ('en', fig_en)):
    figure.write_image(
        GRAPHICS_DIR / f"01_greenland_tweet_progression_{suffix}.png",
        width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2,
    )

for suffix in ('de', 'en'):
    print(f"Saved: 01_greenland_tweet_progression_{suffix}.png")

fig_en.show()

## 2. Engagement Comparison: Greenland Topic vs All Tweets

In [None]:
def create_engagement_comparison_chart(lang='de'):
    """Compare average per-tweet metrics of Greenland tweets with all tweets.

    Grouped bars show mean likes, replies, retweets and impressions (the
    latter in thousands) for tweets flagged ``mentions_greenland`` in
    ``df_topic`` versus every tweet in ``df_all``. A headline annotation
    states how many times more likes Greenland tweets receive on average.

    Args:
        lang: ``'de'`` for German texts, anything else for English.

    Returns:
        The Plotly figure, including branding.
    """
    # Calculate metrics for topic tweets (Greenland-specific)
    df_greenland = df_topic[df_topic['mentions_greenland']].copy()

    topic_metrics = {
        'avg_likes': df_greenland['like_count'].mean(),
        'avg_replies': df_greenland['reply_count'].mean(),
        'avg_retweets': df_greenland['retweet_count'].mean(),
        'avg_impressions': df_greenland['impression_count'].mean(),
    }

    all_metrics = {
        'avg_likes': df_all['like_count'].mean(),
        'avg_replies': df_all['reply_count'].mean(),
        'avg_retweets': df_all['retweet_count'].mean(),
        'avg_impressions': df_all['impression_count'].mean(),
    }

    # Plotly tick labels use <br> for line breaks; "\n" is not rendered as one.
    if lang == 'de':
        title = 'Engagement: Grönland vs. Alle Tweets'
        subtitle = 'Durchschnittliche Metriken pro Tweet'
        categories = ['Likes', 'Antworten', 'Retweets', 'Impressionen<br>(in Tausend)']
        topic_label = 'Grönland-Tweets'
        all_label = 'Alle Tweets'
    else:
        title = 'Engagement: Greenland vs All Tweets'
        subtitle = 'Average metrics per tweet'
        categories = ['Likes', 'Replies', 'Retweets', 'Impressions<br>(in K)']
        topic_label = 'Greenland Tweets'
        all_label = 'All Tweets'

    def _bar_values(metrics):
        """Absolute averages in bar order; impressions scaled to thousands."""
        return [
            metrics['avg_likes'],
            metrics['avg_replies'],
            metrics['avg_retweets'],
            metrics['avg_impressions'] / 1000,
        ]

    def _bar_labels(values):
        """One decimal below 100, whole numbers from 100 upwards."""
        return [f"{v:.1f}" if v < 100 else f"{v:.0f}" for v in values]

    topic_values = _bar_values(topic_metrics)
    all_values = _bar_values(all_metrics)

    fig = go.Figure()

    fig.add_trace(go.Bar(
        name=topic_label,
        x=categories,
        y=topic_values,
        marker_color=TOPIC_COLORS['Greenland'],
        text=_bar_labels(topic_values),
        textposition='outside',
        textfont=dict(size=20, color=TEXT_COLOR),
    ))

    fig.add_trace(go.Bar(
        name=all_label,
        x=categories,
        y=all_values,
        marker_color='#aaaaaa',
        text=_bar_labels(all_values),
        textposition='outside',
        textfont=dict(size=20, color=TEXT_COLOR),
    ))

    stand_text = STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:{SUBTITLE_COLOR};'>{subtitle} | {stand_text}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color=TEXT_COLOR, family='Arial Black')
        ),
        barmode='group',
        xaxis=dict(gridcolor=GRID_COLOR, tickfont=dict(size=18, color=TEXT_COLOR)),
        yaxis=dict(gridcolor=GRID_COLOR, tickfont=dict(size=16, color=TEXT_COLOR)),
        plot_bgcolor=PLOT_BG_COLOR,
        paper_bgcolor=BG_COLOR,
        font=dict(color=TEXT_COLOR, size=16),
        height=MOBILE_HEIGHT,
        legend=dict(
            orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5,
            bgcolor='rgba(255,255,255,0.8)', font=dict(size=18, color=TEXT_COLOR)
        ),
        margin=dict(l=80, r=40, t=180, b=120),
    )

    # Headline annotation with the likes multiplier. It is skipped when no
    # meaningful ratio exists (no Greenland tweets -> NaN mean, or a zero
    # baseline), which would otherwise print e.g. "nanx more likes".
    topic_likes = topic_metrics['avg_likes']
    all_likes = all_metrics['avg_likes']
    if pd.notna(topic_likes) and pd.notna(all_likes) and all_likes > 0:
        likes_ratio = topic_likes / all_likes
        comparison_text = f"{likes_ratio:.1f}x" + (" mehr Likes" if lang == 'de' else " more likes")

        fig.add_annotation(
            x=0.5, y=0.95, xref='paper', yref='paper',
            text=f"<b>{comparison_text}</b>",
            showarrow=False,
            font=dict(size=28, color=TOPIC_COLORS['Greenland']),
            bgcolor='rgba(255,255,255,0.8)',
            borderpad=10
        )

    fig = add_branding(fig, lang)
    return fig

# Build the German and English variants, export both as PNG
fig_de, fig_en = (create_engagement_comparison_chart(code) for code in ('de', 'en'))

for suffix, figure in (('de', fig_de), ('en', fig_en)):
    figure.write_image(
        GRAPHICS_DIR / f"02_engagement_comparison_{suffix}.png",
        width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2,
    )

for suffix in ('de', 'en'):
    print(f"Saved: 02_engagement_comparison_{suffix}.png")

# Report the underlying averages for both groups
df_greenland = df_topic[df_topic['mentions_greenland']]
for heading, frame in (("Greenland tweets", df_greenland), ("All tweets", df_all)):
    print(f"\n{heading}: {len(frame)}")
    print(f"  Avg likes: {frame['like_count'].mean():.1f}")
    print(f"  Avg impressions: {frame['impression_count'].mean():.0f}")

fig_en.show()

## 3. Likes to Replies Ratio Analysis

In [None]:
def create_likes_replies_ratio_chart(lang='de'):
    """Compare the likes-to-replies ratio of Greenland tweets with all tweets.

    The ratio is total likes divided by total replies, computed over tweets
    that have at least one reply. A higher ratio means more approval (likes)
    relative to discussion (replies); a lower ratio means more debate.

    Args:
        lang: ``'de'`` for German texts, anything else for English.

    Returns:
        The Plotly figure, including branding.
    """
    # Only tweets with at least one reply contribute to the ratio.
    df_greenland = df_topic[(df_topic['mentions_greenland']) & (df_topic['reply_count'] > 0)].copy()
    df_all_filtered = df_all[df_all['reply_count'] > 0].copy()

    if lang == 'de':
        title = 'Likes-zu-Antworten-Verhältnis'
        subtitle = 'Höher = mehr Zustimmung, Niedriger = mehr Debatte'
        topic_label = 'Grönland-Tweets'
        all_label = 'Alle Tweets'
    else:
        title = 'Likes-to-Replies Ratio'
        subtitle = 'Higher = more approval, Lower = more debate'
        topic_label = 'Greenland Tweets'
        all_label = 'All Tweets'

    def _total_ratio(frame):
        """Total likes / total replies, or 0 when there are no replies at all."""
        replies = frame['reply_count'].sum()
        return frame['like_count'].sum() / replies if replies > 0 else 0

    topic_ratio = _total_ratio(df_greenland)
    all_ratio = _total_ratio(df_all_filtered)

    fig = go.Figure()

    categories = [topic_label, all_label]
    values = [topic_ratio, all_ratio]
    colors = [TOPIC_COLORS['Greenland'], '#aaaaaa']

    fig.add_trace(go.Bar(
        x=categories,
        y=values,
        marker_color=colors,
        text=[f"<b>{v:.1f}</b>" for v in values],
        textposition='outside',
        textfont=dict(size=48, color=TEXT_COLOR),
        width=0.5,
    ))

    # Sample sizes under each bar. The y position is in paper coordinates:
    # a data-space y below 0 would lie outside the fixed y-axis range set
    # below, and the label would not be shown.
    for cat, n in ((topic_label, len(df_greenland)), (all_label, len(df_all_filtered))):
        fig.add_annotation(
            x=cat, xref='x',
            y=-0.045, yref='paper', yanchor='top',
            text=f"n = {n:,}",
            showarrow=False,
            font=dict(size=16, color='#888888')
        )

    stand_text = STAND_TEXT_DE if lang == 'de' else STAND_TEXT_EN

    # Headroom above the tallest bar for its text label; fall back to 1 so the
    # axis range is never the degenerate [0, 0].
    max_val = max(values) * 1.25 or 1

    fig.update_layout(
        title=dict(
            text=f"<b>{title}</b><br><span style='font-size:0.55em; color:{SUBTITLE_COLOR};'>{subtitle} | {stand_text}</span>",
            x=0.5, xanchor='center',
            font=dict(size=32, color=TEXT_COLOR, family='Arial Black')
        ),
        xaxis=dict(gridcolor=GRID_COLOR, tickfont=dict(size=20, color=TEXT_COLOR)),
        yaxis=dict(
            title=dict(text='Total Likes / Total Replies', font=dict(size=18, color=TEXT_COLOR)),
            gridcolor=GRID_COLOR,
            tickfont=dict(size=16, color=TEXT_COLOR),
            range=[0, max_val],
        ),
        plot_bgcolor=PLOT_BG_COLOR,
        paper_bgcolor=BG_COLOR,
        font=dict(color=TEXT_COLOR, size=16),
        height=MOBILE_HEIGHT,
        margin=dict(l=80, r=40, t=180, b=140),
        showlegend=False,
    )

    fig = add_branding(fig, lang)
    return fig

# Build the German and English variants, export both as PNG
fig_de, fig_en = (create_likes_replies_ratio_chart(code) for code in ('de', 'en'))

for suffix, figure in (('de', fig_de), ('en', fig_en)):
    figure.write_image(
        GRAPHICS_DIR / f"03_likes_replies_ratio_{suffix}.png",
        width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2,
    )

for suffix in ('de', 'en'):
    print(f"Saved: 03_likes_replies_ratio_{suffix}.png")

# Debug output: the totals behind both ratios (tweets with at least one reply)
df_greenland = df_topic[(df_topic['mentions_greenland']) & (df_topic['reply_count'] > 0)]
df_all_filtered = df_all[df_all['reply_count'] > 0]

greenland_likes = df_greenland['like_count'].sum()
greenland_replies = df_greenland['reply_count'].sum()
print(f"\nGreenland: {greenland_likes:,} likes / {greenland_replies:,} replies = {greenland_likes / greenland_replies:.1f}")

baseline_likes = df_all_filtered['like_count'].sum()
baseline_replies = df_all_filtered['reply_count'].sum()
print(f"All tweets: {baseline_likes:,} likes / {baseline_replies:,} replies = {baseline_likes / baseline_replies:.1f}")

fig_en.show()

## 4. Greenland Tweets by Party

In [None]:
def create_party_breakdown_chart(lang='de'):
    """Horizontal bar chart of the number of Greenland tweets per party.

    Tweets flagged ``mentions_greenland`` in ``df_topic`` are grouped by
    ``party_norm``; bars are sorted ascending by tweet count and coloured
    with the party colours.

    Args:
        lang: ``'de'`` for German texts, anything else for English.

    Returns:
        The Plotly figure, including branding.
    """
    german = lang == 'de'
    title = 'Grönland-Tweets nach Partei' if german else 'Greenland Tweets by Party'
    subtitle = 'Wer spricht über die Grönland-Krise?' if german else 'Who is talking about the Greenland crisis?'
    xaxis_title = 'Anzahl Tweets' if german else 'Number of Tweets'
    stand_text = STAND_TEXT_DE if german else STAND_TEXT_EN

    greenland_tweets = df_topic[df_topic['mentions_greenland']].copy()

    # One row per party: tweet count plus summed impressions and likes.
    per_party = greenland_tweets.groupby('party_norm').agg({
        'tweet_id': 'count',
        'impression_count': 'sum',
        'like_count': 'sum',
    })
    per_party = per_party.reset_index()
    per_party.columns = ['party', 'tweets', 'impressions', 'likes']
    per_party = per_party.sort_values('tweets', ascending=True)

    bar_colors = [get_party_color(name) for name in per_party['party']]
    bar_labels = [f"<b>{count}</b>" for count in per_party['tweets']]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=per_party['party'],
        x=per_party['tweets'],
        orientation='h',
        marker_color=bar_colors,
        text=bar_labels,
        textposition='outside',
        textfont=dict(size=24, color=TEXT_COLOR),
    ))

    title_html = (
        f"<b>{title}</b><br><span style='font-size:0.55em; color:{SUBTITLE_COLOR};'>"
        f"{subtitle} | {stand_text}</span>"
    )

    fig.update_layout(
        title=dict(
            text=title_html,
            x=0.5, xanchor='center',
            font=dict(size=32, color=TEXT_COLOR, family='Arial Black')
        ),
        xaxis=dict(
            title=dict(text=xaxis_title, font=dict(size=18, color=TEXT_COLOR)),
            gridcolor=GRID_COLOR,
            tickfont=dict(size=16, color=TEXT_COLOR),
        ),
        yaxis=dict(gridcolor=GRID_COLOR, tickfont=dict(size=18, color=TEXT_COLOR)),
        plot_bgcolor=PLOT_BG_COLOR,
        paper_bgcolor=BG_COLOR,
        font=dict(color=TEXT_COLOR, size=16),
        height=MOBILE_HEIGHT,
        margin=dict(l=120, r=80, t=180, b=120),
        showlegend=False,
    )

    return add_branding(fig, lang)

# Build the German and English variants, export both as PNG, preview the English one
fig_de, fig_en = (create_party_breakdown_chart(code) for code in ('de', 'en'))

for suffix, figure in (('de', fig_de), ('en', fig_en)):
    figure.write_image(
        GRAPHICS_DIR / f"04_greenland_by_party_{suffix}.png",
        width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2,
    )

for suffix in ('de', 'en'):
    print(f"Saved: 04_greenland_by_party_{suffix}.png")

fig_en.show()

## 5. Top Greenland Tweets by Reach

In [None]:
def create_top_tweets_chart(lang='de'):
    """Horizontal bar chart of the ten Greenland tweets with the most impressions.

    Each bar is labelled "<rank>. <last name> (<party>)" and coloured by
    party. Bars longer than half of the longest bar carry their value inside
    the bar in white; shorter bars carry it outside in the default text colour.

    Args:
        lang: ``'de'`` for German texts, anything else for English.

    Returns:
        The Plotly figure, including branding.
    """
    german = lang == 'de'
    title = 'Top 10 Grönland-Tweets' if german else 'Top 10 Greenland Tweets'
    subtitle = 'MdBs mit der größten Reichweite' if german else 'MPs with the highest reach'
    xaxis_title = 'Impressionen' if german else 'Impressions'
    stand_text = STAND_TEXT_DE if german else STAND_TEXT_EN

    # Rank 1 = most impressions; rows are then reversed because a horizontal
    # bar chart draws the first row at the bottom.
    greenland_tweets = df_topic[df_topic['mentions_greenland']].copy()
    ranked = greenland_tweets.nlargest(10, 'impression_count').copy().reset_index(drop=True)
    ranked['rank'] = range(1, len(ranked) + 1)
    ranked = ranked.sort_values('impression_count', ascending=True)

    # Label: rank, last whitespace-separated token of the full name, party.
    ranked['display_name'] = ranked['full_name'].str.split().str[-1]
    ranked['label'] = (
        ranked['rank'].astype(str) + '. ' + ranked['display_name'] + ' (' + ranked['party_norm'] + ')'
    )

    # Bars above 50% of the maximum are long enough to hold their label inside.
    cutoff = ranked['impression_count'].max() * 0.5
    text_positions = []
    text_colors = []
    for impressions in ranked['impression_count']:
        fits_inside = impressions > cutoff
        text_positions.append('inside' if fits_inside else 'outside')
        text_colors.append('white' if fits_inside else TEXT_COLOR)

    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=ranked['label'],
        x=ranked['impression_count'],
        orientation='h',
        marker_color=[get_party_color(party) for party in ranked['party_norm']],
        text=[f"<b>{format_number(impressions)}</b>" for impressions in ranked['impression_count']],
        textposition=text_positions,
        textfont=dict(size=16),
        width=0.7,
        insidetextanchor='end',
    ))

    # Per-bar text colours are applied after the trace has been added.
    fig.update_traces(textfont_color=text_colors)

    title_html = (
        f"<b>{title}</b><br><span style='font-size:0.55em; color:{SUBTITLE_COLOR};'>"
        f"{subtitle} | {stand_text}</span>"
    )

    fig.update_layout(
        title=dict(
            text=title_html,
            x=0.5, xanchor='center',
            font=dict(size=32, color=TEXT_COLOR, family='Arial Black')
        ),
        xaxis=dict(
            title=dict(text=xaxis_title, font=dict(size=18, color=TEXT_COLOR)),
            gridcolor=GRID_COLOR,
            tickfont=dict(size=16, color=TEXT_COLOR),
            tickformat=',.0f',
        ),
        yaxis=dict(gridcolor=GRID_COLOR, tickfont=dict(size=16, color=TEXT_COLOR)),
        plot_bgcolor=PLOT_BG_COLOR,
        paper_bgcolor=BG_COLOR,
        font=dict(color=TEXT_COLOR, size=16),
        height=MOBILE_HEIGHT,
        margin=dict(l=220, r=40, t=180, b=120),
        showlegend=False,
    )

    return add_branding(fig, lang)

# Build the German and English variants, export both as PNG
fig_de, fig_en = (create_top_tweets_chart(code) for code in ('de', 'en'))

for suffix, figure in (('de', fig_de), ('en', fig_en)):
    figure.write_image(
        GRAPHICS_DIR / f"05_top_greenland_tweets_{suffix}.png",
        width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2,
    )

for suffix in ('de', 'en'):
    print(f"Saved: 05_top_greenland_tweets_{suffix}.png")

# Print author, party, reach and the first 100 characters of the five top tweets
df_greenland = df_topic[df_topic['mentions_greenland']]
print("\nTop 5 Greenland tweets by impressions:")
top_five = df_greenland.nlargest(5, 'impression_count')
for _, tweet in top_five.iterrows():
    views = format_number(tweet['impression_count'])
    print(f"\n@{tweet['username']} ({tweet['party_norm']}) - {views} views")
    print(f"  {tweet['text'][:100]}...")

fig_en.show()

## 6. Daily Engagement Rate Comparison

In [None]:
def create_daily_engagement_chart(lang='de'):
    """Line chart of the mean daily engagement rate: Greenland tweets vs all tweets.

    For every day from 4 January 2026 onwards, the per-tweet
    ``engagement_rate`` values are averaged, once for tweets flagged
    ``mentions_greenland`` in ``df_topic`` and once for all of ``df_all``.

    Args:
        lang: ``'de'`` for German texts, anything else for English.

    Returns:
        The Plotly figure, including branding.
    """
    german = lang == 'de'
    title = 'Tägliche Engagement-Rate' if german else 'Daily Engagement Rate'
    subtitle = (
        'Grönland-Tweets vs. Alle Tweets (ab 04.01.)' if german
        else 'Greenland tweets vs All tweets (from Jan 4)'
    )
    greenland_label = 'Grönland' if german else 'Greenland'
    all_label = 'Alle Tweets' if german else 'All Tweets'
    stand_text = STAND_TEXT_DE if german else STAND_TEXT_EN

    # First day shown (when the Greenland news started)
    first_day = datetime(2026, 1, 4).date()

    def _mean_rate_per_day(frame):
        """Mean engagement rate per date, restricted to first_day and later."""
        per_day = frame.groupby('date')['engagement_rate'].mean().reset_index()
        return per_day[per_day['date'] >= first_day]

    greenland_daily = _mean_rate_per_day(df_topic[df_topic['mentions_greenland']].copy())
    all_daily = _mean_rate_per_day(df_all)

    fig = go.Figure()

    # Baseline: all tweets, dashed grey with a light fill
    baseline_trace = go.Scatter(
        x=all_daily['date'],
        y=all_daily['engagement_rate'],
        mode='lines',
        name=all_label,
        line=dict(color='#aaaaaa', width=2, dash='dash'),
        fill='tozeroy',
        fillcolor='rgba(170, 170, 170, 0.1)',
    )
    fig.add_trace(baseline_trace)

    # Greenland line, only when there is data in the displayed period
    if len(greenland_daily) > 0:
        greenland_trace = go.Scatter(
            x=greenland_daily['date'],
            y=greenland_daily['engagement_rate'],
            mode='lines+markers',
            name=greenland_label,
            line=dict(color=TOPIC_COLORS['Greenland'], width=3),
            marker=dict(size=10),
        )
        fig.add_trace(greenland_trace)

    title_html = (
        f"<b>{title}</b><br><span style='font-size:0.55em; color:{SUBTITLE_COLOR};'>"
        f"{subtitle} | {stand_text}</span>"
    )

    fig.update_layout(
        title=dict(
            text=title_html,
            x=0.5, xanchor='center',
            font=dict(size=32, color=TEXT_COLOR, family='Arial Black')
        ),
        xaxis=dict(
            title='',
            gridcolor=GRID_COLOR,
            tickformat='%d.%m' if german else '%b %d',
            tickfont=dict(size=18, color=TEXT_COLOR),
        ),
        yaxis=dict(
            title=dict(text='Engagement Rate (%)', font=dict(size=18, color=TEXT_COLOR)),
            gridcolor=GRID_COLOR,
            tickfont=dict(size=16, color=TEXT_COLOR),
        ),
        plot_bgcolor=PLOT_BG_COLOR,
        paper_bgcolor=BG_COLOR,
        font=dict(color=TEXT_COLOR, size=16),
        height=MOBILE_HEIGHT,
        legend=dict(
            orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5,
            bgcolor='rgba(255,255,255,0.8)', font=dict(size=18, color=TEXT_COLOR)
        ),
        margin=dict(l=80, r=40, t=180, b=120),
    )

    return add_branding(fig, lang)

# Build the German and English variants, export both as PNG, preview the English one
fig_de, fig_en = (create_daily_engagement_chart(code) for code in ('de', 'en'))

for suffix, figure in (('de', fig_de), ('en', fig_en)):
    figure.write_image(
        GRAPHICS_DIR / f"06_daily_engagement_{suffix}.png",
        width=MOBILE_WIDTH, height=MOBILE_HEIGHT, scale=2,
    )

for suffix in ('de', 'en'):
    print(f"Saved: 06_daily_engagement_{suffix}.png")

fig_en.show()

## Summary

In [None]:
import os

# Final report: key counts, engagement comparison and the exported chart files
banner = '=' * 80

print(f"\n{banner}")
print("GREENLAND CRISIS ANALYSIS - COMPLETE")
print(f"{banner}\n")

print(f"Analysis Period: {START_DATE_STR} to {END_DATE_STR}")
print(f"Chart Size: {MOBILE_WIDTH}x{MOBILE_HEIGHT}px (Instagram Portrait)\n")

df_greenland = df_topic[df_topic['mentions_greenland']]

print("KEY FINDINGS:")
print("-" * 40)
key_counts = [
    ("Total tweets in period", len(df_all)),
    ("Topic-related tweets", len(df_topic)),
    ("Greenland mentions", len(df_greenland)),
    ("Denmark mentions", df_topic['mentions_denmark'].sum()),
    ("USA/Trump mentions", df_topic['mentions_usa'].sum()),
]
for caption, count in key_counts:
    print(f"  {caption}: {count:,}")

if len(df_greenland) > 0:
    greenland_rate = df_greenland['engagement_rate'].mean()
    overall_rate = df_all['engagement_rate'].mean()
    print("\nENGAGEMENT COMPARISON:")
    print(f"  Greenland avg engagement rate: {greenland_rate:.2f}%")
    print(f"  All tweets avg engagement rate: {overall_rate:.2f}%")
    # Without a positive baseline rate no multiplier can be computed; report 0.
    if overall_rate > 0:
        ratio = greenland_rate / overall_rate
    else:
        ratio = 0
    print(f"  Greenland tweets get {ratio:.1f}x more engagement!")

print(f"\n{banner}")
print("GENERATED CHARTS:")
print(f"{banner}")

# Exported PNGs, split by language suffix in the file name
png_files = sorted(GRAPHICS_DIR.glob("*.png"))
de_files = [f for f in png_files if '_de.png' in f.name]
en_files = [f for f in png_files if '_en.png' in f.name]

for language, files in (("German", de_files), ("English", en_files)):
    print(f"\n{language} versions ({len(files)} charts):")
    for f in files:
        size = os.path.getsize(f) / 1024  # bytes -> KB
        print(f"  {f.name} ({size:.0f} KB)")

print(f"\nTotal: {len(png_files)} charts ready for Instagram!")