# Top 10 Most-Liked Tweets

Visualize the most-liked tweets from German politicians — both for a selected period (the previous calendar month by default) and all-time.

In [None]:
# Import libraries
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path
import yaml
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

# Database connection
import sys
sys.path.insert(0, str(Path.cwd().parent / 'src'))
from xminer.io.db import engine
from sqlalchemy import text

# Confirmation message, printed once every import above has succeeded.
print('✅ Libraries imported successfully')

## Configuration - Set Your Date Range Here

In [None]:
# ========== MANUAL OVERRIDE (Optional) ==========
# Leave both as None to auto-calculate the previous calendar month,
# or set BOTH to specific ISO dates (YYYY-MM-DD):
MANUAL_START_DATE = None  # e.g. "2025-12-01"
MANUAL_END_DATE = None    # e.g. "2026-01-04"
# =================================================

PARAMS_FILE = Path("../src/xminer/config/parameters.yml")

# An empty YAML document loads as None, so fall back to an empty dict.
with PARAMS_FILE.open("r", encoding="utf-8") as f:
    params = yaml.safe_load(f) or {}

# YEAR/MONTH only name the output folder (YM); the reporting period itself
# comes from the manual override or from today's date below.
YEAR = int(params.get("year", 2025))
MONTH = int(params.get("month", 12))
YM = f"{YEAR:04d}{MONTH:02d}"

# strftime('%B') follows the process locale rather than the chart language,
# so the German subtitle spells its month names out explicitly.
_GERMAN_MONTHS = (
    "Januar", "Februar", "März", "April", "Mai", "Juni",
    "Juli", "August", "September", "Oktober", "November", "Dezember",
)


def _format_german_date(dt):
    """Format a date as e.g. '01. Dezember 2025', independent of the locale."""
    return f"{dt.day:02d}. {_GERMAN_MONTHS[dt.month - 1]} {dt.year}"


# A half-filled override would otherwise be ignored silently and the charts
# would cover a different period than the one the user asked for.
if bool(MANUAL_START_DATE) != bool(MANUAL_END_DATE):
    raise ValueError(
        "Set both MANUAL_START_DATE and MANUAL_END_DATE, or leave both as None."
    )

# Calculate date range
if MANUAL_START_DATE and MANUAL_END_DATE:
    # Use manual dates
    START_DATE = MANUAL_START_DATE
    END_DATE = MANUAL_END_DATE
    start_dt = datetime.strptime(START_DATE, '%Y-%m-%d')
    end_dt = datetime.strptime(END_DATE, '%Y-%m-%d')
    if start_dt > end_dt:
        raise ValueError(
            f"MANUAL_START_DATE ({START_DATE}) is after MANUAL_END_DATE ({END_DATE})."
        )
    print(f"📅 Using manual date range: {START_DATE} to {END_DATE}")
else:
    # Auto-calculate previous month; the time of day is zeroed so the range
    # boundaries are pure calendar dates, as in the manual branch.
    today = datetime.now()
    first_day_current_month = today.replace(
        day=1, hour=0, minute=0, second=0, microsecond=0
    )
    last_day_previous_month = first_day_current_month - timedelta(days=1)
    first_day_previous_month = last_day_previous_month.replace(day=1)

    START_DATE = first_day_previous_month.strftime('%Y-%m-%d')
    END_DATE = last_day_previous_month.strftime('%Y-%m-%d')
    start_dt = first_day_previous_month
    end_dt = last_day_previous_month
    print(f"📅 Auto-calculated previous month: {START_DATE} to {END_DATE}")

# Chart subtitles (German / English).
STAND_TEXT = f"Zeitraum: {_format_german_date(start_dt)} - {_format_german_date(end_dt)}"
STAND_TEXT_EN = f"Period: {start_dt.strftime('%B %d, %Y')} - {end_dt.strftime('%B %d, %Y')}"

# Graphics directory
GRAPHICS_BASE_DIR = Path(params.get("graphics_base_dir", "../outputs"))
GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics" / "top_tweets"
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)

print(f"Output: {GRAPHICS_DIR}")

In [None]:
# Party colors, keyed by the canonical names returned by normalize_party().
PARTY_COLORS = {
    "CDU/CSU": "#000000",
    "SPD": "#E3000F",
    "GRÜNE": "#1AA64A",
    "BÜNDNIS 90/DIE GRÜNEN": "#1AA64A",
    "DIE LINKE.": "#BE3075",
    "FDP": "#FFED00",
    "AFD": "#009EE0",
    "BSW": "#009688",
}


def normalize_party(p: str) -> str:
    """Map a raw party label to the canonical key used in PARTY_COLORS.

    Args:
        p: Raw party name; surrounding whitespace and letter case are
            ignored. None and float NaN are treated as "no party".

    Returns:
        "CDU/CSU", "GRÜNE", "DIE LINKE." or "BSW" for the known spelling
        variants, "" for missing values, otherwise the stripped,
        upper-cased input.
    """
    # None and float NaN (pandas' missing-value marker) both mean "no party".
    if p is None or (isinstance(p, float) and p != p):
        return ""
    key = str(p).strip().upper()
    if key in {"CDU", "CSU"}:
        return "CDU/CSU"
    # Must run before the Greens check: the BSW's full name also contains
    # the word "BÜNDNIS".
    if key == "BSW" or "WAGENKNECHT" in key:
        return "BSW"
    green_markers = ("GRUENE", "B90", "BÜNDNIS", "BÜ90")
    if key.startswith("GRÜN") or any(marker in key for marker in green_markers):
        return "GRÜNE"
    if key in {"LINKE", "DIE LINKE", "DIE LINKE."}:
        return "DIE LINKE."
    return key


def get_party_color(party: str) -> str:
    """Return the hex color for a party, or neutral gray (#888888) if unknown."""
    normalized = normalize_party(party)
    return PARTY_COLORS.get(normalized, "#888888")

## Get Top 10 Most-Liked Tweets (Selected Period)

In [None]:
# Query top 10 most-liked tweets for selected period.
# END_DATE is meant as an inclusive calendar day, but `created_at <= 'YYYY-MM-DD'`
# only matches up to midnight at the start of that day. The query therefore
# uses a half-open range [start, end + 1 day) and passes the bounds as bind
# parameters instead of interpolating them into the SQL text.
period_query_params = {
    "start_date": datetime.strptime(START_DATE, '%Y-%m-%d'),
    "end_exclusive": datetime.strptime(END_DATE, '%Y-%m-%d') + timedelta(days=1),
}

query_period = """
SELECT 
    t.tweet_id,
    t.text,
    t.like_count,
    t.created_at,
    t.username,
    p.vorname,
    p.nachname,
    p.partei_kurz AS party
FROM public.tweets t
JOIN politicians_12_2025 p ON t.username = p.username
WHERE t.created_at >= :start_date
  AND t.created_at < :end_exclusive
  AND t.retrieved_at >= '2025-09-01'
ORDER BY t.like_count DESC
LIMIT 10
"""

print(f"Querying top 10 most-liked tweets for {START_DATE} to {END_DATE}...")
print("(Filtering: only tweets retrieved after 2025-09-01)")

with engine.connect() as conn:
    df_period = pd.read_sql(text(query_period), conn, params=period_query_params)

# Add full name and normalized party. Missing name parts are treated as empty
# so one NULL column does not turn the whole name into NaN.
df_period = df_period.assign(
    full_name=(
        df_period['vorname'].fillna('') + ' ' + df_period['nachname'].fillna('')
    ).str.strip(),
    party_norm=df_period['party'].apply(normalize_party)
)
df_period = df_period.assign(
    party_color=df_period['party_norm'].apply(get_party_color),
    date_str=df_period['created_at'].dt.strftime('%d.%m.%Y'),
    text_short=df_period['text'].str[:60] + '...'
)

# Report the real row count: the period may contain fewer than 10 tweets.
print(f"\n✅ Found top {len(df_period)} tweets with {df_period['like_count'].sum():,} total likes\n")
df_period[['full_name', 'party_norm', 'like_count', 'date_str', 'text_short']]

## Get Top 10 Most-Liked Tweets (All-Time)

In [None]:
# Query top 10 most-liked tweets of all time
# NOTE(review): unlike the other queries in this notebook, this one has no
# `retrieved_at >= '2025-09-01'` filter — confirm that this is intentional.
query_alltime = """
SELECT 
    t.tweet_id,
    t.text,
    t.like_count,
    t.created_at,
    t.username,
    p.vorname,
    p.nachname,
    p.partei_kurz AS party
FROM public.tweets t
JOIN politicians_12_2025 p ON t.username = p.username
ORDER BY t.like_count DESC
LIMIT 10
"""

print("Querying top 10 most-liked tweets (all-time)...")

with engine.connect() as conn:
    df_alltime = pd.read_sql(text(query_alltime), conn)

# Add full name and normalized party. Missing name parts are treated as empty
# so one NULL column does not turn the whole name into NaN.
df_alltime = df_alltime.assign(
    full_name=(
        df_alltime['vorname'].fillna('') + ' ' + df_alltime['nachname'].fillna('')
    ).str.strip(),
    party_norm=df_alltime['party'].apply(normalize_party)
)
df_alltime = df_alltime.assign(
    party_color=df_alltime['party_norm'].apply(get_party_color),
    date_str=df_alltime['created_at'].dt.strftime('%d.%m.%Y'),
    text_short=df_alltime['text'].str[:60] + '...'
)

# Get date range from data (spans only the returned top tweets, and is used
# as the subtitle of the all-time charts).
min_date = df_alltime['created_at'].min()
max_date = df_alltime['created_at'].max()
ALLTIME_TEXT_DE = f"Zeitraum: {min_date.strftime('%d.%m.%Y')} - {max_date.strftime('%d.%m.%Y')}"
ALLTIME_TEXT_EN = f"Period: {min_date.strftime('%m/%d/%Y')} - {max_date.strftime('%m/%d/%Y')}"

# Report the real row count instead of a hard-coded "10".
print(f"\n✅ Found top {len(df_alltime)} all-time tweets with {df_alltime['like_count'].sum():,} total likes")
print(f"Date range: {min_date.strftime('%Y-%m-%d')} to {max_date.strftime('%Y-%m-%d')}\n")
df_alltime[['full_name', 'party_norm', 'like_count', 'date_str', 'text_short']]

## Get Top 5 Most-Liked Tweets Per Party (Selected Period)

In [ ]:
# Function to create plot
def create_plot(df, timeframe='period', language='de', show_tweets=False):
    """Build the top-tweets horizontal bar chart in Instagram portrait layout.

    Args:
        df: DataFrame with the columns ``like_count``, ``full_name``,
            ``party_norm``, ``party_color``, ``text``, ``date_str`` and
            ``created_at`` (datetime-like).
        timeframe: ``'period'`` uses ``STAND_TEXT`` / ``STAND_TEXT_EN`` as the
            subtitle; any other value uses the all-time subtitle.
        language: ``'de'`` for German texts; any other value means English.
        show_tweets: If True, each bar is labelled with the author plus the
            truncated tweet text and shows its like count inside the bar;
            otherwise bars are labelled "name (party)".

    Returns:
        A ``go.Figure`` with a fixed height of 1350 px.
    """
    def _clip(value, limit):
        """Cut text to `limit` characters, adding '...' only if shortened."""
        value = '' if value is None else str(value)
        return value if len(value) <= limit else value[:limit] + '...'

    # Sort by likes (ascending, so the most-liked tweet ends up at the top).
    rows = df.sort_values('like_count', ascending=True).to_dict('records')

    if show_tweets:
        # Truncate tweet text for display on chart, add author name
        max_len = 60  # Reduced to make room for author
        labels = [f"{r['full_name']}: {_clip(r['text'], max_len)}" for r in rows]
    else:
        # Create labels with politician name and party
        labels = [f"{r['full_name']} ({r['party_norm']})" for r in rows]

    # Create hover text
    if language == 'de':
        date_label = "Datum"
        dates = [r['date_str'] for r in rows]
    else:
        date_label = "Date"
        dates = [r['created_at'].strftime('%m/%d/%Y') for r in rows]
    hover_texts = [
        f"<b>{r['full_name']}</b> ({r['party_norm']})<br>"
        f"{date_label}: {shown_date}<br>"
        f"Likes: {r['like_count']:,}<br><br>"
        f"<i>{_clip(r['text'], 200)}</i>"
        for r, shown_date in zip(rows, dates)
    ]

    # Set titles based on language and timeframe
    if language == 'de':
        if timeframe == 'period':
            title_text = f"<b>Top 10 Meist-gelikte Tweets</b><br><sub style='font-size:0.7em;'>{STAND_TEXT}</sub>"
        else:
            title_text = f"<b>Top 10 Meist-gelikte Tweets (Alle Zeiten)</b><br><sub style='font-size:0.7em;'>{ALLTIME_TEXT_DE}</sub>"
        xaxis_title = "<b>Anzahl Likes</b>"
    else:  # English
        if timeframe == 'period':
            title_text = f"<b>Top 10 Most-Liked Tweets</b><br><sub style='font-size:0.7em;'>{STAND_TEXT_EN}</sub>"
        else:
            title_text = f"<b>Top 10 Most-Liked Tweets (All-Time)</b><br><sub style='font-size:0.7em;'>{ALLTIME_TEXT_EN}</sub>"
        xaxis_title = "<b>Number of Likes</b>"

    # Bars sit on numeric positions and the labels are attached as tick text.
    # Using the labels as categories would merge bars with identical labels
    # (one politician with several top tweets) onto a single row.
    positions = list(range(len(rows)))
    bar_kwargs = dict(
        y=positions,
        x=[r['like_count'] for r in rows],
        orientation='h',
        marker_color=[r['party_color'] for r in rows],
        customdata=hover_texts,
        hovertemplate="%{customdata}<extra></extra>",
    )
    if show_tweets:
        # For tweet text versions, add like count inside bars
        bar_kwargs.update(
            text=[f"<b>{r['like_count']:,} Likes</b>" for r in rows],
            textposition='inside',
            insidetextanchor='end',
            textfont=dict(color='white', size=16),
        )

    fig = go.Figure()
    fig.add_trace(go.Bar(**bar_kwargs))

    # Instagram portrait format (1080x1350); the text version needs a wider
    # left margin and a slightly smaller font for the long labels.
    chart_height = 1350
    if show_tweets:
        left_margin = 600
        y_font_size = 12
    else:
        left_margin = 200
        y_font_size = 14

    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            xanchor='center',
            font=dict(size=28, color='white')  # Adjusted for Instagram
        ),
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white', size=14, family='Arial'),
        height=chart_height,
        showlegend=False,
        margin=dict(b=60, t=120, l=left_margin, r=100),
        xaxis=dict(
            title=xaxis_title,
            gridcolor='#333333',
            title_font=dict(size=18),
            tickfont=dict(size=14)
        ),
        yaxis=dict(
            gridcolor='#333333',
            tickfont=dict(size=y_font_size),
            tickmode='array',
            tickvals=positions,
            ticktext=labels,
            zeroline=False  # a numeric axis would otherwise draw a line at y=0
        )
    )

    return fig

# Export all eight top-10 charts as Instagram portrait PNGs.
def _export_instagram_png(fig, filename):
    """Write `fig` into GRAPHICS_DIR as a 1080x1350 PNG (scale=2) and report it.

    Returns the output path.
    """
    output_path = GRAPHICS_DIR / filename
    fig.write_image(output_path, width=1080, height=1350, scale=2)
    print(f"✅ Saved: {output_path}")
    print("   Dimensions: 1080x1350px (Instagram portrait)")
    return output_path


# Create period visualizations (with politician names)
print("="*60)
print("CREATING PERIOD VISUALIZATIONS (Politicians)")
print("="*60)

fig_period_de = create_plot(df_period, 'period', 'de', show_tweets=False)
output_period_de = _export_instagram_png(fig_period_de, "top_10_liked_tweets_period_de.png")

fig_period_en = create_plot(df_period, 'period', 'en', show_tweets=False)
output_period_en = _export_instagram_png(fig_period_en, "top_10_liked_tweets_period_en.png")

# Create all-time visualizations (with politician names)
print("\n" + "="*60)
print("CREATING ALL-TIME VISUALIZATIONS (Politicians)")
print("="*60)

fig_alltime_de = create_plot(df_alltime, 'alltime', 'de', show_tweets=False)
output_alltime_de = _export_instagram_png(fig_alltime_de, "top_10_liked_tweets_alltime_de.png")

fig_alltime_en = create_plot(df_alltime, 'alltime', 'en', show_tweets=False)
output_alltime_en = _export_instagram_png(fig_alltime_en, "top_10_liked_tweets_alltime_en.png")

# Create period visualizations WITH TWEET TEXT
print("\n" + "="*60)
print("CREATING PERIOD VISUALIZATIONS (Tweet Text)")
print("="*60)

fig_period_tweets_de = create_plot(df_period, 'period', 'de', show_tweets=True)
output_period_tweets_de = _export_instagram_png(
    fig_period_tweets_de, "top_10_liked_tweets_period_with_text_de.png"
)

fig_period_tweets_en = create_plot(df_period, 'period', 'en', show_tweets=True)
output_period_tweets_en = _export_instagram_png(
    fig_period_tweets_en, "top_10_liked_tweets_period_with_text_en.png"
)

# Create all-time visualizations WITH TWEET TEXT
print("\n" + "="*60)
print("CREATING ALL-TIME VISUALIZATIONS (Tweet Text)")
print("="*60)

fig_alltime_tweets_de = create_plot(df_alltime, 'alltime', 'de', show_tweets=True)
output_alltime_tweets_de = _export_instagram_png(
    fig_alltime_tweets_de, "top_10_liked_tweets_alltime_with_text_de.png"
)

fig_alltime_tweets_en = create_plot(df_alltime, 'alltime', 'en', show_tweets=True)
output_alltime_tweets_en = _export_instagram_png(
    fig_alltime_tweets_en, "top_10_liked_tweets_alltime_with_text_en.png"
)

print("\n✅ All visualizations created successfully!")
print("\n📊 8 files created:")
print("   - 4 with politician names (1080x1350px)")
print("   - 4 with tweet text (1080x1350px)")
print("   - All in Instagram portrait format")

# Display period German version
fig_period_de.show()

## Create All Visualizations

In [None]:
# Function to create plot
def create_plot(df, timeframe='period', language='de', show_tweets=False):
    """Build the top-tweets horizontal bar chart with a row-count-based height.

    Args:
        df: DataFrame with the columns ``like_count``, ``full_name``,
            ``party_norm``, ``party_color``, ``text``, ``date_str`` and
            ``created_at`` (datetime-like).
        timeframe: ``'period'`` uses ``STAND_TEXT`` / ``STAND_TEXT_EN`` as the
            subtitle; any other value uses the all-time subtitle.
        language: ``'de'`` for German texts; any other value means English.
        show_tweets: If True, each bar is labelled with the author plus the
            truncated tweet text and shows its like count inside the bar;
            otherwise bars are labelled "name (party)".

    Returns:
        A ``go.Figure`` whose height grows with the number of rows (at least
        1200 px with tweet text, at least 800 px otherwise).
    """
    def _clip(value, limit):
        """Cut text to `limit` characters, adding '...' only if shortened."""
        value = '' if value is None else str(value)
        return value if len(value) <= limit else value[:limit] + '...'

    # Sort by likes (ascending, so the most-liked tweet ends up at the top).
    rows = df.sort_values('like_count', ascending=True).to_dict('records')

    if show_tweets:
        # Truncate tweet text for display on chart, add author name
        max_len = 60  # Reduced to make room for author
        labels = [f"{r['full_name']}: {_clip(r['text'], max_len)}" for r in rows]
    else:
        # Create labels with politician name and party
        labels = [f"{r['full_name']} ({r['party_norm']})" for r in rows]

    # Create hover text
    if language == 'de':
        date_label = "Datum"
        dates = [r['date_str'] for r in rows]
    else:
        date_label = "Date"
        dates = [r['created_at'].strftime('%m/%d/%Y') for r in rows]
    hover_texts = [
        f"<b>{r['full_name']}</b> ({r['party_norm']})<br>"
        f"{date_label}: {shown_date}<br>"
        f"Likes: {r['like_count']:,}<br><br>"
        f"<i>{_clip(r['text'], 200)}</i>"
        for r, shown_date in zip(rows, dates)
    ]

    # Set titles based on language and timeframe
    if language == 'de':
        if timeframe == 'period':
            title_text = f"<b>Top 10 Meist-gelikte Tweets</b><br><sub style='font-size:0.7em;'>{STAND_TEXT}</sub>"
        else:
            title_text = f"<b>Top 10 Meist-gelikte Tweets (Alle Zeiten)</b><br><sub style='font-size:0.7em;'>{ALLTIME_TEXT_DE}</sub>"
        xaxis_title = "<b>Anzahl Likes</b>"
    else:  # English
        if timeframe == 'period':
            title_text = f"<b>Top 10 Most-Liked Tweets</b><br><sub style='font-size:0.7em;'>{STAND_TEXT_EN}</sub>"
        else:
            title_text = f"<b>Top 10 Most-Liked Tweets (All-Time)</b><br><sub style='font-size:0.7em;'>{ALLTIME_TEXT_EN}</sub>"
        xaxis_title = "<b>Number of Likes</b>"

    # Bars sit on numeric positions and the labels are attached as tick text.
    # Using the labels as categories would merge bars with identical labels
    # (one politician with several top tweets) onto a single row.
    positions = list(range(len(rows)))
    bar_kwargs = dict(
        y=positions,
        x=[r['like_count'] for r in rows],
        orientation='h',
        marker_color=[r['party_color'] for r in rows],
        customdata=hover_texts,
        hovertemplate="%{customdata}<extra></extra>",
    )
    if show_tweets:
        # For tweet text versions, add like count inside bars
        bar_kwargs.update(
            text=[f"<b>{r['like_count']:,} Likes</b>" for r in rows],
            textposition='inside',
            insidetextanchor='end',
            textfont=dict(color='white', size=16),
        )

    fig = go.Figure()
    fig.add_trace(go.Bar(**bar_kwargs))

    # Calculate height based on display mode
    if show_tweets:
        # More space needed for tweet text
        chart_height = max(1200, 140 * len(rows))
        left_margin = 800  # Wide enough for author name plus tweet text
        y_font_size = 13  # Font size for tweet text on Y-axis
    else:
        chart_height = max(800, 80 * len(rows))
        left_margin = 250
        y_font_size = 16  # Font size for politician names on Y-axis

    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            xanchor='center',
            font=dict(size=32, color='white')
        ),
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white', size=16, family='Arial'),
        height=chart_height,
        showlegend=False,
        margin=dict(b=80, t=140, l=left_margin, r=120),
        xaxis=dict(
            title=xaxis_title,
            gridcolor='#333333',
            title_font=dict(size=20),
            tickfont=dict(size=16)
        ),
        yaxis=dict(
            gridcolor='#333333',
            tickfont=dict(size=y_font_size),
            tickmode='array',
            tickvals=positions,
            ticktext=labels,
            zeroline=False  # a numeric axis would otherwise draw a line at y=0
        )
    )

    return fig

# Export all eight top-10 charts as PNGs, using each figure's own layout height.
def _export_png(fig, filename, width):
    """Write `fig` into GRAPHICS_DIR as a PNG of the given width (scale=2).

    The image height is taken from the figure's layout. Returns the output path.
    """
    output_path = GRAPHICS_DIR / filename
    fig.write_image(output_path, width=width, height=fig.layout.height, scale=2)
    print(f"✅ Saved: {output_path}")
    print(f"   Dimensions: {width}x{fig.layout.height}px")
    return output_path


# Create period visualizations (with politician names)
print("="*60)
print("CREATING PERIOD VISUALIZATIONS (Politicians)")
print("="*60)

fig_period_de = create_plot(df_period, 'period', 'de', show_tweets=False)
output_period_de = _export_png(fig_period_de, "top_10_liked_tweets_period_de.png", 1080)

fig_period_en = create_plot(df_period, 'period', 'en', show_tweets=False)
output_period_en = _export_png(fig_period_en, "top_10_liked_tweets_period_en.png", 1080)

# Create all-time visualizations (with politician names)
print("\n" + "="*60)
print("CREATING ALL-TIME VISUALIZATIONS (Politicians)")
print("="*60)

fig_alltime_de = create_plot(df_alltime, 'alltime', 'de', show_tweets=False)
output_alltime_de = _export_png(fig_alltime_de, "top_10_liked_tweets_alltime_de.png", 1080)

fig_alltime_en = create_plot(df_alltime, 'alltime', 'en', show_tweets=False)
output_alltime_en = _export_png(fig_alltime_en, "top_10_liked_tweets_alltime_en.png", 1080)

# Create period visualizations WITH TWEET TEXT (wider canvas for the long labels)
print("\n" + "="*60)
print("CREATING PERIOD VISUALIZATIONS (Tweet Text)")
print("="*60)

fig_period_tweets_de = create_plot(df_period, 'period', 'de', show_tweets=True)
output_period_tweets_de = _export_png(
    fig_period_tweets_de, "top_10_liked_tweets_period_with_text_de.png", 1400
)

fig_period_tweets_en = create_plot(df_period, 'period', 'en', show_tweets=True)
output_period_tweets_en = _export_png(
    fig_period_tweets_en, "top_10_liked_tweets_period_with_text_en.png", 1400
)

# Create all-time visualizations WITH TWEET TEXT
print("\n" + "="*60)
print("CREATING ALL-TIME VISUALIZATIONS (Tweet Text)")
print("="*60)

fig_alltime_tweets_de = create_plot(df_alltime, 'alltime', 'de', show_tweets=True)
output_alltime_tweets_de = _export_png(
    fig_alltime_tweets_de, "top_10_liked_tweets_alltime_with_text_de.png", 1400
)

fig_alltime_tweets_en = create_plot(df_alltime, 'alltime', 'en', show_tweets=True)
output_alltime_tweets_en = _export_png(
    fig_alltime_tweets_en, "top_10_liked_tweets_alltime_with_text_en.png", 1400
)

print("\n✅ All visualizations created successfully!")
print("\n📊 8 files created:")
print("   - 4 with politician names (1080px wide)")
print("   - 4 with tweet text (1400px wide)")

# Display period German version
fig_period_de.show()

In [None]:
# Text summary of both top-10 lists.
def _print_top10_summary(df):
    """Print totals, the top tweet and the per-party breakdown of `df`.

    `df` is expected to be ordered by like_count descending (as returned by
    the queries), so its first row is the top tweet.

    Returns:
        The top row, or None when `df` is empty (nothing but a notice is
        printed in that case instead of raising IndexError).
    """
    if df.empty:
        print("No tweets found.")
        return None

    print(f"Total likes: {df['like_count'].sum():,}")
    print(f"Average likes: {df['like_count'].mean():,.0f}")
    print()
    print("Top Tweet:")
    top = df.iloc[0]
    print(f"  {top['full_name']} ({top['party_norm']})")
    print(f"  {top['like_count']:,} likes on {top['date_str']}")
    print(f"  '{top['text'][:100]}...'")
    print()
    print("By Party:")
    for party, count in df['party_norm'].value_counts().items():
        print(f"  {party}: {count} tweet(s)")
    return top


print("=" * 80)
print(f"PERIOD TOP 10 ({START_DATE} to {END_DATE})")
print("=" * 80)
top_p = _print_top10_summary(df_period)

print("\n" + "=" * 80)
print("ALL-TIME TOP 10")
print("=" * 80)
print(f"Date range: {min_date.strftime('%d.%m.%Y')} - {max_date.strftime('%d.%m.%Y')}")
top_a = _print_top10_summary(df_alltime)

In [None]:
# Query top 5 most-liked tweets per party for selected period.
# END_DATE is meant as an inclusive calendar day, but `created_at <= 'YYYY-MM-DD'`
# only matches up to midnight at the start of that day. The query therefore
# uses a half-open range [start, end + 1 day) and passes the bounds as bind
# parameters instead of interpolating them into the SQL text.
period_per_party_params = {
    "start_date": datetime.strptime(START_DATE, '%Y-%m-%d'),
    "end_exclusive": datetime.strptime(END_DATE, '%Y-%m-%d') + timedelta(days=1),
}

query_period_per_party = """
WITH ranked_tweets AS (
    SELECT 
        t.tweet_id,
        t.text,
        t.like_count,
        t.created_at,
        t.username,
        p.vorname,
        p.nachname,
        p.partei_kurz AS party,
        ROW_NUMBER() OVER (PARTITION BY p.partei_kurz ORDER BY t.like_count DESC) as rank
    FROM public.tweets t
    JOIN politicians_12_2025 p ON t.username = p.username
    WHERE t.created_at >= :start_date
      AND t.created_at < :end_exclusive
      AND t.retrieved_at >= '2025-09-01'
)
SELECT 
    tweet_id,
    text,
    like_count,
    created_at,
    username,
    vorname,
    nachname,
    party
FROM ranked_tweets
WHERE rank <= 5
ORDER BY party, like_count DESC
"""

print(f"Querying top 5 most-liked tweets per party for {START_DATE} to {END_DATE}...")
print("(Filtering: only tweets retrieved after 2025-09-01)")

with engine.connect() as conn:
    df_period_per_party = pd.read_sql(
        text(query_period_per_party), conn, params=period_per_party_params
    )

# Add full name and normalized party. Missing name parts are treated as empty
# so one NULL column does not turn the whole name into NaN.
df_period_per_party = df_period_per_party.assign(
    full_name=(
        df_period_per_party['vorname'].fillna('')
        + ' '
        + df_period_per_party['nachname'].fillna('')
    ).str.strip(),
    party_norm=df_period_per_party['party'].apply(normalize_party)
)

# The SQL ranks per raw party label, but normalize_party() merges some labels
# (e.g. CDU and CSU), which can leave more than 5 rows in a merged group.
# Re-apply the top-5 cut per normalized party.
df_period_per_party = (
    df_period_per_party
    .sort_values(['party_norm', 'like_count'], ascending=[True, False])
    .groupby('party_norm', sort=False)
    .head(5)
    .reset_index(drop=True)
)

df_period_per_party = df_period_per_party.assign(
    party_color=df_period_per_party['party_norm'].apply(get_party_color),
    date_str=df_period_per_party['created_at'].dt.strftime('%d.%m.%Y'),
    text_short=df_period_per_party['text'].str[:60] + '...'
)

print(f"\n✅ Found {len(df_period_per_party)} tweets across {df_period_per_party['party_norm'].nunique()} parties")
print(f"Total likes: {df_period_per_party['like_count'].sum():,}\n")

# Show summary by party
print("Tweets per party:")
for party, group in df_period_per_party.groupby('party_norm'):
    print(f"  {party}: {len(group)} tweets, {group['like_count'].sum():,} total likes")

df_period_per_party[['party_norm', 'full_name', 'like_count', 'date_str', 'text_short']].head(15)

## Get Top 5 Most-Liked Tweets Per Party (All-Time)

In [None]:
# Query top 5 most-liked tweets per party (all-time)
query_alltime_per_party = """
WITH ranked_tweets AS (
    SELECT 
        t.tweet_id,
        t.text,
        t.like_count,
        t.created_at,
        t.username,
        p.vorname,
        p.nachname,
        p.partei_kurz AS party,
        ROW_NUMBER() OVER (PARTITION BY p.partei_kurz ORDER BY t.like_count DESC) as rank
    FROM public.tweets t
    JOIN politicians_12_2025 p ON t.username = p.username
    WHERE t.retrieved_at >= '2025-09-01'
)
SELECT 
    tweet_id,
    text,
    like_count,
    created_at,
    username,
    vorname,
    nachname,
    party
FROM ranked_tweets
WHERE rank <= 5
ORDER BY party, like_count DESC
"""

print("Querying top 5 most-liked tweets per party (all-time)...")
print("(Filtering: only tweets retrieved after 2025-09-01)")

with engine.connect() as conn:
    df_alltime_per_party = pd.read_sql(text(query_alltime_per_party), conn)

# Add full name and normalized party. Missing name parts are treated as empty
# so one NULL column does not turn the whole name into NaN.
df_alltime_per_party = df_alltime_per_party.assign(
    full_name=(
        df_alltime_per_party['vorname'].fillna('')
        + ' '
        + df_alltime_per_party['nachname'].fillna('')
    ).str.strip(),
    party_norm=df_alltime_per_party['party'].apply(normalize_party)
)

# The SQL ranks per raw party label, but normalize_party() merges some labels
# (e.g. CDU and CSU), which can leave more than 5 rows in a merged group.
# Re-apply the top-5 cut per normalized party.
df_alltime_per_party = (
    df_alltime_per_party
    .sort_values(['party_norm', 'like_count'], ascending=[True, False])
    .groupby('party_norm', sort=False)
    .head(5)
    .reset_index(drop=True)
)

df_alltime_per_party = df_alltime_per_party.assign(
    party_color=df_alltime_per_party['party_norm'].apply(get_party_color),
    date_str=df_alltime_per_party['created_at'].dt.strftime('%d.%m.%Y'),
    text_short=df_alltime_per_party['text'].str[:60] + '...'
)

# Get date range from data (spans only the returned top tweets, and is used
# as the subtitle of the all-time per-party charts).
min_date_pp = df_alltime_per_party['created_at'].min()
max_date_pp = df_alltime_per_party['created_at'].max()
ALLTIME_PP_TEXT_DE = f"Zeitraum: {min_date_pp.strftime('%d.%m.%Y')} - {max_date_pp.strftime('%d.%m.%Y')}"
ALLTIME_PP_TEXT_EN = f"Period: {min_date_pp.strftime('%m/%d/%Y')} - {max_date_pp.strftime('%m/%d/%Y')}"

print(f"\n✅ Found {len(df_alltime_per_party)} tweets across {df_alltime_per_party['party_norm'].nunique()} parties")
print(f"Date range: {min_date_pp.strftime('%Y-%m-%d')} to {max_date_pp.strftime('%Y-%m-%d')}")
print(f"Total likes: {df_alltime_per_party['like_count'].sum():,}\n")

# Show summary by party
print("Tweets per party:")
for party, group in df_alltime_per_party.groupby('party_norm'):
    print(f"  {party}: {len(group)} tweets, {group['like_count'].sum():,} total likes")

df_alltime_per_party[['party_norm', 'full_name', 'like_count', 'date_str', 'text_short']].head(15)

In [None]:
# Function to create per-party plots
def create_party_plot(df, party_name, timeframe='period', language='de', show_tweets=False, top_n=5):
    """Build a square (1080 px high) bar chart of one party's most-liked tweets.

    Args:
        df: DataFrame with the columns ``party_norm``, ``like_count``,
            ``full_name``, ``text``, ``date_str`` and ``created_at``
            (datetime-like).
        party_name: Normalized party name to select from ``party_norm``.
        timeframe: ``'period'`` uses ``STAND_TEXT`` / ``STAND_TEXT_EN`` as the
            subtitle; any other value uses the all-time per-party subtitle.
        language: ``'de'`` for German texts; any other value means English.
        show_tweets: If True, each bar is labelled with the author plus the
            truncated tweet text and shows its like count inside the bar;
            otherwise bars are labelled "name (party)".
        top_n: Maximum number of tweets shown (default 5, matching the title).

    Returns:
        A ``go.Figure``, or None (after printing a warning) when the party has
        no tweets in `df`.
    """
    def _clip(value, limit):
        """Cut text to `limit` characters, adding '...' only if shortened."""
        value = '' if value is None else str(value)
        return value if len(value) <= limit else value[:limit] + '...'

    # Filter for specific party. Parties merged by normalization (e.g. CDU and
    # CSU) can contribute more than top_n rows, so cut to the top_n most-liked
    # first, then sort ascending so the most-liked tweet ends up at the top.
    df_party = df[df['party_norm'] == party_name]
    df_top = df_party.sort_values('like_count', ascending=False).head(top_n)
    rows = df_top.sort_values('like_count', ascending=True).to_dict('records')

    if len(rows) == 0:
        print(f"⚠️  No tweets found for party: {party_name}")
        return None

    if show_tweets:
        # Truncate tweet text for display on chart, add author name
        max_len = 60
        labels = [f"{r['full_name']}: {_clip(r['text'], max_len)}" for r in rows]
    else:
        # Create labels with politician name and party
        labels = [f"{r['full_name']} ({r['party_norm']})" for r in rows]

    # Create hover text
    if language == 'de':
        date_label = "Datum"
        dates = [r['date_str'] for r in rows]
    else:
        date_label = "Date"
        dates = [r['created_at'].strftime('%m/%d/%Y') for r in rows]
    hover_texts = [
        f"<b>{r['full_name']}</b> ({r['party_norm']})<br>"
        f"{date_label}: {shown_date}<br>"
        f"Likes: {r['like_count']:,}<br><br>"
        f"<i>{_clip(r['text'], 200)}</i>"
        for r, shown_date in zip(rows, dates)
    ]

    # Set titles based on language and timeframe
    if language == 'de':
        if timeframe == 'period':
            title_text = f"<b>Top {top_n} Meist-gelikte Tweets - {party_name}</b><br><sub style='font-size:0.7em;'>{STAND_TEXT}</sub>"
        else:
            title_text = f"<b>Top {top_n} Meist-gelikte Tweets - {party_name} (Alle Zeiten)</b><br><sub style='font-size:0.7em;'>{ALLTIME_PP_TEXT_DE}</sub>"
        xaxis_title = "<b>Anzahl Likes</b>"
    else:  # English
        if timeframe == 'period':
            title_text = f"<b>Top {top_n} Most-Liked Tweets - {party_name}</b><br><sub style='font-size:0.7em;'>{STAND_TEXT_EN}</sub>"
        else:
            title_text = f"<b>Top {top_n} Most-Liked Tweets - {party_name} (All-Time)</b><br><sub style='font-size:0.7em;'>{ALLTIME_PP_TEXT_EN}</sub>"
        xaxis_title = "<b>Number of Likes</b>"

    # Bars sit on numeric positions and the labels are attached as tick text.
    # Using the labels as categories would merge bars with identical labels
    # (one politician with several top tweets) onto a single row.
    positions = list(range(len(rows)))
    bar_kwargs = dict(
        y=positions,
        x=[r['like_count'] for r in rows],
        orientation='h',
        marker_color=get_party_color(party_name),  # one color for the whole party
        customdata=hover_texts,
        hovertemplate="%{customdata}<extra></extra>",
    )
    if show_tweets:
        # For tweet text versions, add like count inside bars
        bar_kwargs.update(
            text=[f"<b>{r['like_count']:,} Likes</b>" for r in rows],
            textposition='inside',
            insidetextanchor='end',
            textfont=dict(color='white', size=16),
        )

    fig = go.Figure()
    fig.add_trace(go.Bar(**bar_kwargs))

    # Instagram square format (1080x1080); the text version needs a wider
    # left margin and a slightly smaller font for the long labels.
    chart_height = 1080
    if show_tweets:
        left_margin = 600
        y_font_size = 12
    else:
        left_margin = 200
        y_font_size = 14

    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            xanchor='center',
            font=dict(size=26, color='white')
        ),
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white', size=14, family='Arial'),
        height=chart_height,
        showlegend=False,
        margin=dict(b=60, t=120, l=left_margin, r=100),
        xaxis=dict(
            title=xaxis_title,
            gridcolor='#333333',
            title_font=dict(size=18),
            tickfont=dict(size=14)
        ),
        yaxis=dict(
            gridcolor='#333333',
            tickfont=dict(size=y_font_size),
            tickmode='array',
            tickvals=positions,
            ticktext=labels,
            zeroline=False  # a numeric axis would otherwise draw a line at y=0
        )
    )

    return fig

print("✅ Per-party plot function defined")

In [None]:
# Create visualizations for each party - PERIOD
#
# For every party found in df_period_per_party, render four chart variants
# (politician-name labels and tweet-text labels, each in German and English)
# and export each figure as a PNG into GRAPHICS_DIR.
print("="*60)
print("CREATING PER-PARTY VISUALIZATIONS (Period)")
print("="*60)

parties = df_period_per_party['party_norm'].unique()

# One entry per chart variant, in render order:
# (language code, show_tweets flag, file-name suffix)
period_variants = [
    ('de', False, 'period_de'),            # Politician names version (German)
    ('en', False, 'period_en'),            # Politician names version (English)
    ('de', True, 'period_with_text_de'),   # Tweet text version (German)
    ('en', True, 'period_with_text_en'),   # Tweet text version (English)
]

# Count the files that are really written so the summary below is accurate
# even when a variant is skipped.
saved_period_count = 0

for party in sorted(parties):
    print(f"\n--- {party} ---")

    # Replace '/' and spaces so the party name can be used inside a file name.
    safe_party = party.replace('/', '_').replace(' ', '_')

    for variant_lang, variant_show_tweets, variant_suffix in period_variants:
        fig = create_party_plot(
            df_period_per_party, party, 'period', variant_lang,
            show_tweets=variant_show_tweets,
        )
        if not fig:
            # No figure came back for this variant (presumably the party has
            # no rows to plot - confirm against create_party_plot); skip it.
            continue

        output_path = GRAPHICS_DIR / f"top_5_{safe_party}_{variant_suffix}.png"
        fig.write_image(output_path, width=1080, height=1080, scale=2)
        print(f"‚úÖ Saved: {output_path} (1080x1080px - Instagram square)")
        saved_period_count += 1

print(f"\n‚úÖ Created {saved_period_count} per-party period visualizations (Instagram square format)")

In [None]:
# Create visualizations for each party - ALL-TIME
#
# For every party found in df_alltime_per_party, render four chart variants
# (politician-name labels and tweet-text labels, each in German and English)
# and export each figure as a PNG into GRAPHICS_DIR. The export height is
# taken from the figure's own layout; the width depends on the variant.
print("="*60)
print("CREATING PER-PARTY VISUALIZATIONS (All-Time)")
print("="*60)

parties_alltime = df_alltime_per_party['party_norm'].unique()

# One entry per chart variant, in render order:
# (language code, show_tweets flag, file-name suffix, export width in px)
alltime_variants = [
    ('de', False, 'alltime_de', 1080),            # Politician names version (German)
    ('en', False, 'alltime_en', 1080),            # Politician names version (English)
    ('de', True, 'alltime_with_text_de', 1400),   # Tweet text version (German)
    ('en', True, 'alltime_with_text_en', 1400),   # Tweet text version (English)
]

# Count the files that are really written so the summary below is accurate
# even when a variant is skipped.
saved_alltime_count = 0

for party in sorted(parties_alltime):
    print(f"\n--- {party} ---")

    # Replace '/' and spaces so the party name can be used inside a file name.
    safe_party = party.replace('/', '_').replace(' ', '_')

    for variant_lang, variant_show_tweets, variant_suffix, variant_width in alltime_variants:
        fig = create_party_plot(
            df_alltime_per_party, party, 'alltime', variant_lang,
            show_tweets=variant_show_tweets,
        )
        if not fig:
            # No figure came back for this variant (presumably the party has
            # no rows to plot - confirm against create_party_plot); skip it.
            continue

        output_path = GRAPHICS_DIR / f"top_5_{safe_party}_{variant_suffix}.png"
        fig.write_image(output_path, width=variant_width, height=fig.layout.height, scale=2)
        print(f"‚úÖ Saved: {output_path}")
        saved_alltime_count += 1

print(f"\n‚úÖ Created {saved_alltime_count} per-party all-time visualizations")

In [None]:
# Display tweet links for easy screenshotting and create Excel file
#
# Prints every tweet of the four rankings (overall period, overall all-time,
# per-party period, per-party all-time) together with its URL, collects the
# same rows into excel_data, and writes them to a formatted workbook in
# GRAPHICS_DIR. df_excel is left defined for later cells.
import openpyxl
from openpyxl.styles import Font, Alignment, PatternFill
from openpyxl.utils.dataframe import dataframe_to_rows


def _tweet_url(row):
    """Build the twitter.com status URL from a row's username and tweet_id."""
    return f"https://twitter.com/{row['username']}/status/{row['tweet_id']}"


def _print_tweet(row, tweet_url, show_party):
    """Print like count, author, URL and the first 100 characters of a tweet.

    The party is appended to the headline only when ``show_party`` is true.
    """
    headline = f"\n{row['like_count']:,} likes | {row['full_name']} (@{row['username']})"
    if show_party:
        headline += f" | {row['party_norm']}"
    print(headline)
    print(f"üì± {tweet_url}")
    print(f"   \"{row['text'][:100]}...\"")


def _excel_record(row, category, tweet_url):
    """Return the dict that becomes one spreadsheet row for a tweet."""
    return {
        'Category': category,
        'Party': row['party_norm'],
        'Name': row['full_name'],
        'Username': row['username'],
        'Likes': row['like_count'],
        'Date': row['date_str'],
        'Tweet URL': tweet_url,
        'Tweet Text': row['text'],
    }


def _collect_tweets(df, category, show_party):
    """Print each tweet in ``df`` (most liked first) and return its Excel records."""
    records = []
    for _, row in df.sort_values('like_count', ascending=False).iterrows():
        tweet_url = _tweet_url(row)
        _print_tweet(row, tweet_url, show_party)
        records.append(_excel_record(row, category, tweet_url))
    return records


def _collect_party_tweets(df, scope_label):
    """Print and collect the tweets in ``df`` party by party, in sorted party order.

    ``scope_label`` ('Period' or 'All-Time') is embedded in the category text.
    """
    records = []
    for party in sorted(df['party_norm'].unique()):
        print(f"\n{'‚îÄ' * 80}")
        print(f"üèõÔ∏è  {party}")
        print('‚îÄ' * 80)
        party_tweets = df[df['party_norm'] == party]
        records.extend(
            _collect_tweets(
                party_tweets,
                f'Per-Party Top 5 - {scope_label} - {party}',
                show_party=False,
            )
        )
    return records


# Prepare data for Excel
excel_data = []

print("=" * 80)
print("OVERALL TOP 10 - PERIOD TWEET LINKS (for screenshots)")
print("=" * 80)
excel_data.extend(_collect_tweets(df_period, 'Overall Top 10 - Period', show_party=True))

print("\n\n" + "=" * 80)
print("OVERALL TOP 10 - ALL-TIME TWEET LINKS (for screenshots)")
print("=" * 80)
excel_data.extend(_collect_tweets(df_alltime, 'Overall Top 10 - All-Time', show_party=True))

print("\n\n" + "=" * 80)
print("PER-PARTY TOP 5 - PERIOD TWEET LINKS (for screenshots)")
print("=" * 80)
excel_data.extend(_collect_party_tweets(df_period_per_party, 'Period'))

print("\n\n" + "=" * 80)
print("PER-PARTY TOP 5 - ALL-TIME TWEET LINKS (for screenshots)")
print("=" * 80)
excel_data.extend(_collect_party_tweets(df_alltime_per_party, 'All-Time'))

# Create Excel file
df_excel = pd.DataFrame(excel_data)

# Create Excel writer
excel_path = GRAPHICS_DIR / "top_liked_tweets_links.xlsx"
with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
    df_excel.to_excel(writer, sheet_name='Tweet Links', index=False)

    # Get the worksheet
    worksheet = writer.sheets['Tweet Links']

    # Set column widths
    worksheet.column_dimensions['A'].width = 40  # Category
    worksheet.column_dimensions['B'].width = 15  # Party
    worksheet.column_dimensions['C'].width = 25  # Name
    worksheet.column_dimensions['D'].width = 20  # Username
    worksheet.column_dimensions['E'].width = 12  # Likes
    worksheet.column_dimensions['F'].width = 12  # Date
    worksheet.column_dimensions['G'].width = 60  # Tweet URL
    worksheet.column_dimensions['H'].width = 80  # Tweet Text

    # Style objects are built once and shared by all cells that use them.
    header_fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid')
    header_font = Font(bold=True, color='FFFFFF', size=12)
    header_alignment = Alignment(horizontal='center', vertical='center')
    link_font = Font(color='0563C1', underline='single')
    text_alignment = Alignment(wrap_text=True, vertical='top')

    # Format header row
    for cell in worksheet[1]:
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = header_alignment

    # Format data rows. The zero-based positions follow the key order of the
    # record dicts: 4 = Likes, 6 = Tweet URL, 7 = Tweet Text.
    for row in worksheet.iter_rows(min_row=2, max_row=worksheet.max_row):
        # Make URLs clickable
        row[6].hyperlink = row[6].value
        row[6].font = link_font

        # Wrap text for tweet column
        row[7].alignment = text_alignment

        # Format likes as number with comma separator
        row[4].number_format = '#,##0'

print(f"\n\n‚úÖ Excel file created: {excel_path}")
print(f"   Total tweets: {len(excel_data)}")
print(f"   Columns: {', '.join(df_excel.columns)}")

## View All Tweet Links in Table Format

In [None]:
# Display the Excel data in a scrollable table format
#
# Shows the rows of df_excel as a styled, scrollable HTML table inside the
# notebook, with the tweet text shortened and the tweet URLs clickable.
from IPython.display import display, HTML

# Create a display version with truncated tweet text for better readability
df_display = df_excel.copy()
df_display['Tweet Text Short'] = df_display['Tweet Text'].str[:80] + '...'
df_display = df_display.drop('Tweet Text', axis=1)

# Reorder columns for better display
df_display = df_display[['Category', 'Party', 'Name', 'Username', 'Likes', 'Date', 'Tweet URL', 'Tweet Text Short']]

print(f"Total tweets in Excel: {len(df_excel)}")
print(f"\nBreakdown by category:")
print(df_excel['Category'].value_counts().to_string())
print("\n" + "="*80)
print("SCROLLABLE TABLE (All Tweet Links)")
print("="*80 + "\n")

# Create scrollable HTML table.
# render_links=True turns the URL cells into <a> elements, so the links are
# clickable and the `.scrollable-table a` rule below actually applies.
# NOTE(review): escape=False inserts tweet text into the page unescaped. That
# is only safe if the stored text is already HTML-entity encoded - confirm
# against the ingestion code before sharing the rendered notebook.
html_table = df_display.to_html(
    index=False,
    escape=False,
    render_links=True,
    classes='table table-striped',
)

# Swap the opening tag pandas generates for one that carries only the class
# targeted by the stylesheet below.
styled_table = html_table.replace(
    '<table border="1" class="dataframe table table-striped">',
    '<table class="scrollable-table">',
)

scrollable_html = f"""
<div style="max-height: 600px; overflow-y: auto; overflow-x: auto; border: 1px solid #ddd; padding: 10px; background-color: white;">
    <style>
        .scrollable-table {{
            width: 100%;
            border-collapse: collapse;
        }}
        .scrollable-table th {{
            position: sticky;
            top: 0;
            background-color: #366092;
            color: white;
            font-weight: bold;
            padding: 10px;
            text-align: left;
            border: 1px solid #ddd;
            z-index: 10;
        }}
        .scrollable-table td {{
            padding: 8px;
            border: 1px solid #ddd;
            text-align: left;
        }}
        .scrollable-table tr:nth-child(even) {{
            background-color: #f9f9f9;
        }}
        .scrollable-table tr:hover {{
            background-color: #e8f4f8;
        }}
        .scrollable-table a {{
            color: #0563C1;
            text-decoration: underline;
        }}
    </style>
    {styled_table}
</div>
"""

display(HTML(scrollable_html))