# Trends Visualization for Social Media

This notebook creates standardized visualizations from X (Twitter) trends data for posting on X and Instagram.

**Chart Types:**
1. Word Bubble Chart (word-cloud alternative) - Top trending words
2. Horizontal Bar Chart - Top N trends by tweet volume
3. Timeline Chart - Trend evolution over time
4. Pie Chart - Distribution of trending topics by category

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from pathlib import Path
import yaml
from datetime import datetime, timezone
from collections import Counter
import re

# Database connection
import sys
sys.path.insert(0, str(Path.cwd().parent / 'src'))
from xminer.io.db import engine
from sqlalchemy import text

In [None]:
# --- Load parameters from config ---
# The reporting period (year/month) and the output location are read from the
# shared parameters.yml; the path is relative to the notebook's working dir.
PARAMS_FILE = Path("../src/xminer/config/parameters.yml")
# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would defer the failure to the open() below.
if not PARAMS_FILE.exists():
    raise FileNotFoundError(f"parameters.yml not found: {PARAMS_FILE}")

with PARAMS_FILE.open("r", encoding="utf-8") as f:
    # An empty YAML document parses to None; fall back to an empty mapping so
    # the defaults below apply.
    params = yaml.safe_load(f) or {}

YEAR = int(params.get("year", 2025))
MONTH = int(params.get("month", 11))
# Fail early on an impossible month instead of silently querying nothing and
# writing into a bogus output folder.
if not 1 <= MONTH <= 12:
    raise ValueError(f"month must be between 1 and 12, got {MONTH}")
YM = f"{YEAR:04d}{MONTH:02d}"

# Subtitle placed under every chart title (German: "collected for MM/YYYY").
STAND_TEXT = f"Erhoben für {MONTH:02d}/{YEAR}"

# Output directory for graphics
GRAPHICS_BASE_DIR = Path(
    params.get(
        "graphics_base_dir",
        "../outputs",
    )
)

# Charts are grouped per period: <base>/<YYYYMM>/graphics/trends
GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics" / "trends"
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)

print(f"Graphics will be saved to: {GRAPHICS_DIR}")
print(f"Period: {YEAR}-{MONTH:02d}")

In [None]:
# --- Social Media Export Sizes ---
# Pixel dimensions per target format:
#   X (Twitter)        1200x675   (16:9 landscape)
#   Instagram square   1080x1080  (1:1)
#   Instagram portrait 1080x1350  (4:5)
EXPORT_SIZES = dict(
    x_twitter=dict(width=1200, height=675),
    instagram_square=dict(width=1080, height=1080),
    instagram_portrait=dict(width=1080, height=1350),
)

# Preset applied when no export size is requested
DEFAULT_SIZE = "x_twitter"

## Load Trends Data from Database

In [None]:
# Query trends data for the specified month.
# YEAR and MONTH are sent as bound parameters (:year / :month) instead of being
# formatted into the SQL text, so the statement itself stays constant and
# value quoting is left to the database driver.
query = """
SELECT 
    woeid,
    place_name,
    trend_name,
    tweet_count,
    rank,
    retrieved_at,
    source_version
FROM public.x_trends
WHERE 
    EXTRACT(YEAR FROM retrieved_at) = :year
    AND EXTRACT(MONTH FROM retrieved_at) = :month
    AND place_name = 'Germany'
ORDER BY retrieved_at DESC, rank ASC
"""

with engine.connect() as conn:
    df_trends = pd.read_sql(
        text(query),
        conn,
        params={"year": YEAR, "month": MONTH},
    )

# Normalize the timestamp column to timezone-aware (UTC) datetimes
df_trends['retrieved_at'] = pd.to_datetime(df_trends['retrieved_at'], utc=True)

if df_trends.empty:
    # Make an empty month obvious here rather than through errors further down.
    print(f"Warning: no trend records found for {YEAR}-{MONTH:02d}")

print(f"Loaded {len(df_trends):,} trend records")
print(f"Date range: {df_trends['retrieved_at'].min()} to {df_trends['retrieved_at'].max()}")
print(f"\nUnique trends: {df_trends['trend_name'].nunique():,}")
df_trends.head(10)

## Data Preprocessing

In [None]:
# One row per trend, aggregated over every snapshot it appears in:
#   tweet_count -> summed volume, rank -> mean rank,
#   appearances -> number of rows with a retrieved_at value.
# Rows are ordered from highest to lowest summed volume.
top_trends = (
    df_trends.groupby('trend_name')
    .agg(
        tweet_count=('tweet_count', 'sum'),
        rank=('rank', 'mean'),
        appearances=('retrieved_at', 'count'),
    )
    .sort_values(by='tweet_count', ascending=False)
    .reset_index()
)

print("\nTop 10 trends by total tweet volume:")
top_trends.head(10)

In [None]:
# Extract words from trend names for word analysis

# German function words that carry no topical meaning. Entries must be
# lowercase because candidates are lowercased before the lookup.
_STOP_WORDS = frozenset({
    'der', 'die', 'das', 'den', 'dem', 'des', 'ein', 'eine',
    'und', 'in', 'zu', 'mit', 'von', 'auf', 'für',
})
_MARKER_RE = re.compile(r'[#@]')
_WORD_RE = re.compile(r'\w+')


def extract_words(trend_name):
    """Extract meaningful words from a trend name.

    Hashtag and mention markers ('#', '@') are deleted (joining whatever was
    on either side of them), the rest is split into runs of word characters,
    and words of one or two characters as well as stop words
    (case-insensitive) are dropped. Original casing is kept in the result.

    Args:
        trend_name: Trend label; any value pandas treats as missing yields
            an empty list.

    Returns:
        List of words in order of appearance.
    """
    if pd.isna(trend_name):
        return []

    # Drop the marker symbols but keep the words they were attached to
    cleaned = _MARKER_RE.sub('', str(trend_name))

    return [
        word
        for word in _WORD_RE.findall(cleaned)
        if len(word) > 2 and word.lower() not in _STOP_WORDS
    ]

# Flatten the per-trend word lists into one list (one entry per occurrence),
# then tally how often each word appears
all_words = [
    word
    for name in df_trends['trend_name']
    for word in extract_words(name)
]
word_counts = Counter(all_words)

print(f"\nTotal words extracted: {len(all_words):,}")
print(f"Unique words: {len(word_counts):,}")
print("\nTop 20 most common words:")
word_counts.most_common(20)

## Visualization Functions

In [None]:
def save_figure(fig, filename, export_size=DEFAULT_SIZE, show_fig=True):
    """
    Save a Plotly figure as a PNG sized for a social media format.

    The file is written to GRAPHICS_DIR as ``<filename>_<export_size>.png``.

    Args:
        fig: Plotly figure object
        filename: Base filename without extension
        export_size: One of the keys of EXPORT_SIZES ('x_twitter',
            'instagram_square', 'instagram_portrait'). An unknown value falls
            back to DEFAULT_SIZE with a printed warning.
        show_fig: Whether to display the figure in the notebook

    Returns:
        The figure that was passed in.
    """
    if export_size not in EXPORT_SIZES:
        # Keep the best-effort fallback, but name the file after the preset
        # that is actually applied so the suffix never misstates the size.
        print(f"Warning: unknown export size {export_size!r}, using {DEFAULT_SIZE!r}")
        export_size = DEFAULT_SIZE
    size = EXPORT_SIZES[export_size]

    # Save with size suffix
    save_path = GRAPHICS_DIR / f"{filename}_{export_size}.png"
    fig.write_image(
        save_path,
        width=size['width'],
        height=size['height'],
        scale=2  # High resolution for social media
    )
    print(f"✅ Saved: {save_path} ({size['width']}x{size['height']}px)")

    if show_fig:
        fig.show()

    return fig

## 1. Word Cloud (Bubble Chart) Visualization

In [None]:
# For word cloud, we'll use a custom scatter-based approach with Plotly
def create_word_scatter(word_counts_dict, top_n=50, title="Top Trending Words"):
    """
    Create a bubble chart of top words (alternative to word cloud).

    Each bubble's size, colour and label font size scale with the word's
    count; bubble positions are pseudo-random but reproducible.

    Args:
        word_counts_dict: Word frequencies as a Counter or a plain
            ``{word: count}`` mapping.
        top_n: Maximum number of words to show.
        title: Chart title; the period subtitle (STAND_TEXT) is added below it.

    Returns:
        Plotly figure. When there are no words, the figure carries only the
        layout (title, dark background) and no trace.
    """
    # Counter(...) accepts both a Counter and a plain mapping, so callers are
    # not forced to pass an object that already has .most_common().
    top_words = Counter(word_counts_dict).most_common(top_n)

    fig = go.Figure()

    # Without this guard an empty input produced a column-less DataFrame and
    # a KeyError on 'count'.
    if top_words:
        df_words = pd.DataFrame(top_words, columns=['word', 'count'])

        # Private fixed-seed generator: yields the same positions as seeding
        # the global NumPy RNG with 42, without resetting global random state
        # for the rest of the notebook.
        rng = np.random.RandomState(42)
        df_words['x'] = rng.rand(len(df_words))
        df_words['y'] = rng.rand(len(df_words))

        max_count = df_words['count'].max()

        fig.add_trace(go.Scatter(
            x=df_words['x'],
            y=df_words['y'],
            mode='text+markers',
            text=df_words['word'],
            marker=dict(
                size=df_words['count'],
                sizemode='diameter',
                # sizeref is chosen relative to the largest count so bubble
                # sizes depend on the ratio to the top word, not raw counts
                sizeref=max_count / 100,
                color=df_words['count'],
                colorscale='Viridis',
                showscale=False,
                line=dict(width=1, color='white')
            ),
            textfont=dict(
                # Label font size ranges up to 40 for the most frequent word
                size=df_words['count'] / max_count * 30 + 10,
                color='white'
            ),
            hovertemplate='<b>%{text}</b><br>Count: %{marker.size:,}<extra></extra>'
        ))

    title_text = f"{title}<br><sub style='font-size:0.85em;'>{STAND_TEXT}</sub>"

    fig.update_layout(
        title=dict(text=title_text, x=0.5, xanchor='center'),
        xaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        yaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white'),
        margin=dict(l=20, r=20, t=100, b=20),
        hovermode='closest'
    )

    return fig

# Create word bubble chart.
# The title is German for "most mentioned words in trends"; its umlaut was
# previously stored mis-encoded and showed up garbled in the exported image.
fig_words = create_word_scatter(
    word_counts,
    top_n=50,
    title="Meistgenannte Wörter in Trends"
)

# Save for X/Twitter
save_figure(fig_words, "trending_words", export_size="x_twitter")

## 2. Top Trends Bar Chart

In [None]:
def create_top_trends_bar(df_trends_agg, top_n=15, title="Top Trends nach Tweet-Volumen"):
    """
    Create horizontal bar chart of top trends.

    Args:
        df_trends_agg: DataFrame with 'trend_name' and 'tweet_count' columns.
            The first ``top_n`` rows are plotted as-is, so the frame is
            expected to be ordered from highest to lowest volume already.
        top_n: Number of trends (rows) to show.
        title: Chart title; the period subtitle (STAND_TEXT) is added below it.

    Returns:
        Plotly figure with one horizontal bar per trend, first row on top.
    """
    # Reverse the order so that the first row ends up at the top of the chart
    df_top = df_trends_agg.head(top_n).iloc[::-1]

    fig = go.Figure()

    fig.add_trace(go.Bar(
        y=df_top['trend_name'],
        x=df_top['tweet_count'],
        orientation='h',
        # Colour each bar by its position on a continuous scale. Slicing the
        # discrete Viridis palette yields at most 10 colours, fewer than the
        # default top_n of 15, which left some bars without an assigned colour.
        marker=dict(
            color=list(range(len(df_top))),
            colorscale='Viridis_r',
            showscale=False,
        ),
        text=[f"{v:,.0f}" for v in df_top['tweet_count']],
        textposition='outside',
        textfont=dict(color='white', size=12),
        hovertemplate=(
            "<b>%{y}</b><br>"
            "Tweets: %{x:,.0f}<br>"
            "<extra></extra>"
        )
    ))

    title_text = f"{title}<br><sub style='font-size:0.85em;'>{STAND_TEXT}</sub>"

    fig.update_layout(
        title=dict(text=title_text, x=0.5, xanchor='center', font=dict(size=20)),
        xaxis_title="Anzahl Tweets",
        yaxis_title="",
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white'),
        # Wide left margin for the trend labels, right margin for the
        # value labels placed outside the bars
        margin=dict(l=200, r=100, t=100, b=60),
        height=max(500, 40 * len(df_top)),
        xaxis=dict(gridcolor='#333333'),
        yaxis=dict(gridcolor='#333333')
    )

    return fig

# Build the bar chart from the aggregated trends table
fig_bar = create_top_trends_bar(df_trends_agg=top_trends, top_n=15)

# Export a landscape (X/Twitter) and a square (Instagram) variant;
# only the first export also displays the figure
save_figure(fig_bar, filename="top_trends_bar", export_size="x_twitter", show_fig=True)
save_figure(fig_bar, filename="top_trends_bar", export_size="instagram_square", show_fig=False)

## 3. Timeline Visualization

In [None]:
def create_trends_timeline(df_trends, top_n_trends=10, title="Trend-Entwicklung über Zeit"):
    """
    Create a line chart of daily tweet volume for the highest-volume trends.

    The trends are selected by their summed ``tweet_count`` over all rows;
    for each of them the counts are then summed per calendar date of
    ``retrieved_at``.

    Args:
        df_trends: DataFrame with 'trend_name', 'tweet_count' and a
            datetime-typed 'retrieved_at' column (the ``.dt`` accessor is used).
        top_n_trends: Number of trends to plot.
        title: Chart title (default is German for "trend development over
            time"); the period subtitle (STAND_TEXT) is added below it.

    Returns:
        Plotly figure with one line per selected trend.
    """
    # Names of the trends with the largest summed volume, largest first
    top_trend_names = (
        df_trends.groupby('trend_name')['tweet_count']
        .sum()
        .nlargest(top_n_trends)
        .index.tolist()
    )

    # Keep only rows of those trends; copy so the caller's frame is untouched
    df_timeline = df_trends[df_trends['trend_name'].isin(top_trend_names)].copy()

    # Aggregate by date and trend.
    # NOTE(review): counts of all snapshots of a day are added up. If
    # tweet_count is a running total per snapshot, days with more snapshots
    # are inflated - confirm whether 'max' would be the intended aggregate.
    df_timeline['date'] = df_timeline['retrieved_at'].dt.date
    df_daily = (
        df_timeline.groupby(['date', 'trend_name'])['tweet_count']
        .sum()
        .reset_index()
    )

    fig = go.Figure()

    colors = px.colors.qualitative.Set3

    for i, trend_name in enumerate(top_trend_names):
        df_trend = df_daily[df_daily['trend_name'] == trend_name]

        fig.add_trace(go.Scatter(
            x=df_trend['date'],
            y=df_trend['tweet_count'],
            mode='lines+markers',
            name=trend_name,
            # Cycle through the palette if there are more trends than colours
            line=dict(width=2, color=colors[i % len(colors)]),
            marker=dict(size=6),
            hovertemplate=(
                f"<b>{trend_name}</b><br>"
                "Date: %{x}<br>"
                "Tweets: %{y:,.0f}<br>"
                "<extra></extra>"
            )
        ))

    title_text = f"{title}<br><sub style='font-size:0.85em;'>{STAND_TEXT}</sub>"

    fig.update_layout(
        title=dict(text=title_text, x=0.5, xanchor='center', font=dict(size=20)),
        xaxis_title="Datum",
        yaxis_title="Anzahl Tweets",
        plot_bgcolor='#1a1a1a',
        paper_bgcolor='#1a1a1a',
        font=dict(color='white'),
        # Legend sits inside the plot area (top-left) on a translucent box
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01,
            bgcolor='rgba(0,0,0,0.5)'
        ),
        margin=dict(l=60, r=40, t=100, b=60),
        xaxis=dict(gridcolor='#333333'),
        yaxis=dict(gridcolor='#333333'),
        hovermode='x unified'
    )

    return fig

# Build the timeline for the eight highest-volume trends
fig_timeline = create_trends_timeline(df_trends=df_trends, top_n_trends=8)

# Export the landscape variant for X/Twitter and display it
save_figure(fig_timeline, filename="trends_timeline", export_size="x_twitter", show_fig=True)

## 4. Trend Categories Pie Chart

In [None]:
# Ordered (category, keywords) rules; the first rule with a matching keyword
# wins. Keywords are lowercase because the trend name is lowercased first.
_CATEGORY_RULES = (
    ("Politik", ('bundestag', 'politik', 'wahl', 'spd', 'cdu', 'fdp', 'grüne', 'afd', 'scholz', 'merz')),
    ("Sport", ('fußball', 'fussball', 'bundesliga', 'champions', 'sport', 'bayern', 'dortmund')),
    ("Unterhaltung", ('film', 'musik', 'serie', 'netflix', 'spotify', 'konzert')),
    ("Technologie", ('tech', 'ai', 'ki', 'computer', 'apple', 'samsung', 'google')),
    ("Nachrichten", ('news', 'breaking', 'ukraine', 'israel', 'gaza')),
)

# Keywords this short also occur inside unrelated words ("ai" in "Ukraine" or
# "Mainz", "ki" in "Kiel"), so they must match a whole token, not a substring.
_WHOLE_WORD_MAX_LEN = 2

# Runs of letters/digits; underscores count as separators ("KI_Gipfel").
_TOKEN_RE = re.compile(r'[^\W_]+')


def categorize_trend(trend_name):
    """
    Simple categorization of trends based on keywords.
    You can customize this by editing _CATEGORY_RULES.

    Matching is case-insensitive. Keywords longer than two characters match
    anywhere inside the name (so compound hashtags such as "#Bundestagswahl"
    are caught); keywords of up to two characters must be a token of their own.

    Args:
        trend_name: Trend label; any value pandas treats as missing is
            categorized as "Sonstiges".

    Returns:
        One of "Politik", "Sport", "Unterhaltung", "Technologie",
        "Nachrichten" or "Sonstiges" (fallback when nothing matches).
    """
    if pd.isna(trend_name):
        return "Sonstiges"

    trend_lower = str(trend_name).lower()
    tokens = set(_TOKEN_RE.findall(trend_lower))

    for category, keywords in _CATEGORY_RULES:
        for keyword in keywords:
            if len(keyword) <= _WHOLE_WORD_MAX_LEN:
                if keyword in tokens:
                    return category
            elif keyword in trend_lower:
                return category

    return "Sonstiges"

def create_category_pie(df_trends_agg, title="Trend-Kategorien Verteilung"):
    """
    Build a pie chart of tweet volume per trend category.

    Every row is labelled via categorize_trend() on its 'trend_name', and
    'tweet_count' is summed per label.

    Args:
        df_trends_agg: DataFrame with 'trend_name' and 'tweet_count' columns;
            it is not modified.
        title: Chart title; the period subtitle (STAND_TEXT) is added below it.

    Returns:
        Tuple ``(fig, category_counts)``: the Plotly figure and the
        per-category totals as a DataFrame, largest category first.
    """
    # Label each trend on a new frame, leaving the input untouched
    categorized = df_trends_agg.assign(
        category=df_trends_agg['trend_name'].apply(categorize_trend)
    )

    # Volume per category, largest first
    volume_by_category = categorized.groupby('category')['tweet_count'].sum()
    category_counts = volume_by_category.sort_values(ascending=False).reset_index()

    pie_trace = go.Pie(
        labels=category_counts['category'],
        values=category_counts['tweet_count'],
        marker=dict(colors=px.colors.qualitative.Set2),
        textinfo='label+percent',
        hovertemplate=(
            "<b>%{label}</b><br>"
            "Tweets: %{value:,.0f}<br>"
            "Anteil: %{percent}<br>"
            "<extra></extra>"
        ),
        textfont=dict(size=14, color='white'),
    )
    fig = go.Figure(data=[pie_trace])

    subtitle = f"<sub style='font-size:0.85em;'>{STAND_TEXT}</sub>"
    fig.update_layout(
        title=dict(
            text=f"{title}<br>{subtitle}",
            x=0.5,
            xanchor='center',
            font=dict(size=20),
        ),
        paper_bgcolor='#1a1a1a',
        font=dict(color='white'),
        margin=dict(l=20, r=20, t=100, b=20),
    )

    return fig, category_counts

# Build the pie chart; the per-category totals are returned alongside it
fig_pie, category_stats = create_category_pie(df_trends_agg=top_trends)

print("\nCategory distribution:", category_stats, sep="\n")

# Export the square (Instagram) variant first and display it, then the
# landscape (X/Twitter) variant without displaying it again
save_figure(fig_pie, filename="trends_categories", export_size="instagram_square", show_fig=True)
save_figure(fig_pie, filename="trends_categories", export_size="x_twitter", show_fig=False)

## Summary: Export All Charts

In [None]:
# Summary of all exported files.
# The emoji in the messages below were previously stored mis-encoded and
# printed as garbage characters.
import os

print(f"\n{'='*80}")
print(f"All charts exported to: {GRAPHICS_DIR}")
print(f"{'='*80}\n")

# List every PNG in the output directory (sorted by path) with its size;
# this includes files left over from earlier runs, not only this session's.
png_files = sorted(GRAPHICS_DIR.glob("*.png"))
for png_file in png_files:
    size = os.path.getsize(png_file) / 1024  # KB
    print(f"  📊 {png_file.name} ({size:.1f} KB)")

print(f"\n✅ Total: {len(png_files)} chart(s) ready for social media posting!")
print("\nCharts created:")
print("  1. Trending Words (word bubble chart)")
print("  2. Top Trends Bar Chart (horizontal bars)")
print("  3. Trends Timeline (line chart over time)")
print("  4. Category Distribution (pie chart)")
print("\nFormats: X/Twitter (1200x675), Instagram Square (1080x1080)")