In [1]:
import pandas as pd
from pathlib import Path
import yaml

# --- Load parameters.yml (same file the pipelines use) ---
PARAMS_FILE = Path("C:/Users/felix/Documents/xminer/src/xminer/config/parameters.yml")
assert PARAMS_FILE.exists(), f"parameters.yml not found: {PARAMS_FILE}"

with PARAMS_FILE.open("r", encoding="utf-8") as f:
    params = yaml.safe_load(f) or {}

YEAR = int(params.get("year", 2025))
MONTH = int(params.get("month", 12))
YM = f"{YEAR:04d}{MONTH:02d}"

STAND_TEXT = f"Erhoben für {MONTH:02d}/{YEAR}"  # << das nutzt der Plot

GRAPHICS_BASE_DIR = Path(
    params.get(
        "graphics_base_dir",
        r"C:/Users/felix/Documents/xminer/outputs",
    )
)

GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics"
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)


In [2]:
month = "12"
year = "2025"

trends_path = Path(f'C:/Users/felix/Documents/xminer/outputs/{year}{month}/trends/x_trends_{year}{month}.csv')
assert trends_path.exists(), f"File not found: {trends_path}"

df_trends = pd.read_csv(trends_path, low_memory=False)
df_trends.head()


Unnamed: 0,woeid,place_name,trend_name,tweet_count,rank,retrieved_at,source_version
0,23424829,Germany,Advent,53378.0,1,2025-12-01 07:00:03.179267+00:00,v2
1,23424829,Germany,Wochenstart,,2,2025-12-01 07:00:03.179267+00:00,v2
2,23424829,Germany,#Gießen,39296.0,3,2025-12-01 07:00:03.179267+00:00,v2
3,23424829,Germany,#Giessen,39268.0,4,2025-12-01 07:00:03.179267+00:00,v2
4,23424829,Germany,Antifa,69637.0,5,2025-12-01 07:00:03.179267+00:00,v2


In [None]:
# Build the dataframe
df_trend_counts = (
    df_trends["trend_name"]
    .value_counts()
    .head(10)
    .rename_axis("Trend Name")
    .reset_index(name="Anzahl")
)

df_trend_counts = df_trend_counts.rename(columns={"Trend Name": "Trend\nName", "Anzahl": "Anzahl\n(Count)"})


df_trend_counts.index = range(1, len(df_trend_counts) + 1)


# Trends to highlight
HIGHLIGHT_TRENDS = {
    "Meinungsfreiheit",
    "Zensur",
    "HateAid",
    "Feuerwerk",
    "Europe",
}

def color_trends(row):
    if row["Trend\nName"] in HIGHLIGHT_TRENDS:
        return ["background-color: #ffcccc; color: #000000"] * len(row)
    return [""] * len(row)

df_trend_counts_style = (
    df_trend_counts
    .style
    .apply(color_trends, axis=1)
)

df_trend_counts_style


Unnamed: 0,Trend Name,Anzahl (Count)
1,#dasperfektedinner,41
2,Christmas,39
3,Meinungsfreiheit,34
4,Zensur,29
5,Advent,27
6,HateAid,26
7,Frohe Weihnachten,25
8,Wochenstart,25
9,Feuerwerk,25
10,Europe,24


In [6]:
month = "12"
year = "2025"

base_path = Path(f'C:/Users/felix/Documents/xminer/data/politicians_{month}.csv')
assert base_path.exists(), f'File not found: {base_path}'

df_politicians = pd.read_csv(base_path, low_memory=False, sep=';')

print("Shape:", df_politicians.shape)

Shape: (632, 32)


In [7]:
title = df_politicians['AKAD_TITEL'].astype('string').fillna('').str.strip()
first = df_politicians['VORNAME'].astype('string').fillna('').str.strip().str.split().str[0]
last  = df_politicians['NACHNAME'].astype('string').fillna('').str.strip()

df_politicians['FULLNAME'] = (title + ' ' + first + ' ' + last).str.split().str.join(' ')


In [8]:
base_path = Path(f'C:/Users/felix/Documents/xminer/outputs/{year}{month}/tweets/tweets_{year}{month}.csv')
assert base_path.exists(), f'File not found: {base_path}'

df_tweets = pd.read_csv(base_path, low_memory=False)

# Parse datetime columns if present
for col in ['created_at', 'retrieved_at']:
    if col in df_tweets.columns:
        df_tweets[col] = pd.to_datetime(df_tweets[col], utc=True, errors='coerce')

print("Shape:", df_tweets.shape)

df_tweets = df_tweets.merge(
    df_politicians[['USERNAME', 'FULLNAME', 'PARTEI_KURZ']],
    right_on='USERNAME',
    left_on='username',
    how='left'
)


UNION_MAP = {"CDU": "CDU/CSU", "CSU": "CDU/CSU"}

def normalize_party(df: pd.DataFrame) -> pd.DataFrame:
    if "PARTEI_KURZ" in df.columns:
        df["PARTEI_KURZ"] = (
            df["PARTEI_KURZ"]
            .astype(str)
            .str.strip()
            .str.upper()
            .replace(UNION_MAP)
        )
    return df

df_tweets = normalize_party(df_tweets)
df_tweets.PARTEI_KURZ.value_counts()

Shape: (8529, 19)


PARTEI_KURZ
AFD                      4830
BÜNDNIS 90/DIE GRÜNEN    1442
CDU/CSU                  1167
DIE LINKE.                809
SPD                       281
Name: count, dtype: int64

In [9]:
df_tweets.head()

Unnamed: 0,tweet_id,author_id,username,created_at,text,lang,conversation_id,in_reply_to_user_id,possibly_sensitive,like_count,...,quote_count,bookmark_count,impression_count,source,entities,referenced_tweets,retrieved_at,USERNAME,FULLNAME,PARTEI_KURZ
0,1995281744342446453,1412287272,AfDProtschka,2025-12-01 00:00:13+00:00,@julius__boehm Deren Kunde. Sind unsere Wähler.,de,1995204308154163339,9.696286e+17,False,1,...,0,0,39,,"{'mentions': [{'id': '969628633819213824', 'en...","[{'id': 1995204308154163339, 'type': 'replied_...",2025-12-03 02:37:51.362875+00:00,AfDProtschka,Stephan Protschka,AFD
1,1995282899185320085,1412287272,AfDProtschka,2025-12-01 00:04:48+00:00,@FamUnt Ihre Kunden sind zum Großteil unsere W...,de,1995184775129113032,71547090.0,False,185,...,0,1,1112,,"{'mentions': [{'id': '71547087', 'end': 7, 'st...","[{'id': 1995184775129113032, 'type': 'replied_...",2025-12-03 02:37:51.362873+00:00,AfDProtschka,Stephan Protschka,AFD
2,1995296477669679302,1690037692866686976,AArpaschi,2025-12-01 00:58:45+00:00,Genau so machen wir das! https://t.co/Qq5OhUs7CW,de,1995296477669679302,,False,5,...,0,0,120,,"{'urls': [{'end': 48, 'url': 'https://t.co/Qq5...","[{'id': 1995111205753475283, 'type': 'quoted'}]",2025-12-03 08:53:18.082250+00:00,AArpaschi,Alexander Arpaschi,AFD
3,1995354270393045350,1640320419474337794,ThomasF70168477,2025-12-01 04:48:24+00:00,RT @Birgit_Kelle: Erinnern sich alle an Avery ...,de,1995354270393045350,,False,0,...,0,0,0,,"{'mentions': [{'id': '782067510', 'end': 16, '...","[{'id': 1995192847461474424, 'type': 'retweete...",2025-12-03 08:38:18.693550+00:00,ThomasF70168477,Thomas Fetsch,AFD
4,1995354300306870644,1640320419474337794,ThomasF70168477,2025-12-01 04:48:31+00:00,RT @Birgit_Kelle: Dies Land wird nicht durch I...,de,1995354300306870644,,False,0,...,0,0,0,,"{'mentions': [{'id': '782067510', 'end': 16, '...","[{'id': 1995231554147389765, 'type': 'retweete...",2025-12-03 08:38:18.693547+00:00,ThomasF70168477,Thomas Fetsch,AFD


In [44]:
PARTY_COLORS = {
    "CDU/CSU": "#000000",
    "CDU": "#000000",
    "CSU": "#000000",
    "SPD": "#E3000F",
    "GRÜNE": "#1AA64A",
    "BÜNDNIS 90/DIE GRÜNEN": "#1AA64A",
    "BÜNDNIS 90/DIE GRÜNEN": "#1AA64A",
    "DIE GRÜNEN": "#1AA64A",
    "DIE LINKE.": "#BE3075",
    "LINKE": "#BE3075",
    "FDP": "#FFED00",
    "AFD": "#009EE0",
    "BSW": "#009688",
    "FW": "#F28F00",
    "SSW": "#00A3E0",
    "PIRATEN": "#FF8800",
    "PARTEI": "#9E9E9E",
    "ÖDP": "#FF6A00",
    "OEDP": "#FF6A00",
}

dfs = []

for term in ["Meinungsfreiheit", "Zensur", "HateAid", "Feuerwerk", "Europe"]:
    df_filtered = df_tweets[df_tweets["text"].str.contains(term, na=False)]

    counts = (
        df_filtered.groupby("PARTEI_KURZ")
        .size()
        .rename(f"Anzahl\n\"{term}\"\nPosts")
        .reset_index()
    )

    def color_party(row):
        color = PARTY_COLORS.get(row["PARTEI_KURZ"], "")
        return [f"background-color: {color}"] * len(row) if color else [""] * len(row)

    counts_style = counts.style.apply(color_party, axis=1)
    
    dfs.append(counts)

counts_total = (
    df_tweets.groupby("PARTEI_KURZ")
    .size()
    .rename("Anzahl\nPosts\nInsgesamt")
    .reset_index()
)

dfs_merged = []

for df in dfs:
    df = df.merge(counts_total, on="PARTEI_KURZ", how="left")
    df = df.rename(columns={"PARTEI_KURZ": "Partei"})
    df["Partei"] = df["Partei"].replace({"BÜNDNIS 90/DIE GRÜNEN": "DIE GRÜNEN"})
    dfs_merged.append(df)

dfs_styled = []

for df in dfs_merged:
    # df = df.rename(columns={"PARTEI_KURZ": "Partei"})
    def color_party(row):
        color = PARTY_COLORS.get(row["Partei"], "")
        return [f"background-color: {color}"] * len(row) if color else [""] * len(row)
    
    df_style = (
        df
        .style
        .apply(color_party, axis=1)
        .set_table_styles([
    {"selector": "th", "props": [
        ("white-space", "pre-line"),
        ("text-align", "center"),
        ("vertical-align", "middle"),
    ]}
])
)   

    dfs_styled.append(df_style)

In [45]:
for df_style in dfs_styled:
    display(df_style)

Unnamed: 0,Partei,"Anzahl ""Meinungsfreiheit"" Posts",Anzahl Posts Insgesamt
0,AFD,41,4830
1,DIE GRÜNEN,5,1442
2,CDU/CSU,6,1167
3,DIE LINKE.,4,809
4,SPD,2,281


Unnamed: 0,Partei,"Anzahl ""Zensur"" Posts",Anzahl Posts Insgesamt
0,AFD,34,4830
1,DIE GRÜNEN,1,1442
2,CDU/CSU,2,1167
3,DIE LINKE.,1,809
4,SPD,1,281


Unnamed: 0,Partei,"Anzahl ""HateAid"" Posts",Anzahl Posts Insgesamt
0,AFD,48,4830
1,DIE GRÜNEN,13,1442
2,CDU/CSU,3,1167
3,DIE LINKE.,3,809
4,SPD,6,281


Unnamed: 0,Partei,"Anzahl ""Feuerwerk"" Posts",Anzahl Posts Insgesamt
0,AFD,3,4830
1,DIE GRÜNEN,6,1442
2,DIE LINKE.,1,809


Unnamed: 0,Partei,"Anzahl ""Europe"" Posts",Anzahl Posts Insgesamt
0,AFD,15,4830
1,DIE GRÜNEN,11,1442
2,CDU/CSU,32,1167
3,DIE LINKE.,1,809
4,SPD,3,281


In [48]:
PARTY_COLORS = {
    "CDU/CSU": "#000000",
    "CDU": "#000000",
    "CSU": "#000000",
    "SPD": "#E3000F",
    "GRÜNE": "#1AA64A",
    "BÜNDNIS 90/DIE GRÜNEN": "#1AA64A",
    "DIE GRÜNEN": "#1AA64A",
    "DIE LINKE.": "#BE3075",
    "LINKE": "#BE3075",
    "FDP": "#FFED00",
    "AFD": "#009EE0",
    "BSW": "#009688",
    "FW": "#F28F00",
    "SSW": "#00A3E0",
    "PIRATEN": "#FF8800",
    "PARTEI": "#9E9E9E",
    "ÖDP": "#FF6A00",
    "OEDP": "#FF6A00",
}

TREND_TRANSLATIONS = {
    "Meinungsfreiheit": "Freedom of speech",
    "Zensur": "Censorship",
    "HateAid": "HateAid",
    "Feuerwerk": "Fireworks",
    "Europe": "Europe",
}

dfs = []

for term in ["Meinungsfreiheit", "Zensur", "HateAid", "Feuerwerk", "Europe"]:
    df_filtered = df_tweets[df_tweets["text"].str.contains(term, na=False)]

    term_en = TREND_TRANSLATIONS.get(term, term)
    col_name = f'Count\n"{term}"\n({term_en})\nPosts'

    counts = (
        df_filtered.groupby("PARTEI_KURZ")
        .size()
        .rename(col_name)
        .reset_index()
    )

    def color_party(row):
        color = PARTY_COLORS.get(row["PARTEI_KURZ"], "")
        return [f"background-color: {color}"] * len(row) if color else [""] * len(row)

    counts_style = counts.style.apply(color_party, axis=1)

    dfs.append(counts)

counts_total = (
    df_tweets.groupby("PARTEI_KURZ")
    .size()
    .rename("Count\nTotal\nPosts")
    .reset_index()
)

dfs_merged = []

for df in dfs:
    df = df.merge(counts_total, on="PARTEI_KURZ", how="left")
    df = df.rename(columns={"PARTEI_KURZ": "Party"})
    df["Party"] = df["Party"].replace({"BÜNDNIS 90/DIE GRÜNEN": "DIE GRÜNEN"})
    dfs_merged.append(df)

dfs_styled = []

for df in dfs_merged:
    def color_party(row):
        color = PARTY_COLORS.get(row["Party"], "")
        return [f"background-color: {color}"] * len(row) if color else [""] * len(row)

    df_style = (
        df
        .style
        .apply(color_party, axis=1)
        .set_table_styles([
            {"selector": "th", "props": [
                ("white-space", "pre-line"),
                ("text-align", "center"),
                ("vertical-align", "middle"),
            ]}
        ])
    )

    dfs_styled.append(df_style)


In [49]:
for df_style in dfs_styled:
    display(df_style)

Unnamed: 0,Party,"Count ""Meinungsfreiheit"" (Freedom of speech) Posts",Count Total Posts
0,AFD,41,4830
1,DIE GRÜNEN,5,1442
2,CDU/CSU,6,1167
3,DIE LINKE.,4,809
4,SPD,2,281


Unnamed: 0,Party,"Count ""Zensur"" (Censorship) Posts",Count Total Posts
0,AFD,34,4830
1,DIE GRÜNEN,1,1442
2,CDU/CSU,2,1167
3,DIE LINKE.,1,809
4,SPD,1,281


Unnamed: 0,Party,"Count ""HateAid"" (HateAid) Posts",Count Total Posts
0,AFD,48,4830
1,DIE GRÜNEN,13,1442
2,CDU/CSU,3,1167
3,DIE LINKE.,3,809
4,SPD,6,281


Unnamed: 0,Party,"Count ""Feuerwerk"" (Fireworks) Posts",Count Total Posts
0,AFD,3,4830
1,DIE GRÜNEN,6,1442
2,DIE LINKE.,1,809


Unnamed: 0,Party,"Count ""Europe"" (Europe) Posts",Count Total Posts
0,AFD,15,4830
1,DIE GRÜNEN,11,1442
2,CDU/CSU,32,1167
3,DIE LINKE.,1,809
4,SPD,3,281


In [33]:
df_style
df_renamed = df_style.rename(columns={"Trend Name": "Trend\nName", "Anzahl": "Anzahl\n(Count)"})

df_renamed.style.set_table_styles([
    {"selector": "th", "props": [("white-space", "pre-line")]}
])


AttributeError: 'Styler' object has no attribute 'rename'

In [9]:
from xminer.utils.utils_plots import (
        plot_party_hbar,
        plot_party_stack_tweets_engagement,
        plot_party_pie_pct,
        STAND_TEXT,
        GRAPHICS_DIR,
    )