In [1]:
import pandas as pd
from pathlib import Path
import yaml

# --- Load parameters.yml (same file the pipelines use) ---
# Single anchor for the local checkout so the path prefix is written only once.
PROJECT_ROOT = Path("C:/Users/felix/Documents/xminer")
PARAMS_FILE = PROJECT_ROOT / "src" / "xminer" / "config" / "parameters.yml"
if not PARAMS_FILE.exists():
    # Explicit raise instead of `assert`: assertions are stripped under `python -O`.
    raise FileNotFoundError(f"parameters.yml not found: {PARAMS_FILE}")

with PARAMS_FILE.open("r", encoding="utf-8") as f:
    # An empty YAML document loads as None; fall back to {} so .get() below works.
    params = yaml.safe_load(f) or {}

# Reporting period; the defaults apply only when the keys are absent from the file.
YEAR = int(params.get("year", 2025))
MONTH = int(params.get("month", 11))
YM = f"{YEAR:04d}{MONTH:02d}"  # e.g. "202511", used as the output sub-folder name

# Caption text ("collected for MM/YYYY"). The original note says the plot uses it.
# NOTE(review): a later cell imports STAND_TEXT from xminer.utils.utils_plots, which
# rebinds this name -- confirm which value the plots actually read.
STAND_TEXT = f"Erhoben für {MONTH:02d}/{YEAR}"

# Output root can be overridden from parameters.yml; otherwise <checkout>/outputs.
GRAPHICS_BASE_DIR = Path(params.get("graphics_base_dir", PROJECT_ROOT / "outputs"))

# Figures for this period go to <base>/<YYYYMM>/graphics; create it up front.
GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics"
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)


In [2]:
# Derive the period strings from the values loaded from parameters.yml instead of
# repeating them as literals, so the notebook cannot drift from the pipeline config.
# Both stay plain strings because later cells interpolate them into file paths.
month = f"{MONTH:02d}"
year = f"{YEAR:04d}"

base_path = Path(f'C:/Users/felix/Documents/xminer/data/politicians_{month}.csv')
assert base_path.exists(), f'File not found: {base_path}'

# The politicians export is semicolon-separated.
df_politicians = pd.read_csv(base_path, low_memory=False, sep=';')

print("Shape:", df_politicians.shape)

Shape: (632, 32)


In [3]:
# Build a display name "<title> <first given name> <surname>" for every politician.
title = df_politicians['AKAD_TITEL'].astype('string').fillna('').str.strip()
first = (
    df_politicians['VORNAME'].astype('string').fillna('').str.strip()
    .str.split().str[0]   # keep only the first given name
    # Splitting an empty string yields an empty list, and indexing it gives NaN.
    # Without this fillna the NaN would propagate through the concatenation below
    # and blank out the whole FULLNAME for rows that lack a first name.
    .fillna('')
)
last  = df_politicians['NACHNAME'].astype('string').fillna('').str.strip()

# split + join collapses the double spaces left behind by empty parts.
df_politicians['FULLNAME'] = (title + ' ' + first + ' ' + last).str.split().str.join(' ')


In [4]:
base_path = Path(f'C:/Users/felix/Documents/xminer/outputs/{year}{month}/tweets/tweets_{year}{month}.csv')
assert base_path.exists(), f'File not found: {base_path}'

df_tweets = pd.read_csv(base_path, low_memory=False)

# Parse datetime columns if present; unparseable values become NaT instead of raising.
for col in ['created_at', 'retrieved_at']:
    if col in df_tweets.columns:
        df_tweets[col] = pd.to_datetime(df_tweets[col], utc=True, errors='coerce')

print("Shape:", df_tweets.shape)

# Attach display name and party to every tweet via the account handle.
n_tweets_before_merge = len(df_tweets)
df_tweets = df_tweets.merge(
    df_politicians[['USERNAME', 'FULLNAME', 'PARTEI_KURZ']],
    right_on='USERNAME',
    left_on='username',
    how='left'
)

# A left join keeps every tweet exactly once only if USERNAME is unique in the
# lookup table; duplicates would silently multiply tweets and inflate every sum below.
assert len(df_tweets) == n_tweets_before_merge, (
    f"Merge changed the tweet count from {n_tweets_before_merge} to {len(df_tweets)}: "
    "USERNAME is not unique in df_politicians"
)


# CDU and CSU are reported as one group.
UNION_MAP = {"CDU": "CDU/CSU", "CSU": "CDU/CSU"}

def normalize_party(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize the ``PARTEI_KURZ`` column of ``df``.

    Values are stripped of surrounding whitespace, upper-cased, and CDU / CSU are
    merged into ``"CDU/CSU"`` via ``UNION_MAP``. Missing values stay missing; they
    are not turned into the literal string ``"NAN"``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to normalize. It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same object that was passed in. Returned unchanged when it has no
        ``PARTEI_KURZ`` column.
    """
    if "PARTEI_KURZ" not in df.columns:
        return df

    party = df["PARTEI_KURZ"]
    normalized = (
        party
        .astype(str)
        .str.strip()
        .str.upper()
        .replace(UNION_MAP)
    )
    # astype(str) stringifies missing values ("nan" / "None"); restore them to NaN
    # so rows without a party do not show up as a bogus party in later group-bys.
    df["PARTEI_KURZ"] = normalized.where(party.notna())
    return df

# Normalize party labels on the merged tweets, then show the tweet count per party.
df_tweets = normalize_party(df_tweets)
df_tweets["PARTEI_KURZ"].value_counts()

Shape: (9367, 19)


PARTEI_KURZ
AFD                      5499
CDU/CSU                  1401
BÜNDNIS 90/DIE GRÜNEN    1258
DIE LINKE.                864
SPD                       345
Name: count, dtype: int64

In [None]:
# Build the retweet-free view in this cell so it runs on a fresh kernel
# (Restart & Run All); no cell above this one defines df_no_retweets.
# A tweet counts as a retweet when its text starts with "RT @".
df_no_retweets = df_tweets[~df_tweets["text"].str.match(r"^\s*RT\s+@", na=False)]

# Compare row/column counts without and with retweets.
df_no_retweets.shape, df_tweets.shape

((6423, 22), (9367, 22))

In [12]:
# Tweet text plus the engagement counters, first 20 rows of all tweets.
preview_cols = [
    "text",
    "like_count",
    "reply_count",
    "retweet_count",  # adjust if the column has a different name in your export
    "quote_count",
    "bookmark_count",
    "impression_count",
]
df_tweets[preview_cols].head(20)

Unnamed: 0,text,like_count,reply_count,retweet_count,quote_count,bookmark_count,impression_count
0,@EEricoh Nein. Warum? Seien Sie bitte nicht so...,0,0,0,0,0,26
1,Guten Morgen aus Staufen.\nMein Musiktipp für ...,1,1,0,0,0,623
2,https://t.co/p4MEkfA391,1,1,0,0,0,314
3,Während in Brandenburg Kliniken schließen und ...,4,0,0,0,0,55
4,"Das ist eine interessante Entwicklung, die sic...",9,0,1,0,0,292
5,RT @UlrichVosgerau: Wir gehen jetzt in unserer...,0,0,185,0,0,0
6,"Ihr wollt die totale Chatkontrolle, aber die P...",4885,89,656,13,54,33899
7,@polenz_r @welt Alle Ihre Denkmuster sind so d...,2,0,0,0,0,13
8,@polenz_r @welt Ich stimme Ihnen zu: Machen Si...,1,0,0,0,0,2
9,Der Verfassungsschutz entlarvt sich selbst in ...,5,0,0,0,2,182


In [11]:
# Original (non-retweet) posts of one account with their engagement counters.
account_cols = [
    "text",
    "like_count",
    "reply_count",
    "retweet_count",  # adjust if the column has a different name in your export
    "quote_count",
    "bookmark_count",
    "impression_count",
]
is_account = df_no_retweets["username"].eq("Ricarda_Lang")
df_no_retweets.loc[is_account, account_cols]

Unnamed: 0,text,like_count,reply_count,retweet_count,quote_count,bookmark_count,impression_count
1492,Vor genau einem Jahr veröffentlichte Olaf Scho...,1778,105,66,10,32,102487
2597,"„Ich glaube nicht mehr länger, dass Demokratie...",7095,540,1245,49,183,203440
2598,Stattdessen braucht es europäische Alternative...,657,159,39,65,20,131616
4065,"Das Deutschlandticket wird immer teurer, weil ...",6356,620,1023,43,63,176321
4523,"@MickyBeisenherz Aber geile Schuhe, muss man s...",1030,33,5,1,0,15941
5257,Statt jetzt wieder 3 Tage über einen offensich...,4746,574,599,37,46,131992
5271,Finde ich den Satz von Friedrich Merz peinlich...,1982,51,138,4,10,29394
5620,"Keine Sorge, das trickelt bestimmt bald down. ...",3632,505,473,38,78,237284
6417,Winterlauf 🥶🏃‍♀️Habt einen schönen Sonntag! ht...,2669,287,24,10,20,169319
6654,"Die Deutsche Bahn, wenn es im Winter schneit. ...",484,57,18,2,5,16009


In [5]:
# A tweet counts as a retweet when its text starts with "RT @" (leading whitespace
# allowed); rows with missing text are treated as not being retweets.
retweet_mask = df_tweets["text"].str.match(r"^\s*RT\s+@", na=False)
df_no_retweets = df_tweets[~retweet_mask]

In [6]:
# Plot helpers from the project package.
# NOTE(review): STAND_TEXT and GRAPHICS_DIR are also assigned in the first cell of
# this notebook (derived from parameters.yml). Importing them here rebinds both names
# to whatever the module defines -- confirm that this override is intended.
from xminer.utils.utils_plots import (
        plot_party_hbar,
        plot_party_stack_tweets_engagement,
        plot_party_pie_pct,
        STAND_TEXT,
        GRAPHICS_DIR,
    )

In [7]:
# Engagement counters that are summarised per account.
metric_cols = [
    "like_count",
    "reply_count",
    "retweet_count",  # adjust if the column has a different name in your export
    "quote_count",
    "bookmark_count",
    "impression_count",
]

# Named aggregations: output column -> (source column, aggregation).
# Order matters for the resulting column layout: tweet count first, then
# sum / median / mean for every metric, then the descriptive columns.
agg_map = {
    "tweets_total": ("username", "size"),
    **{
        f"{metric}_{stat}": (metric, stat)
        for metric in metric_cols
        for stat in ("sum", "median", "mean")
    },
    # Name and party are constant per account, so the first value is representative.
    "FULLNAME": ("FULLNAME", "first"),
    "PARTEI_KURZ": ("PARTEI_KURZ", "first"),
}

# One row per account, computed on original posts only (retweets excluded).
per_user = df_no_retweets.groupby("username").agg(**agg_map)
df_users = per_user.reset_index()

df_users.head()


Unnamed: 0,username,tweets_total,like_count_sum,like_count_median,like_count_mean,reply_count_sum,reply_count_median,reply_count_mean,retweet_count_sum,retweet_count_median,...,quote_count_median,quote_count_mean,bookmark_count_sum,bookmark_count_median,bookmark_count_mean,impression_count_sum,impression_count_median,impression_count_mean,FULLNAME,PARTEI_KURZ
0,AArpaschi,29,78,2.0,2.689655,10,0.0,0.344828,23,0.0,...,0.0,0.068966,1,0.0,0.034483,16851,52.0,581.068966,Alexander Arpaschi,AFD
1,AMattfeldt,17,7,0.0,0.411765,1,0.0,0.058824,0,0.0,...,0.0,0.0,0,0.0,0.0,1695,65.0,99.705882,Andreas Mattfeldt,CDU/CSU
2,AfDProtschka,99,8498,26.0,85.838384,844,3.0,8.525253,1345,5.0,...,0.0,0.333333,95,0.0,0.959596,115368,484.0,1165.333333,Stephan Protschka,AFD
3,AfDRehm,28,136,3.0,4.857143,20,0.0,0.714286,50,1.0,...,0.0,0.035714,6,0.0,0.214286,4921,68.0,175.75,Lukas Rehm,AFD
4,AlexanderWolfHH,38,70,1.0,1.842105,6,0.0,0.157895,27,1.0,...,0.0,0.0,0,0.0,0.0,3737,60.5,98.342105,Dr. Alexander Wolf,AFD


In [8]:
# Top-20 accounts by total likes; the recorded output shows the figure being
# written as top20_sum_likes.png.
likes_sum_opts = {
    "top_n": 20,
    "party_col": "PARTEI_KURZ",
    "title": "MdBs mit den meisten Likes",
    "x_label": "Summe Likes",
    "save_name": "top20_sum_likes",
}
plot_party_hbar(df_users, "username", "like_count_sum", **likes_sum_opts)

✅ Plot saved to: C:\Users\felix\Documents\xminer\outputs\202511\graphics\top20_sum_likes.png


In [15]:
# Top-20 accounts by total replies received; the recorded output shows the figure
# being written as top20_sum_replies.png.
replies_sum_opts = {
    "top_n": 20,
    "party_col": "PARTEI_KURZ",
    "title": "MdBs mit den meisten Antworten auf Posts",
    "x_label": "Summe Antworten",
    "save_name": "top20_sum_replies",
}
plot_party_hbar(df_users, "username", "reply_count_sum", **replies_sum_opts)

✅ Plot saved to: C:\Users\felix\Documents\xminer\outputs\202511\graphics\top20_sum_replies.png


In [16]:
# Top-20 accounts by average likes per post; the recorded output shows the figure
# being written as top20_mean_likes.png.
plot_party_hbar(
    df_users,
    'username',
    'like_count_mean',
    top_n=20,
    party_col='PARTEI_KURZ',
    title='MdBs mit den meisten Likes im Durchschnitt pro Post',
    # Axis label spelling fixed ("Durschnitt" -> "Durchschnitt") to match the title.
    x_label='Durchschnitt Likes',
    save_name="top20_mean_likes",
)

✅ Plot saved to: C:\Users\felix\Documents\xminer\outputs\202511\graphics\top20_mean_likes.png
