In [2]:
import pandas as pd
from pathlib import Path
import yaml

# --- Load parameters.yml (same file the pipelines use) ---
# NOTE(review): absolute, user-specific path. Other cells in this notebook read from
# C:/Users/felix/..., so this cell only runs on the machine that owns this path.
PARAMS_FILE = Path("/Users/margespinderi/Documents/PoliMetrics/xminer/src/xminer/config/parameters.yml")
assert PARAMS_FILE.exists(), f"parameters.yml not found: {PARAMS_FILE}"

with PARAMS_FILE.open("r", encoding="utf-8") as f:
    # An empty YAML document parses to None; fall back to an empty mapping.
    params = yaml.safe_load(f) or {}

# Use the defaults both when a key is absent and when it is present but null
# (a plain .get default would pass None on to int()).
YEAR = int(params["year"]) if params.get("year") is not None else 2025
MONTH = int(params["month"]) if params.get("month") is not None else 11
assert 1 <= MONTH <= 12, f"month out of range in parameters.yml: {MONTH}"
YM = f"{YEAR:04d}{MONTH:02d}"

# Subtitle that the plot functions look up as a notebook global.
STAND_TEXT = f"Erhoben für {MONTH:02d}/{YEAR}"

# Fallback has a single leading slash: "//Users/..." is kept verbatim by pathlib on
# POSIX and is parsed as a UNC share on Windows.
GRAPHICS_BASE_DIR = Path(
    params.get("graphics_base_dir")
    or "/Users/margespinderi/Documents/PoliMetrics/xminer/outputs"
)

# Per-period output folder (<base>/<YYYYMM>/graphics) that the plot functions write into.
GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics"
GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)


In [2]:
import pandas as pd
from pathlib import Path

# Reporting period as strings; they are interpolated into the file names below.
# NOTE(review): hard-coded here independently of the YEAR / MONTH values read from
# parameters.yml in the first cell - the two can drift apart.
year, month = "2025", "11"

# NOTE(review): absolute path on one specific Windows machine.
base_path = Path(f'C:/Users/felix/Documents/xminer/data/politicians_{month}.csv')
assert base_path.exists(), f'File not found: {base_path}'

# The file is semicolon-separated.
df_politicians = pd.read_csv(base_path, sep=';', low_memory=False)

print("Shape:", df_politicians.shape)
df_politicians.head(10)

Shape: (632, 32)


Unnamed: 0,KEY_COLUMN,ID,GEBURTSDATUM,GEBURTSORT,GEBURTSLAND,STERBEDATUM,GESCHLECHT,FAMILIENSTAND,RELIGION,BERUF,...,WP_WP,WP_MDBWP_VON,WP_MDBWP_BIS,WP_WKR_NUMMER,WP_WKR_NAME,WP_WKR_LAND,WP_LISTE,WP_MANDATSART,WP_INSTITUTIONEN,USERNAME
0,1,11000756,16.01.1948,Berlin,Deutschland,,männlich,"geschieden, 3 Kinder",konfessionslos,Rechtsanwalt,...,21.0,25.03.2025,,83.0,Berlin-Treptow-Köpenick,BE,BE,Direktwahl,,GregorGysi
1,2,11002718,18.02.1961,Aachen,Deutschland,,männlich,"verheiratet, 3 Kinder",römisch-katholisch,Ministerpräsident a. D.,...,21.0,25.03.2025,,86.0,Aachen I,NW,NW,Direktwahl,,ArminLaschet
2,3,11002720,19.01.1968,Dessau,Deutschland,,weiblich,"geschieden, 1 Kind",,Dipl.-Agraringenieurin,...,21.0,25.03.2025,,70.0,Anhalt – Dessau – Wittenberg,ST,ST,Landesliste,,
3,4,11002733,09.06.1961,Lorsch,Deutschland,,männlich,"verheiratet, 2 Kinder",evangelisch,Dipl.-Mathematiker,...,21.0,25.03.2025,,187.0,Bergstraße,HE,HE,Direktwahl,,meister_schafft
4,5,11002735,11.11.1955,Brilon,Deutschland,,männlich,"verheiratet, 3 Kinder",römisch-katholisch,Rechtsanwalt,...,21.0,25.03.2025,,146.0,Hochsauerlandkreis,NW,NW,Direktwahl,,_FriedrichMerz
5,6,11002735,11.11.1955,Brilon,Deutschland,,männlich,"verheiratet, 3 Kinder",römisch-katholisch,Rechtsanwalt,...,21.0,25.03.2025,,146.0,Hochsauerlandkreis,NW,NW,Direktwahl,,bundeskanzler
6,7,11002754,17.05.1962,Düren,Deutschland,,männlich,"verheiratet, 1 Kind",evangelisch,Politikwissenschaftler,...,21.0,25.03.2025,,89.0,Düren,NW,NW,Direktwahl,,_ThomasRachel
7,8,11002765,02.07.1965,Meckenheim,Deutschland,,männlich,"verheiratet, 3 Kinder",katholisch,Rechtsanwalt,...,21.0,25.03.2025,,97.0,Rhein-Sieg-Kreis II,NW,NW,Direktwahl,,n_roettgen
8,9,11003034,31.03.1958,Stralsund,Deutschland,,männlich,"geschieden, 2 Kinder",,Wirtschaftswissenschaftler,...,21.0,25.03.2025,,14.0,Rostock – Landkreis Rostock II,MV,MV,Landesliste,,DietmarBartsch
9,10,11003132,03.05.1966,Friedrichroda,Deutschland,,weiblich,"verpartnert, 2 Kinder",evangelisch,MdB,...,21.0,25.03.2025,,192.0,Erfurt – Weimar – Weimarer Land II,TH,TH,Landesliste,,GoeringEckardt


In [3]:
# Read the tweets_individual_month CSV for the selected period.
# NOTE(review): this rebinds df_politicians, replacing the frame that the previous
# cell read from politicians_{month}.csv.
base_path = Path(f'C:/Users/felix/Documents/xminer/outputs/{year}{month}/tweets/tweets_individual_month_{year}{month}.csv')
assert base_path.exists(), f'File not found: {base_path}'

df_politicians = pd.read_csv(base_path, low_memory=False)

# Convert whichever timestamp columns exist to UTC datetimes; unparseable values become NaT.
timestamp_cols = [name for name in ('created_at', 'retrieved_at') if name in df_politicians.columns]
for name in timestamp_cols:
    df_politicians[name] = pd.to_datetime(df_politicians[name], errors='coerce', utc=True)

print("Shape:", df_politicians.shape)

# CDU and CSU are reported as one joint group.
UNION_MAP = {"CDU": "CDU/CSU", "CSU": "CDU/CSU"}

def normalize_party(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the ``partei_kurz`` column of ``df`` and return ``df``.

    Labels are stripped, upper-cased and mapped through ``UNION_MAP``. Missing
    labels stay missing; casting them to ``str`` would create a literal
    "NAN"/"NONE" party that later shows up as its own group.

    The column is overwritten on the passed frame (in place) and the same
    object is returned. A frame without a ``partei_kurz`` column is returned
    unchanged.

    NOTE: a later cell of this notebook defines another ``normalize_party``
    that takes a single label instead of a DataFrame; re-running cells out of
    order can therefore bind this name to the other function.
    """
    if "partei_kurz" in df.columns:
        raw = df["partei_kurz"]
        cleaned = raw.astype(str).str.strip().str.upper().replace(UNION_MAP)
        # Restore missing values that astype(str) turned into text.
        df["partei_kurz"] = cleaned.where(raw.notna())
    return df

# Normalise the party labels (CDU and CSU become "CDU/CSU"), then show how many
# rows each party has.
df_politicians = normalize_party(df_politicians)
df_politicians.partei_kurz.value_counts()

Shape: (261, 28)


partei_kurz
AFD                      98
CDU/CSU                  71
BÜNDNIS 90/DIE GRÜNEN    39
SPD                      29
DIE LINKE.               24
Name: count, dtype: int64

In [4]:
# Per-party totals of tweet count and impressions.
party_sums = df_politicians.groupby("partei_kurz")[["n_tweets", "impressions_sum"]].sum()

# Each party's share of the overall total, as a 0-1 fraction.
party_sums["n_tweets_pct"] = party_sums["n_tweets"] / party_sums["n_tweets"].sum()
party_sums["impressions_pct"] = party_sums["impressions_sum"] / party_sums["impressions_sum"].sum()

# Turn the party index back into a regular column.
df_summary = party_sums.reset_index()


In [8]:
# Per-party tweet count, summed impressions and impression share from df_summary.
df_summary[['partei_kurz', 'n_tweets', 'impressions_sum', 'impressions_pct']]

Unnamed: 0,partei_kurz,n_tweets,impressions_sum,impressions_pct
0,AFD,5499,35012459,0.398997
1,BÜNDNIS 90/DIE GRÜNEN,1258,16883758,0.192405
2,CDU/CSU,1401,24059586,0.27418
3,DIE LINKE.,864,5679638,0.064724
4,SPD,345,6115752,0.069694


In [12]:
# NOTE(review): df_party is first assigned in the cell below that reads
# tweets_party_month_*.csv, so on a fresh top-to-bottom run this line raises NameError.
df_party.columns

Index(['partei_kurz', 'tweets', 'likes_sum', 'replies_sum', 'retweets_sum',
       'quotes_sum', 'bookmarks_sum', 'impressions_sum', 'engagement_sum',
       'engagement_rate_mean', 'like_to_reply_mean', 'retweet_to_like_mean',
       'likes_per_1k_followers_mean', 'engagement_per_1k_followers_mean',
       'verified_share', 'protected_share', 'engagement_rate_total'],
      dtype='object')

In [11]:
# Read the tweets_party_month CSV for the selected period.
base_path = Path(f'C:/Users/felix/Documents/xminer/outputs/{year}{month}/tweets/tweets_party_month_{year}{month}.csv')
assert base_path.exists(), f'File not found: {base_path}'

df_party = pd.read_csv(base_path, low_memory=False)

# Convert whichever timestamp columns exist to UTC datetimes; unparseable values become NaT.
timestamp_cols = [name for name in ('created_at', 'retrieved_at') if name in df_party.columns]
for name in timestamp_cols:
    df_party[name] = pd.to_datetime(df_party[name], errors='coerce', utc=True)

print("Shape:", df_party.shape)
df_party[['partei_kurz', 'tweets', 'impressions_sum']]

Shape: (5, 17)


Unnamed: 0,partei_kurz,tweets,impressions_sum
0,AFD,5499,35012459
1,CDU/CSU,1401,24059586
2,BÜNDNIS 90/DIE GRÜNEN,1258,16883758
3,DIE LINKE.,864,5679638
4,SPD,345,6115752


In [13]:
# Columns available in the party-level frame.
df_party.columns

Index(['partei_kurz', 'tweets', 'likes_sum', 'replies_sum', 'retweets_sum',
       'quotes_sum', 'bookmarks_sum', 'impressions_sum', 'engagement_sum',
       'engagement_rate_mean', 'like_to_reply_mean', 'retweet_to_like_mean',
       'likes_per_1k_followers_mean', 'engagement_per_1k_followers_mean',
       'verified_share', 'protected_share', 'engagement_rate_total'],
      dtype='object')

In [22]:
import pandas as pd

# 1) Keep the party label plus the additive metrics.
sum_cols = [
    "tweets", "likes_sum", "replies_sum", "retweets_sum",
    "quotes_sum", "bookmarks_sum", "impressions_sum", "engagement_sum",
]
sub = df_party.loc[:, ["partei_kurz"] + sum_cols].copy()

# 2) Report CDU and CSU as one group.
sub["partei_kurz"] = sub["partei_kurz"].replace({"CDU": "CDU/CSU", "CSU": "CDU/CSU"})

# 3) One row per party with summed metrics.
result = sub.groupby("partei_kurz", as_index=False).sum()

# 4) Add a "<metric>_pct" share column per metric; a zero grand total yields 0
#    instead of a division by zero.
totals = result[sum_cols].sum()
result = result.assign(
    **{
        f"{metric}_pct": (result[metric] / totals[metric] if totals[metric] else 0)
        for metric in sum_cols
    }
)

result


Unnamed: 0,partei_kurz,tweets,likes_sum,replies_sum,retweets_sum,quotes_sum,bookmarks_sum,impressions_sum,engagement_sum,tweets_pct,likes_sum_pct,replies_sum_pct,retweets_sum_pct,quotes_sum_pct,bookmarks_sum_pct,impressions_sum_pct,engagement_sum_pct
0,AFD,5499,2159756,130184,1613786,9797,39701,35012459,3953224,0.587061,0.758867,0.239967,0.748615,0.32532,0.635074,0.398997,0.701317
1,BÜNDNIS 90/DIE GRÜNEN,1258,253078,150120,172614,6724,7848,16883758,590384,0.134301,0.088923,0.276715,0.080073,0.223277,0.12554,0.192405,0.104736
2,CDU/CSU,1401,286657,176011,221662,9637,9553,24059586,703520,0.149568,0.100722,0.32444,0.102826,0.320007,0.152814,0.27418,0.124807
3,DIE LINKE.,864,77989,41457,102994,2006,2787,5679638,227233,0.092239,0.027403,0.076417,0.047778,0.066611,0.044582,0.064724,0.040312
4,SPD,345,68547,44735,44640,1951,2625,6115752,162498,0.036831,0.024085,0.08246,0.020708,0.064785,0.041991,0.069694,0.028828


In [25]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path

# -------------------------------------------------
# Party colors
# -------------------------------------------------
# Hex color per upper-cased party label. Several spellings of the same party
# (e.g. "CDU", "CSU", "CDU/CSU") map to the same color. Labels that are not
# listed here are drawn in grey ("#888888") by the pie chart function below.
# NOTE: the bar chart functions defined further down carry their own local
# PARTY_COLORS with slightly different keys (e.g. "DIE LINKE" without the dot).
PARTY_COLORS = {
    "CDU/CSU": "#000000",
    "CDU": "#000000",
    "CSU": "#000000",
    "SPD": "#E3000F",
    "GRÜNE": "#1AA64A",
    "BÜNDNIS 90/DIE GRÜNEN": "#1AA64A",
    "DIE LINKE.": "#BE3075",
    "LINKE": "#BE3075",
    "FDP": "#FFED00",
    "AFD": "#009EE0",
    "BSW": "#009688",
    "FW": "#F28F00",
    "SSW": "#00A3E0",
    "PIRATEN": "#FF8800",
    "PARTEI": "#9E9E9E",
    "ÖDP": "#FF6A00",
    "OEDP": "#FF6A00",
}

# -------------------------------------------------
# Map a raw party label to the key used in PARTY_COLORS
# -------------------------------------------------
# NOTE: this rebinds the name normalize_party, which an earlier cell defines
# as a DataFrame -> DataFrame helper.
def normalize_party(p: str) -> str:
    """Return the canonical upper-case label for a single party name.

    ``None`` gives an empty string. Any other value is converted with
    ``str()``, stripped and upper-cased; known aliases are then collapsed
    (CDU/CSU, the Greens, Die Linke, ÖDP). Unknown labels are returned in
    their stripped, upper-cased form.
    """
    if p is None:
        return ""

    label = str(p).strip().upper()

    # Spellings that must match exactly.
    exact_aliases = {
        "CDU": "CDU/CSU",
        "CSU": "CDU/CSU",
        "LINKE": "DIE LINKE.",
        "DIE LINKE": "DIE LINKE.",
        "DIE LINKE.": "DIE LINKE.",
        "ÖDP": "ÖDP",
        "OEDP": "ÖDP",
    }
    canonical = exact_aliases.get(label)
    if canonical is not None:
        return canonical

    # Greens: matched by prefix or by characteristic substrings.
    looks_green = (
        label.startswith(("GRÜN", "GRUEN"))
        or "GRUENE" in label
        or "B90" in label
    )
    return "GRÜNE" if looks_green else label


# -------------------------------------------------
# Pie chart function
# -------------------------------------------------
def plot_party_pie_pct(
    df_individual_base,
    pct_col: str,          # e.g., "followers_pct" (0–1 fractions)
    sum_col: str,          # e.g., "followers_sum" (absolute)
    save_name: str | None = None,
    title="Kumulierte Follower je Partei",
):
    """Draw a pie chart of per-party shares.

    Rows are grouped by the normalised ``partei_kurz`` label; ``pct_col`` and
    ``sum_col`` are summed per group and the slices are ordered by descending
    ``pct_col``. Slice sizes come from ``pct_col``; the hover text shows both
    the share and the absolute ``sum_col`` value.

    Args:
        df_individual_base: DataFrame with a ``partei_kurz`` column plus
            ``pct_col`` and ``sum_col``.
        pct_col: Column holding shares as 0-1 fractions.
        sum_col: Column holding the matching absolute values.
        save_name: If given, the figure is also written to
            ``GRAPHICS_DIR / f"{save_name}.png"``.
        title: Chart title. If the notebook global ``STAND_TEXT`` is set, it
            is appended as a subtitle.

    Returns:
        The plotly figure.

    Raises:
        ValueError: If ``pct_col`` or ``sum_col`` is missing.
        RuntimeError: If ``save_name`` is given but ``GRAPHICS_DIR`` is not
            defined.
    """
    for col in (pct_col, sum_col):
        if col not in df_individual_base.columns:
            raise ValueError(f"{col} not found in DataFrame")

    df = df_individual_base.copy()
    df["party_norm"] = df["partei_kurz"].apply(normalize_party)

    agg = (
        df.groupby("party_norm")
        .agg({pct_col: "sum", sum_col: "sum"})
        .sort_values(by=pct_col, ascending=False)
    )

    parties = agg.index.tolist()
    values = agg[pct_col].to_numpy()
    sums = agg[sum_col].to_numpy()
    colors = [PARTY_COLORS.get(p, "#888888") for p in parties]

    # Optional notebook-level subtitle; absent or empty means "title only".
    stand_text = globals().get("STAND_TEXT")
    title_text = f"{title}<br><sub style='font-size:0.85em;'>{stand_text}</sub>" if stand_text else title

    fig = go.Figure(
        go.Pie(
            labels=parties,
            values=values,
            marker=dict(colors=colors),
            textinfo="label+percent",
            hovertemplate="%{label}<br>%{customdata[0]:.2%} Anteil<br>%{customdata[1]:,.0f} total<extra></extra>",
            customdata=np.stack([values, sums], axis=-1),
        )
    )

    fig.update_layout(title=dict(text=title_text, x=0.5), height=600, margin=dict(t=100, b=20, l=20, r=20))

    if save_name:
        # A `global` statement never raises, so wrapping it in try/except NameError
        # cannot detect a missing GRAPHICS_DIR; look the name up explicitly instead.
        graphics_dir = globals().get("GRAPHICS_DIR")
        if graphics_dir is None:
            raise RuntimeError("GRAPHICS_DIR not defined.")
        save_path = Path(graphics_dir) / f"{save_name}.png"
        fig.write_image(save_path, width=900, height=600, scale=2)
        print(f"✅ Pie chart saved to: {save_path}")

    return fig

# Other metrics work the same way, e.g. followers:
# fig = plot_party_pie_pct(df_individual_base, pct_col="followers_pct", sum_col="followers_sum", save_name="party_followers_pct")

# Impression share per party. Columns, title and file name now all refer to the same
# metric; this call previously plotted the likes columns under the default follower
# title while saving as "party_impressions_pct".
# NOTE(review): if the likes share was intended instead, switch the columns back and
# rename save_name / title accordingly.
fig = plot_party_pie_pct(
    result,
    pct_col="impressions_sum_pct",
    sum_col="impressions_sum",
    save_name="party_impressions_pct",
    title="Anteil der Impressions je Partei",
)
fig.show()
    

✅ Pie chart saved to: C:\Users\felix\Documents\xminer\outputs\202511\graphics\party_impressions_pct.png


In [17]:
# Same df_summary selection as shown in an earlier cell.
df_summary[['partei_kurz', 'n_tweets', 'impressions_sum', 'impressions_pct']]

Unnamed: 0,partei_kurz,n_tweets,impressions_sum,impressions_pct
0,AFD,5499,35012459,0.398997
1,BÜNDNIS 90/DIE GRÜNEN,1258,16883758,0.192405
2,CDU/CSU,1401,24059586,0.27418
3,DIE LINKE.,864,5679638,0.064724
4,SPD,345,6115752,0.069694


In [21]:
# The share columns of `result` are created as f"{col}_pct", so the impression share
# is "impressions_sum_pct" ("impressions_pct" only exists on df_summary).
result[['partei_kurz', 'tweets', 'impressions_sum', 'impressions_sum_pct']]

Unnamed: 0,partei_kurz,tweets,impressions_sum,impressions_pct
0,AFD,5499,35012459,0.398997
1,BÜNDNIS 90/DIE GRÜNEN,1258,16883758,0.192405
2,CDU/CSU,1401,24059586,0.27418
3,DIE LINKE.,864,5679638,0.064724
4,SPD,345,6115752,0.069694


In [32]:
def plot_party_stack_tweets_engagement(
    df_party,
    tweets_pct_col: str = "tweets_pct",
    engagement_pct_col: str = "engagement_sum_pct",
    party_col: str = "partei_kurz",
    title: str | None = None,
    save_name: str | None = None,   # file name without directory or extension
    min_inside_pct: float = 0.08,   # shares at or above this are labelled inside the segment
    x_labels: tuple[str, str] = ("Anteil Tweets", "Anteil Impressions"),
):
    """Draw two stacked bars comparing each party's share of two metrics.

    Every row of ``df_party`` becomes one bar trace with two values: the
    share from ``tweets_pct_col`` (first bar) and the share from
    ``engagement_pct_col`` (second bar). Shares are expected as 0-1 fractions
    and are labelled as percentages.

    Args:
        df_party: DataFrame containing ``party_col`` and both share columns.
        tweets_pct_col: Column with the share shown in the first bar.
        engagement_pct_col: Column with the share shown in the second bar.
        party_col: Column with the party label, used for color and legend name.
        title: Optional chart title. If the notebook global ``STAND_TEXT`` is
            set, it is added as a subtitle (or used alone when no title is given).
        save_name: If given, the figure is also written to
            ``GRAPHICS_DIR / f"{save_name}.png"``.
        min_inside_pct: Shares below this threshold get their label positioned
            "outside" in black; larger ones are labelled "inside" with a text
            color chosen for contrast against the party color.
        x_labels: Category labels of the two bars, in the order
            (tweets bar, engagement bar).

    Returns:
        The plotly figure.

    Raises:
        ValueError: If a required column is missing or ``x_labels`` does not
            contain exactly two labels.
        RuntimeError: If ``save_name`` is given but ``GRAPHICS_DIR`` is not
            defined.
    """
    import pandas as pd
    import plotly.graph_objects as go
    from pathlib import Path

    PARTY_COLORS = {
        "CDU/CSU": "#000000",
        "CDU": "#000000",
        "CSU": "#000000",
        "SPD": "#E3000F",
        "GRÜNE": "#1AA64A",
        "GRUENE": "#1AA64A",
        "B90/GRUENE": "#1AA64A",
        "DIE LINKE": "#BE3075",
        "LINKE": "#BE3075",
        "FDP": "#FFED00",
        "AFD": "#009EE0",
        "BSW": "#009688",
        "FW": "#F28F00",
        "SSW": "#00A3E0",
        "PIRATEN": "#FF8800",
        "PARTEI": "#9E9E9E",
        "ÖDP": "#FF6A00",
        "OEDP": "#FF6A00",
    }

    def _normalize_party_value(p: str) -> str:
        """Map a raw party label to the key used in PARTY_COLORS."""
        if p is None:
            return ""
        key = str(p).strip().upper()
        if key in {"CDU", "CSU"}:
            return "CDU/CSU"
        if key.startswith("GRÜN") or key.startswith("GRUEN") or "GRUENE" in key or "GRÜNE" in key or "B90" in key:
            return "GRÜNE"
        if key in {"LINKE", "DIE LINKE", "DIE LINKE."}:
            return "DIE LINKE"
        if key in {"ÖDP", "OEDP"}:
            return "ÖDP"
        if key in {"AFD", "ALTERNATIVE FÜR DEUTSCHLAND", "ALTERNATIVE FUER DEUTSCHLAND"}:
            return "AFD"
        return key

    def _is_dark_color(hex_color: str) -> bool:
        """Return True if the color is dark (decides the text color inside a segment)."""
        hex_color = hex_color.lstrip("#")
        r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
        # Weighted RGB brightness on a 0-255 scale.
        brightness = (r * 299 + g * 587 + b * 114) / 1000
        return brightness < 140

    # --- basic checks ---
    for col in (party_col, tweets_pct_col, engagement_pct_col):
        if col not in df_party.columns:
            raise ValueError(f"Missing required column: {col}")

    x_vals = list(x_labels)
    if len(x_vals) != 2:
        raise ValueError("x_labels must contain exactly two labels")

    work = df_party.copy()
    work[party_col] = work[party_col].astype(str).str.strip()

    fig = go.Figure()

    # One stacked trace per row (party).
    for _, row in work.iterrows():
        key = _normalize_party_value(row[party_col])
        color = PARTY_COLORS.get(key, "#888888")
        inside_text_color = "#FFFFFF" if _is_dark_color(color) else "#000000"

        # Shares as 0-1 fractions, in x_vals order.
        y_vals = [row[tweets_pct_col], row[engagement_pct_col]]

        text_vals = []
        text_positions = []
        text_colors = []
        for v in y_vals:
            if pd.isna(v):
                # Missing share: no label (values read from a DataFrame are NaN,
                # not None, so an `is None` test would print "nan %").
                text_vals.append("")
                text_positions.append("outside")
                text_colors.append("#000000")
            elif v >= min_inside_pct:
                # Large enough to hold the label inside the segment.
                text_vals.append(f"{v * 100:.1f} %")
                text_positions.append("inside")
                text_colors.append(inside_text_color)
            else:
                # Small segment: label outside, in black.
                text_vals.append(f"{v * 100:.1f} %")
                text_positions.append("outside")
                text_colors.append("#000000")

        fig.add_bar(
            name=key,
            x=x_vals,
            y=y_vals,
            marker_color=color,
            text=text_vals,
            textposition=text_positions,
            textfont=dict(color=text_colors, size=11),
            hovertemplate=(
                f"Partei: {key}<br>"
                "Kategorie: %{x}<br>"
                "Anteil: %{y:.1%}<extra></extra>"
            ),
        )

    # ---- Title plus optional notebook-level subtitle ----
    stand_text = globals().get("STAND_TEXT")

    if title and stand_text:
        title_text = f"{title}<br><sub style='font-size:0.85em; line-height:0.5;'>{stand_text}</sub>"
        top_margin = 100
    elif title:
        title_text = title
        top_margin = 50
    elif stand_text:
        title_text = stand_text
        top_margin = 60
    else:
        title_text = None
        top_margin = 40

    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            xanchor="center",
            yanchor="top",
            yref="container",
            font=dict(size=20),
        ),
        barmode="stack",
        xaxis_title="",
        yaxis_title="Anteil",
        yaxis=dict(tickformat=".0%"),
        margin=dict(l=40, r=40, t=top_margin, b=40),
        legend_title_text="Partei",
        uniformtext_minsize=8,
        uniformtext_mode="show",
    )

    # ---- Optional: save ----
    if save_name:
        # A `global` statement never raises, so wrapping it in try/except NameError
        # cannot detect a missing GRAPHICS_DIR; look the name up explicitly instead.
        graphics_dir = globals().get("GRAPHICS_DIR")
        if graphics_dir is None:
            raise RuntimeError("GRAPHICS_DIR not defined globally. Initialize it before calling the function.")

        save_path = Path(graphics_dir) / f"{save_name}.png"
        fig.write_image(save_path, width=900, height=600, scale=2)
        print(f"✅ Plot saved to: {save_path}")

    return fig


In [33]:
# Tweet share vs. impression share per party, taken from the aggregated df_summary.
fig = plot_party_stack_tweets_engagement(
    df_party=df_summary,
    tweets_pct_col="n_tweets_pct",
    engagement_pct_col="impressions_pct",
    title="Anteile nach Partei: Tweets vs. Impressions",
    save_name="party_share_tweets_impressions",
)
fig.show()


✅ Plot saved to: C:\Users\felix\Documents\xminer\outputs\202510\graphics\party_share_tweets_impressions.png


In [30]:
def plot_party_hbar(
    df_profiles,
    y_col: str,  # e.g. "username"
    x_col: str,
    top_n: int = 10,
    party_col: str = "partei_kurz",
    title: str | None = None,
    x_label: str | None = None,
    save_name: str | None = None,   # file name without directory or extension
):
    """Draw a horizontal bar chart of the ``top_n`` rows ranked by ``x_col``.

    Bars are labelled "FULLNAME (y_col)", colored by party and annotated with
    the ``x_col`` value formatted as a whole number with thousands separators.

    NOTE: a later cell of this notebook defines ``plot_party_hbar`` again
    (a variant that formats values as percentages); once that cell has run,
    it replaces this definition.

    Args:
        df_profiles: DataFrame containing ``y_col``, ``x_col`` and ``FULLNAME``.
        y_col: Column appended in parentheses to the full name in the bar label.
        x_col: Numeric column that defines ranking and bar length.
        top_n: Number of rows to show.
        party_col: Column with the party label; if absent, all bars are grey.
        title: Optional chart title. If the notebook global ``STAND_TEXT`` is
            set, it is added as a subtitle (or used alone when no title is given).
        x_label: Axis and hover label for ``x_col``; defaults to ``x_col``.
        save_name: If given, the figure is also written to
            ``GRAPHICS_DIR / f"{save_name}.png"``.

    Returns:
        The plotly figure.

    Raises:
        ValueError: If ``y_col``, ``x_col`` or ``FULLNAME`` is missing.
        RuntimeError: If ``save_name`` is given but ``GRAPHICS_DIR`` is not
            defined.
    """
    import pandas as pd
    import plotly.graph_objects as go
    from pathlib import Path

    PARTY_COLORS = {
        "CDU/CSU": "#000000",
        "CDU": "#000000",
        "CSU": "#000000",
        "SPD": "#E3000F",
        "GRÜNE": "#1AA64A",
        "GRUENE": "#1AA64A",
        "B90/GRUENE": "#1AA64A",
        "DIE LINKE": "#BE3075",
        "LINKE": "#BE3075",
        "FDP": "#FFED00",
        "AFD": "#009EE0",
        "BSW": "#009688",
        "FW": "#F28F00",
        "SSW": "#00A3E0",
        "PIRATEN": "#FF8800",
        "PARTEI": "#9E9E9E",
        "ÖDP": "#FF6A00",
        "OEDP": "#FF6A00",
    }

    def _normalize_party_value(p: str) -> str:
        """Map a raw party label to the key used in PARTY_COLORS."""
        if p is None:
            return ""
        key = str(p).strip().upper()
        if key in {"CDU", "CSU"}:
            return "CDU/CSU"
        if key.startswith("GRÜN") or key.startswith("GRUEN") or "GRUENE" in key or "GRÜNE" in key or "B90" in key:
            return "GRÜNE"
        if key in {"LINKE", "DIE LINKE", "DIE LINKE."}:
            return "DIE LINKE"
        if key in {"ÖDP", "OEDP"}:
            return "ÖDP"
        if key in {"AFD", "ALTERNATIVE FÜR DEUTSCHLAND", "ALTERNATIVE FUER DEUTSCHLAND"}:
            return "AFD"
        return key

    def _resolve_party_colors(series: pd.Series) -> list[str]:
        """Return one hex color per entry; unknown or missing parties are grey."""
        parties = series.astype("string").fillna("")
        return [PARTY_COLORS.get(_normalize_party_value(p), "#888888") for p in parties]

    def _is_dark_color(hex_color: str) -> bool:
        """Determine if a color is dark based on brightness."""
        hex_color = hex_color.lstrip("#")
        r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
        # Weighted RGB brightness on a 0-255 scale.
        brightness = (r * 299 + g * 587 + b * 114) / 1000
        return brightness < 140

    for col in (y_col, x_col):
        if col not in df_profiles.columns:
            raise ValueError(f"Missing required column: {col}")
    if "FULLNAME" not in df_profiles.columns:
        raise ValueError("Missing required column: FULLNAME")

    work = df_profiles.copy()
    if party_col not in work.columns:
        work[party_col] = None

    # --- combined label for y-axis ---
    work["_label"] = work["FULLNAME"].astype(str).str.strip() + " (" + work[y_col].astype(str).str.strip() + ")"

    work = work.sort_values(x_col, ascending=False).head(top_n).copy()

    # Reverse the category order so the largest value ends up at the top of the chart.
    categories = work["_label"].tolist()[::-1]
    work["_y_cat"] = pd.Categorical(work["_label"], categories=categories, ordered=True)

    colors = _resolve_party_colors(work[party_col])

    # Bars shorter than 15 % of the longest bar get their label outside.
    max_x = work[x_col].max()
    threshold = 0.15 * max_x
    text_positions = ["outside" if x < threshold else "inside" for x in work[x_col]]

    # Text color: white inside dark bars, black otherwise.
    text_colors = []
    for c, pos in zip(colors, text_positions):
        if pos == "outside":
            text_colors.append("#000000")
        else:
            text_colors.append("#FFFFFF" if _is_dark_color(c) else "#000000")

    x_title = x_label or x_col

    fig = go.Figure(
        go.Bar(
            x=work[x_col],
            y=work["_y_cat"],
            orientation="h",
            marker_color=colors,
            text=[f"{v:,.0f}" for v in work[x_col]],
            textposition=text_positions,
            insidetextanchor="end",
            textfont=dict(
                color=text_colors,
            ),
            customdata=work[[party_col, y_col, "FULLNAME"]].astype(str).values,
            hovertemplate=(
                "Name: %{customdata[2]} (%{customdata[1]})<br>"
                f"{x_title}: %{{x:,.0f}}<br>"
                f"{party_col}: %{{customdata[0]}}<extra></extra>"
            ),
        )
    )

    # ---- Title plus optional notebook-level subtitle ----
    stand_text = globals().get("STAND_TEXT")

    if title and stand_text:
        title_text = f"{title}<br><sub style='font-size:0.85em; line-height:0.5;'>{stand_text}</sub>"
        top_margin = 100
    elif title:
        title_text = title
        top_margin = 50
    elif stand_text:
        title_text = stand_text
        top_margin = 60
    else:
        title_text = None
        top_margin = 40

    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            xanchor="center",
            yanchor="top",
            yref="container",
            font=dict(size=20),
        ),
        xaxis_title=x_title,
        yaxis_title="",
        yaxis=dict(categoryorder="array", categoryarray=categories),
        bargap=0.25,
        margin=dict(l=10, r=40, t=top_margin, b=10),
        height=max(300, 35 * len(work)),
        uniformtext_minsize=8,
        uniformtext_mode="show",
    )

    fig.update_traces(cliponaxis=False, texttemplate="%{text}")

    # ---- Optional: Save the figure ----
    if save_name:
        # A `global` statement never raises, so wrapping it in try/except NameError
        # cannot detect a missing GRAPHICS_DIR; look the name up explicitly instead.
        graphics_dir = globals().get("GRAPHICS_DIR")
        if graphics_dir is None:
            raise RuntimeError("GRAPHICS_DIR not defined globally. Initialize it before calling the function.")

        save_path = Path(graphics_dir) / f"{save_name}.png"
        fig.write_image(save_path, width=900, height=600, scale=2)
        print(f"✅ Plot saved to: {save_path}")

    return fig


In [17]:
# NOTE(review): df_profiles is not assigned anywhere in the visible part of this
# notebook - confirm where it is loaded before relying on a fresh run.
df_profiles.columns

Index(['username', 'name_curr', 'partei_kurz', 'followers_count_prev',
       'followers_count_curr', 'following_count_prev', 'following_count_curr',
       'tweet_count_prev', 'tweet_count_curr', 'listed_count_prev',
       'listed_count_curr', 'delta_followers_count', 'delta_following_count',
       'delta_tweet_count', 'delta_listed_count', 'pct_followers_count',
       'pct_following_count', 'pct_tweet_count', 'pct_listed_count',
       'retrieved_at_prev', 'retrieved_at_curr', 'snapshot_span_days',
       'AKAD_TITEL', 'VORNAME', 'NACHNAME', 'FULLNAME'],
      dtype='object')

In [None]:
def plot_party_hbar(
    df_profiles,
    y_col: str,  # e.g. "username"
    x_col: str,
    top_n: int = 10,
    party_col: str = "partei_kurz",
    title: str | None = None,
    x_label: str | None = None,
    save_name: str | None = None,   # file name without directory or extension
):
    """Draw a horizontal bar chart of the ``top_n`` rows ranked by ``x_col``, as percentages.

    Percent variant: ``x_col`` is treated as a fraction (1.0 = 100 %). Bar
    labels, axis ticks and hover text are all formatted as percentages. Bars
    are labelled "FULLNAME (y_col)" and colored by party.

    NOTE: this redefines ``plot_party_hbar`` from an earlier cell of this
    notebook (the variant that prints absolute numbers); after this cell has
    run, only this percent variant is reachable under that name.

    Args:
        df_profiles: DataFrame containing ``y_col``, ``x_col`` and ``FULLNAME``.
        y_col: Column appended in parentheses to the full name in the bar label.
        x_col: Numeric column (fraction) that defines ranking and bar length.
        top_n: Number of rows to show.
        party_col: Column with the party label; if absent, all bars are grey.
        title: Optional chart title. If the notebook global ``STAND_TEXT`` is
            set, it is added as a subtitle (or used alone when no title is given).
        x_label: Axis and hover label for ``x_col``; defaults to ``x_col``.
        save_name: If given, the figure is also written to
            ``GRAPHICS_DIR / f"{save_name}.png"``.

    Returns:
        The plotly figure.

    Raises:
        ValueError: If ``y_col``, ``x_col`` or ``FULLNAME`` is missing.
        RuntimeError: If ``save_name`` is given but ``GRAPHICS_DIR`` is not
            defined.
    """
    import pandas as pd
    import plotly.graph_objects as go
    from pathlib import Path

    PARTY_COLORS = {
        "CDU/CSU": "#000000",
        "CDU": "#000000",
        "CSU": "#000000",
        "SPD": "#E3000F",
        "GRÜNE": "#1AA64A",
        "GRUENE": "#1AA64A",
        "B90/GRUENE": "#1AA64A",
        "DIE LINKE": "#BE3075",
        "LINKE": "#BE3075",
        "FDP": "#FFED00",
        "AFD": "#009EE0",
        "BSW": "#009688",
        "FW": "#F28F00",
        "SSW": "#00A3E0",
        "PIRATEN": "#FF8800",
        "PARTEI": "#9E9E9E",
        "ÖDP": "#FF6A00",
        "OEDP": "#FF6A00",
    }

    def _normalize_party_value(p: str) -> str:
        """Map a raw party label to the key used in PARTY_COLORS."""
        if p is None:
            return ""
        key = str(p).strip().upper()
        if key in {"CDU", "CSU"}:
            return "CDU/CSU"
        if key.startswith("GRÜN") or key.startswith("GRUEN") or "GRUENE" in key or "GRÜNE" in key or "B90" in key:
            return "GRÜNE"
        if key in {"LINKE", "DIE LINKE", "DIE LINKE."}:
            return "DIE LINKE"
        if key in {"ÖDP", "OEDP"}:
            return "ÖDP"
        if key in {"AFD", "ALTERNATIVE FÜR DEUTSCHLAND", "ALTERNATIVE FUER DEUTSCHLAND"}:
            return "AFD"
        return key

    def _resolve_party_colors(series: pd.Series) -> list[str]:
        """Return one hex color per entry; unknown or missing parties are grey."""
        parties = series.astype("string").fillna("")
        return [PARTY_COLORS.get(_normalize_party_value(p), "#888888") for p in parties]

    def _is_dark_color(hex_color: str) -> bool:
        """Determine if a color is dark based on brightness."""
        hex_color = hex_color.lstrip("#")
        r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
        # Weighted RGB brightness on a 0-255 scale.
        brightness = (r * 299 + g * 587 + b * 114) / 1000
        return brightness < 140

    for col in (y_col, x_col):
        if col not in df_profiles.columns:
            raise ValueError(f"Missing required column: {col}")
    if "FULLNAME" not in df_profiles.columns:
        raise ValueError("Missing required column: FULLNAME")

    work = df_profiles.copy()
    if party_col not in work.columns:
        work[party_col] = None

    # --- combined label for y-axis ---
    work["_label"] = work["FULLNAME"].astype(str).str.strip() + " (" + work[y_col].astype(str).str.strip() + ")"

    work = work.sort_values(x_col, ascending=False).head(top_n).copy()

    # Reverse the category order so the largest value ends up at the top of the chart.
    categories = work["_label"].tolist()[::-1]
    work["_y_cat"] = pd.Categorical(work["_label"], categories=categories, ordered=True)

    colors = _resolve_party_colors(work[party_col])

    # Bars shorter than 15 % of the longest bar get their label outside.
    max_x = work[x_col].max()
    threshold = 0.15 * max_x
    text_positions = ["outside" if x < threshold else "inside" for x in work[x_col]]

    # Text color: white inside dark bars, black otherwise.
    text_colors = []
    for c, pos in zip(colors, text_positions):
        if pos == "outside":
            text_colors.append("#000000")
        else:
            text_colors.append("#FFFFFF" if _is_dark_color(c) else "#000000")

    x_title = x_label or x_col

    fig = go.Figure(
        go.Bar(
            x=work[x_col],
            y=work["_y_cat"],
            orientation="h",
            marker_color=colors,
            text=[f"{v*100:.0f} %" for v in work[x_col]],
            textposition=text_positions,
            insidetextanchor="end",
            textfont=dict(
                color=text_colors,
            ),
            customdata=work[[party_col, y_col, "FULLNAME"]].astype(str).values,
            hovertemplate=(
                "Name: %{customdata[2]} (%{customdata[1]})<br>"
                # Percent format to match the bar labels and axis ticks; ",.0f"
                # would round every fraction to 0 or 1.
                f"{x_title}: %{{x:.1%}}<br>"
                f"{party_col}: %{{customdata[0]}}<extra></extra>"
            ),
        )
    )

    # ---- Title plus optional notebook-level subtitle ----
    stand_text = globals().get("STAND_TEXT")

    if title and stand_text:
        title_text = f"{title}<br><sub style='font-size:0.85em; line-height:0.5;'>{stand_text}</sub>"
        top_margin = 100
    elif title:
        title_text = title
        top_margin = 50
    elif stand_text:
        title_text = stand_text
        top_margin = 60
    else:
        title_text = None
        top_margin = 40

    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            xanchor="center",
            yanchor="top",
            yref="container",
            font=dict(size=20),
        ),
        xaxis_title=x_title,
        yaxis_title="",
        yaxis=dict(categoryorder="array", categoryarray=categories),
        bargap=0.25,
        margin=dict(l=10, r=40, t=top_margin, b=10),
        height=max(300, 35 * len(work)),
        uniformtext_minsize=8,
        uniformtext_mode="show",
    )
    fig.update_xaxes(tickformat=".0%")

    fig.update_traces(cliponaxis=False, texttemplate="%{text}")

    # ---- Optional: Save the figure ----
    if save_name:
        # A `global` statement never raises, so wrapping it in try/except NameError
        # cannot detect a missing GRAPHICS_DIR; look the name up explicitly instead.
        graphics_dir = globals().get("GRAPHICS_DIR")
        if graphics_dir is None:
            raise RuntimeError("GRAPHICS_DIR not defined globally. Initialize it before calling the function.")

        save_path = Path(graphics_dir) / f"{save_name}.png"
        fig.write_image(save_path, width=900, height=600, scale=2)
        print(f"✅ Plot saved to: {save_path}")

    return fig


: 