# UK - Anbindung Paid Themen 

* Um was handelt es sich hier  (Kurzbeschreibung Inhalt):
- Alle SoMe-Total-Tabellen in einer konsolidierten Tabelle


---
* QUELLEN:  
- datif_pz_uk_{}.03_transformed.linkedin_paid_organic_total
- datif_pz_uk_{}.03_transformed.youtube_paid_organic_total
- datif_pz_uk_{}.03_transformed.meta_paid_organic_total
- datif_pz_uk_{}.03_transformed.outbrain_paid_total
- datif_pz_uk_{}.03_transformed.instagram_organic_stories_total


* ZIEL:  
- datif_pz_uk_{}.03_transformed.consolidated_socials_paid_organic_total


---
* Versionen (aktuelle immer oben):
- 29.10.2025 Max Mustermann: Add Engagements Columns
- 07.10.2025 Max Mustermann: init

#Imports

In [0]:
from pyspark.sql.functions import col, lit, to_date, year, udf, concat_ws, when
from pyspark.sql.types import StringType, StructType, StructField
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [0]:
%run ../../common/nb_init

#Functions

# Target Schema

In [0]:
# Target schema and target path; both are passed to fn_overwrite_table when the
# consolidated table is written at the end of this notebook.
target_schema_name = '03_transformed'
target_path = 'funnel'

# Function for strategy topics

In [0]:
# Names of the strategic-topic score columns. The order matters: scores passed
# to the topic functions are matched to these names by position.
strategische_spalten = [
    'Strategie2030', 'FinanzierungEnergiewende', 'EMobilitaet',
    'VernetzeEnergiewelt', 'TransformationGasnetzeWasserstoff', 'ErneuerbareEnergien',
    'DisponibleErzeugung', 'IntelligenteStromnetze', 'AAlsArbeitgeberIn',
    'NachhaltigkeitCSRESG', 'MarkeA'
]


def get_top_topic(*args):
    """Return the name of the highest-scoring strategic topic.

    Args:
        *args: Scores, positionally aligned with ``strategische_spalten``.
            Individual scores may be None; surplus scores are ignored.

    Returns:
        The topic name whose score is the largest, provided that score is
        strictly greater than 0.8; otherwise None. Also None when no scores
        are passed or all scores are None.
    """
    # None scores can never win, so they are dropped up front.
    bewertet = [
        (thema, wert)
        for thema, wert in zip(strategische_spalten, args)
        if wert is not None
    ]
    # Guard: max() on an empty sequence would raise ValueError.
    if not bewertet:
        return None
    # On ties max() keeps the first entry, i.e. the earlier column wins.
    top_thema, top_wert = max(bewertet, key=lambda eintrag: eintrag[1])
    # NOTE(review): strict '>' here, while get_top_3_topics uses '>= 0.8' -
    # confirm which boundary is intended.
    return top_thema if top_wert > 0.8 else None

# Spark UDF wrapper around get_top_topic with a declared string return type.
# NOTE(review): not referenced anywhere else in the visible part of this notebook.
get_top_thema_udf = F.udf(get_top_topic, StringType())

# Function for topics

In [0]:
def get_top_3_topics(*args):
    """Return the three best-scoring strategic topics as a list of length 3.

    Args:
        *args: Scores, positionally aligned with ``strategische_spalten``.
            Individual scores may be None.

    Returns:
        Topic names with a score of at least 0.8, ordered by score descending
        and cut to three entries. Free slots are filled with the placeholder
        string "Kein strategisches Thema" (not with None).
    """
    platzhalter = "Kein strategisches Thema"
    # Keep only topics that reach the 0.8 threshold
    kandidaten = [
        (thema, wert)
        for thema, wert in zip(strategische_spalten, args)
        if wert is not None and wert >= 0.8
    ]
    # Highest score first; the sort is stable, so ties keep column order
    kandidaten.sort(key=lambda paar: paar[1], reverse=True)
    namen = [thema for thema, _ in kandidaten[:3]]
    # Pad to exactly three entries
    namen.extend([platzhalter] * (3 - len(namen)))
    return namen

# Struct returned by the top-3 UDF: one nullable string field per rank
# (StrategischesThema1 .. StrategischesThema3).
top3_schema = StructType(
    [StructField(f"StrategischesThema{rang}", StringType(), True) for rang in (1, 2, 3)]
)

# Spark UDF wrapper around get_top_3_topics producing the struct above
get_top3_thema_udf = udf(get_top_3_topics, top3_schema)

#Configure Tables

In [0]:
# Per-source column mapping used to build the consolidated table.
# Each entry describes one source table ('table_name') and maps a unified key
# (e.g. 'all_impressions') to the column name in that source. The values are
# resolved by `_safe_col_from`, which treats some strings as sentinels:
#   - 'Existiert nicht'                       -> literal "Existiert nicht"
#   - 'KPI existiert nicht'                   -> literal 0
#   - 'Organic' / 'Paid' / 'Story' / 'Video'  -> the string itself as a literal
# 'channel' is used as a literal channel name, except for the value 'Plattform',
# which is read from the source column of that name.
# NOTE(review): the entries do not all define the same keys (e.g. only the
# Outbrain entry has 'organic_total_saved', the Instagram entry has no
# 'all_total_dislikes'). Missing keys are read with `t.get(...)` and therefore
# end up as the literal "Existiert nicht" rather than 0 - confirm this is intended.
relevant_tables = [
    {
        'table_name': '03_transformed.youtube_paid_organic_total',
        # --- General ---
        'organic_id': 'OrganicID',
        'paid_id': 'AdID',
        'source': 'Source',
        'channel': 'YouTube',
        'organic_created_date': 'Organic_CreatedDate',
        'paid_created_date': 'Paid_CreatedDate',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'url': 'VideoURL',
        'post_type': 'Video',
        'title': 'VideoDescription',
        'owner': 'Owner',
        'spend': 'TotalSpend',
        # --- All_ ---
        'all_impressions': 'All_TotalViews',
        'all_engagement': 'All_TotalEngagements',
        'all_weighted_engagement': 'All_TotalWeightedEngagements',
        'all_total_likes': 'All_TotalLikes',
        'all_total_dislikes': 'All_TotalDislikes',
        'all_total_comments': 'All_TotalComments',
        'all_total_shares': 'All_TotalShares',
        'all_total_clicks': 'KPI existiert nicht',
        'all_total_reactions': 'KPI existiert nicht',
        'all_average_view_duration': 'All_AverageViewDuration',
        # --- Organic_ ---
        'organic_impressions': 'Organic_TotalViews',
        'organic_engagement': 'Organic_TotalEngagements',
        'organic_weighted_engagement': 'KPI existiert nicht',
        'organic_engagement_rating': 'KPI existiert nicht',
        'organic_total_likes': 'KPI existiert nicht',
        'organic_total_comments': 'KPI existiert nicht',
        'organic_total_shares': 'KPI existiert nicht',
        'organic_total_clicks': 'KPI existiert nicht',
        'organic_total_reactions': 'KPI existiert nicht',
        # --- Paid_ ---
        'paid_impressions': 'Paid_TotalImpressions',
        'paid_engagement': 'Paid_TotalEngagements',
        'paid_weighted_engagement': 'KPI existiert nicht',
        'paid_engagement_rating': 'KPI existiert nicht',
        'paid_total_likes': 'KPI existiert nicht',
        'paid_total_comments': 'KPI existiert nicht',
        'paid_total_shares': 'KPI existiert nicht',
        'paid_total_clicks': 'KPI existiert nicht',
        'paid_total_reactions': 'KPI existiert nicht',
        # --- KPIs ---
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        'video_view_rate': 'VideoViewRate',
        'vtr': 'VTR',
        'cps': 'KPI existiert nicht',
        'click_to_session': 'KPI existiert nicht',
    },
    {
        'table_name': '03_transformed.linkedIn_paid_organic_total',
        # --- General ---
        'organic_id': 'OrganicID',
        'paid_id': 'AdID',
        'source': 'Source',
        'channel': 'LinkedIn',
        'organic_created_date': 'Organic_CreatedDate',
        'paid_created_date': 'Paid_CreatedDate',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'url': 'URL',
        'post_type': 'PostType',
        'title': 'Title',
        'owner': 'Existiert nicht',
        'spend': 'TotalSpend',
        # --- All_ ---
        'all_impressions': 'All_TotalImpressions',
        'all_engagement': 'All_TotalEngagements',
        'all_weighted_engagement': 'All_TotalWeightedEngagements',
        'all_total_shares': 'All_TotalShares',
        'all_total_comments': 'All_TotalComments',
        'all_total_likes': 'All_TotalLikes',
        'all_total_clicks': 'All_TotalClicks',
        'all_total_dislikes': 'KPI existiert nicht',
        'all_total_reactions': 'KPI existiert nicht',
        'all_average_view_duration': 'KPI existiert nicht',
        # --- Organic_ ---
        'organic_impressions': 'Organic_TotalImpressions',
        'organic_engagement': 'Organic_TotalEngagements',
        'organic_weighted_engagement': 'Organic_TotalWeightedEngagements',
        'organic_engagement_rating': 'Organic_TotalEngagementRateInPercent',
        'organic_total_shares': 'Organic_TotalShares',
        'organic_total_comments': 'Organic_TotalComments',
        'organic_total_likes': 'Organic_TotalLikes',
        'organic_total_clicks': 'Organic_TotalClicks',
        'organic_total_reactions': 'KPI existiert nicht',
        # --- Paid_ ---
        'paid_impressions': 'Paid_TotalImpressions',
        'paid_engagement': 'Paid_TotalEngagements',
        'paid_weighted_engagement': 'Paid_TotalWeightedEngagements',
        'paid_engagement_rating': 'Paid_TotalEngagementRating',
        'paid_total_shares': 'Paid_TotalShares',
        'paid_total_comments': 'Paid_TotalComments',
        'paid_total_likes': 'Paid_TotalLikes',
        'paid_total_clicks': 'Paid_TotalClicks',
        'paid_total_reactions': 'KPI existiert nicht',
        # --- KPIs ---
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        'video_view_rate': 'VideoViewRate',
        'vtr': 'KPI existiert nicht',
        'cps': 'KPI existiert nicht',
        'click_to_session': 'KPI existiert nicht',
    },
    {
        'table_name': '03_transformed.meta_paid_organic_total',
        # --- General ---
        'organic_id': 'OrganicID',
        'paid_id': 'AdID',
        'source': 'Source',
        'channel': 'Plattform',
        'organic_created_date': 'Organic_CreatedDate',
        'paid_created_date': 'Paid_CreatedDate',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'url': 'URL',
        'post_type': 'PostType',
        'title': 'PostMessage',
        'owner': 'Existiert nicht',
        'spend': 'TotalSpend',
        # --- All_ ---
        'all_impressions': 'All_TotalImpressions',
        'all_engagement': 'All_TotalEngagements',
        'all_weighted_engagement': 'All_TotalWeightedEngagements',
        'all_total_shares': 'All_TotalShares',
        'all_total_clicks': 'All_TotalClicks',
        'all_total_comments': 'All_TotalComments',
        'all_total_reactions': 'All_TotalReactions',
        'all_total_likes': 'KPI existiert nicht',
        'all_total_dislikes': 'KPI existiert nicht',
        'all_average_view_duration': 'KPI existiert nicht',
        # --- Organic_ ---
        'organic_impressions': 'Organic_TotalImpressions',
        'organic_engagement': 'Organic_TotalEngagements',
        'organic_weighted_engagement': 'Organic_TotalWeightedEngagements',
        'organic_engagement_rating': 'Organic_EngagementRateInPercent',
        'organic_total_shares': 'Organic_TotalShares',
        'organic_total_clicks': 'Organic_TotalClicks',
        'organic_total_comments': 'Organic_TotalComments',
        'organic_total_reactions': 'Organic_TotalReactions',
        'organic_total_likes': 'KPI existiert nicht',
        # --- Paid_ ---
        'paid_impressions': 'Paid_TotalImpressions',
        'paid_engagement': 'Paid_TotalEngagements',
        'paid_weighted_engagement': 'Paid_TotalWeightedEngagements',
        'paid_engagement_rating': 'Paid_TotalEngagementRating',
        'paid_total_shares': 'Paid_TotalShares',
        'paid_total_clicks': 'Paid_TotalClicks',
        'paid_total_comments': 'Paid_TotalComments',
        'paid_total_reactions': 'Paid_TotalReactions',
        'paid_total_likes': 'KPI existiert nicht',
        # --- KPIs ---
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        'video_view_rate': 'VideoViewRate',
        'vtr': 'KPI existiert nicht',
        'cps': 'KPI existiert nicht',
        'click_to_session': 'KPI existiert nicht',
    },
    {
        'table_name': '03_transformed.outbrain_paid_total',
        # --- General ---
        'organic_id': 'Existiert nicht',
        'paid_id': 'ID',
        'source': 'Paid',
        'channel': 'Outbrain',
        'organic_created_date': 'Existiert nicht',
        'paid_created_date': 'CreatedDate',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'url': 'Existiert nicht',
        'post_type': 'Existiert nicht',
        'title': 'Existiert nicht',
        'owner': 'Existiert nicht',
        'spend': 'KPI existiert nicht',
        # --- All_ ---
        'all_impressions': 'TotalImpressions',
        'all_engagement': 'KPI existiert nicht',
        'all_weighted_engagement': 'KPI existiert nicht',
        'all_total_likes': 'KPI existiert nicht',
        'all_total_dislikes': 'KPI existiert nicht',
        'all_total_comments': 'KPI existiert nicht',
        'all_total_shares': 'KPI existiert nicht',
        'all_total_clicks': 'KPI existiert nicht',
        'all_total_reactions': 'KPI existiert nicht',
        'all_average_view_duration': 'KPI existiert nicht',
        # --- Organic_ ---
        'organic_impressions': 'KPI existiert nicht',
        'organic_engagement': 'KPI existiert nicht',
        'organic_weighted_engagement': 'KPI existiert nicht',
        'organic_engagement_rating': 'KPI existiert nicht',
        'organic_total_likes': 'KPI existiert nicht',
        'organic_total_comments': 'KPI existiert nicht',
        'organic_total_shares': 'KPI existiert nicht',
        'organic_total_clicks': 'KPI existiert nicht',
        'organic_total_reactions': 'KPI existiert nicht',
        'organic_total_saved': 'KPI existiert nicht',
        # --- Paid_ ---
        'paid_impressions': 'TotalImpressions',
        'paid_engagement': 'KPI existiert nicht',
        'paid_weighted_engagement': 'KPI existiert nicht',
        'paid_engagement_rating': 'KPI existiert nicht',
        'paid_total_likes': 'KPI existiert nicht',
        'paid_total_comments': 'KPI existiert nicht',
        'paid_total_shares': 'KPI existiert nicht',
        'paid_total_clicks': 'Paid_Clicks',
        'paid_total_reactions': 'KPI existiert nicht',
        # --- KPIs ---
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        'video_view_rate': 'KPI existiert nicht',
        'vtr': 'KPI existiert nicht',
        'cps': 'CPS',
        'click_to_session': 'Click-to-Session',
    },
    {
        'table_name': '03_transformed.instagram_organic_stories_total',
        # --- General ---
        'organic_id': 'StoryID',
        'paid_id': 'Existiert nicht',
        'source': 'Organic',
        'channel': 'Instagram',
        'organic_created_date': 'CreatedDate',
        'paid_created_date': 'Existiert nicht',
        'campaign_name': 'Existiert nicht',
        'ad_name': 'Existiert nicht',
        'url': 'Existiert nicht',
        'post_type': 'Story',
        'title': 'Caption',
        'owner': 'Existiert nicht',
        'spend': 'KPI existiert nicht',
        # --- All_ ---
        'all_impressions': 'TotalImpressions',
        'all_engagement': 'TotalEngagements',
        'all_weighted_engagement': 'TotalWeightedEngagements',
        'all_total_likes': 'KPI existiert nicht',
        'all_total_comments': 'TotalReplies',
        'all_total_shares': 'TotalShares',
        'all_total_clicks': 'KPI existiert nicht',
        'all_total_reactions': 'KPI existiert nicht',
        'all_average_view_duration': 'KPI existiert nicht',
        # --- Organic_ ---
        'organic_impressions': 'TotalImpressions',
        'organic_engagement': 'TotalEngagements',
        'organic_weighted_engagement': 'TotalWeightedEngagements',
        'organic_engagement_rating': 'TemporaryEngagementRateInPercent',
        'organic_total_likes': 'KPI existiert nicht',
        'organic_total_comments': 'TotalReplies',
        'organic_total_shares': 'TotalShares',
        'organic_total_clicks': 'KPI existiert nicht',
        'organic_total_reactions': 'KPI existiert nicht',
        # --- Paid_ ---
        'paid_impressions': 'KPI existiert nicht',
        'paid_engagement': 'KPI existiert nicht',
        'paid_weighted_engagement': 'KPI existiert nicht',
        'paid_engagement_rating': 'KPI existiert nicht',
        'paid_total_likes': 'KPI existiert nicht',
        'paid_total_comments': 'KPI existiert nicht',
        'paid_total_shares': 'KPI existiert nicht',
        'paid_total_clicks': 'KPI existiert nicht',
        'paid_total_reactions': 'KPI existiert nicht',
        # --- KPIs ---
        'cpm': 'KPI existiert nicht',
        'cpc': 'KPI existiert nicht',
        'ctr': 'KPI existiert nicht',
        'cpv': 'KPI existiert nicht',
        'video_view_rate': 'KPI existiert nicht',
        'vtr': 'KPI existiert nicht',
        'cps': 'KPI existiert nicht',
        'click_to_session': 'KPI existiert nicht',
    }
]


# Combine relevant Tables

In [0]:
# Target layout of the consolidated table: every field is a nullable string.
# The field order below is the column order of the schema.
schema = T.StructType([
    T.StructField(feldname, T.StringType(), True)
    for feldname in (
        # --- IDs & general ---
        "OrganicID", "PaidID", "Source", "Channel",
        "Organic_CreatedDate", "Paid_CreatedDate",
        "CampaignName", "AdName", "URL", "PostType", "Title", "Owner",
        "TotalSpend",
        # --- Impressions ---
        "All_TotalImpressions", "Organic_TotalImpressions", "Paid_TotalImpressions",
        # --- Engagements ---
        "All_TotalEngagements", "Organic_TotalEngagements", "Paid_TotalEngagements",
        # --- Weighted engagements ---
        "All_TotalWeightedEngagements", "Organic_TotalWeightedEngagements",
        "Paid_TotalWeightedEngagements",
        # --- Engagement rate / rating ---
        "Organic_TotalEngagementRateInPercent", "Paid_TotalEngagementRating",
        # --- Likes ---
        "All_TotalLikes", "Organic_TotalLikes", "Paid_TotalLikes",
        # --- Dislikes ---
        "All_TotalDislikes",
        # --- Comments ---
        "All_TotalComments", "Organic_TotalComments", "Paid_TotalComments",
        # --- Shares ---
        "All_TotalShares", "Organic_TotalShares", "Paid_TotalShares",
        # --- Clicks ---
        "All_TotalClicks", "Organic_TotalClicks", "Paid_TotalClicks",
        # --- Reactions ---
        "All_TotalReactions", "Organic_TotalReactions", "Paid_TotalReactions",
        # --- View duration ---
        "All_AverageViewDuration",
        # --- KPIs ---
        "CPM", "CPC", "CTR", "CPV", "VideoViewRate", "VTR", "CPS", "Click_to_Session",
        # --- Topics ---
        "StrategischesThema1", "StrategischesThema2", "StrategischesThema3",
        "Themenbereich1", "Themenbereich2", "Themenbereich3",
    )
])

# Empty DataFrame with the layout above.
# NOTE(review): df_consolidated is reassigned to None in the consolidation cell
# before it is read, so this frame appears to be unused - confirm.
df_consolidated = spark.createDataFrame([], schema=schema)


In [0]:
# ---------------------------------------------------------------------
# Helper for safe column selection
# ---------------------------------------------------------------------
def _safe_col_from(df, name: str):
    """
    Resolve a mapping value from `relevant_tables` to a Spark column expression.

    `name` is stripped of surrounding whitespace, then the rules are checked
    in this order:
      - None / '' / 'Fehlt noch' / 'Existiert nicht' -> string literal "Existiert nicht"
      - 'KPI existiert nicht'                        -> integer literal 0
      - 'Organic' / 'Paid' / 'Story' / 'Video'       -> the value itself as string literal
      - a name that is not in `df.columns`           -> string literal "Existiert nicht"
      - anything else                                -> that column, cast to string

    Args:
        df: DataFrame whose `columns` decide whether `name` is an existing column.
        name: Mapping value; may be None (e.g. when the mapping key is missing).

    Returns:
        A Column expression. It is string-typed in every case except the KPI
        placeholder, which is the integer literal 0.
    """
    # After this line `key` is always a str, so None needs no separate check below
    key = "" if name is None else str(name).strip()
    if key in {"Fehlt noch", "Existiert nicht", ""}:
        return F.lit("Existiert nicht")
    if key == "KPI existiert nicht":
        return F.lit(0)
    # Fixed values that are written into the row as-is instead of being read from df
    if key in {"Organic", "Paid", "Story", "Video"}:
        return F.lit(key)
    if key not in df.columns:
        return F.lit("Existiert nicht")
    return F.col(key).cast("string")

# ---------------------------------------------------------------------
# Main logic: bring every source table into the unified column layout
# and union the results into df_consolidated
# ---------------------------------------------------------------------
df_consolidated = None

for t in relevant_tables:
    df = spark.read.table(f"datif_pz_uk_{env}.{t['table_name']}")

    # Topic columns: these two sources get fixed placeholder topics
    if t['table_name'] in [
        "03_transformed.instagram_organic_stories_total",
        "03_transformed.outbrain_paid_total"
    ]:
        df = df.withColumn("StrategischesThema1", F.lit("Kein strategisches Thema"))
        df = df.withColumn("StrategischesThema2", F.lit("Kein strategisches Thema"))
        df = df.withColumn("StrategischesThema3", F.lit("Kein strategisches Thema"))
        df = df.withColumn("Themenbereich1", F.lit("Kein Themenbereich"))
        df = df.withColumn("Themenbereich2", F.lit("Kein Themenbereich"))
        df = df.withColumn("Themenbereich3", F.lit("Kein Themenbereich"))
    else:
        # Compute the top-3 strategic topics as a struct, flatten it into three
        # columns and drop the helper struct again.
        # (Loop variable is not called `col`, to avoid shadowing the imported function.)
        df = df.withColumn(
            "StrategischeThemen",
            get_top3_thema_udf(*[F.col(spalte) for spalte in strategische_spalten])
        ).selectExpr("*",
            "StrategischeThemen.StrategischesThema1",
            "StrategischeThemen.StrategischesThema2",
            "StrategischeThemen.StrategischesThema3"
        ).drop("StrategischeThemen")

        # Keep a topic area only when its confidence column is >= 80. There is no
        # `otherwise`, so rows below the threshold become NULL.
        # NOTE(review): the placeholder sources above use "Kein Themenbereich"
        # instead of NULL - confirm the difference is intended.
        df = df.withColumn("Themenbereich1", F.when(F.col("Themenbereich1_Conf") >= 80, F.col("Themenbereich1")))
        df = df.withColumn("Themenbereich2", F.when(F.col("Themenbereich2_Conf") >= 80, F.col("Themenbereich2")))
        df = df.withColumn("Themenbereich3", F.when(F.col("Themenbereich3_Conf") >= 80, F.col("Themenbereich3")))

    # Fallback for the old column name
    if t.get('video_view_rate') == 'View Rate':
        t = {**t, 'video_view_rate': 'VideoViewRate'}

    # Channel: only the mapping value 'Plattform' (Meta) is read from the source
    # column; every other source uses the configured channel name as a literal.
    # The mapping value is known on the driver, so the decision is made in Python
    # rather than per row.
    if t['channel'] == "Plattform":
        channel_col = _safe_col_from(df, t.get('channel'))
    else:
        channel_col = F.lit(t.get('channel', 'Existiert nicht'))

    # --------------------------------------------------------------
    # Unified selection of all columns
    # --------------------------------------------------------------
    df_sel = df.select(
        # --- General ---
        _safe_col_from(df, t.get('organic_id')).alias("OrganicID"),
        _safe_col_from(df, t.get('paid_id')).alias("PaidID"),
        _safe_col_from(df, t.get('source')).alias("Source"),
        channel_col.alias("Channel"),
        _safe_col_from(df, t.get('organic_created_date')).alias("Organic_CreatedDate"),
        _safe_col_from(df, t.get('paid_created_date')).alias("Paid_CreatedDate"),
        _safe_col_from(df, t.get('campaign_name')).alias("CampaignName"),
        _safe_col_from(df, t.get('ad_name')).alias("AdName"),
        _safe_col_from(df, t.get('url')).alias("URL"),
        _safe_col_from(df, t.get('post_type')).alias("PostType"),
        _safe_col_from(df, t.get('title')).alias("Title"),
        _safe_col_from(df, t.get('owner')).alias("Owner"),
        _safe_col_from(df, t.get('spend')).alias("TotalSpend"),

        # --- Impressions ---
        _safe_col_from(df, t.get('all_impressions')).alias("All_TotalImpressions"),
        _safe_col_from(df, t.get('organic_impressions')).alias("Organic_TotalImpressions"),
        _safe_col_from(df, t.get('paid_impressions')).alias("Paid_TotalImpressions"),

        # --- Engagements ---
        _safe_col_from(df, t.get('all_engagement')).alias("All_TotalEngagements"),
        _safe_col_from(df, t.get('organic_engagement')).alias("Organic_TotalEngagements"),
        _safe_col_from(df, t.get('paid_engagement')).alias("Paid_TotalEngagements"),

        # --- Weighted engagements (rounded to 2 decimals) ---
        F.round(_safe_col_from(df, t.get('all_weighted_engagement')).cast("double"), 2).alias("All_TotalWeightedEngagements"),
        F.round(_safe_col_from(df, t.get('organic_weighted_engagement')).cast("double"), 2).alias("Organic_TotalWeightedEngagements"),
        F.round(_safe_col_from(df, t.get('paid_weighted_engagement')).cast("double"), 2).alias("Paid_TotalWeightedEngagements"),

        # --- Engagement rate (rounded to 3 decimals) ---
        # The "All" rate is computed here as weighted engagements / impressions * 100;
        # it is 0 unless impressions are > 0.
        F.round(
            F.when(
                (_safe_col_from(df, t.get('all_impressions')).cast("double") > 0),
                (
                    _safe_col_from(df, t.get('all_weighted_engagement')).cast("double") /
                    _safe_col_from(df, t.get('all_impressions')).cast("double")
                ) * 100
            ).otherwise(0),
            3
        ).alias("All_TotalEngagementRateInPercent"),
        F.round(_safe_col_from(df, t.get('organic_engagement_rating')).cast("double"), 3).alias("Organic_TotalEngagementRateInPercent"),
        F.round(_safe_col_from(df, t.get('paid_engagement_rating')).cast("double"), 3).alias("Paid_TotalEngagementRating"),

        # --- Likes ---
        _safe_col_from(df, t.get('all_total_likes')).alias("All_TotalLikes"),
        _safe_col_from(df, t.get('organic_total_likes')).alias("Organic_TotalLikes"),
        _safe_col_from(df, t.get('paid_total_likes')).alias("Paid_TotalLikes"),

        # --- Dislikes ---
        _safe_col_from(df, t.get('all_total_dislikes')).alias("All_TotalDislikes"),
        _safe_col_from(df, t.get('organic_total_dislikes')).alias("Organic_TotalDislikes"),
        _safe_col_from(df, t.get('paid_total_dislikes')).alias("Paid_TotalDislikes"),

        # --- Comments ---
        _safe_col_from(df, t.get('all_total_comments')).alias("All_TotalComments"),
        _safe_col_from(df, t.get('organic_total_comments')).alias("Organic_TotalComments"),
        _safe_col_from(df, t.get('paid_total_comments')).alias("Paid_TotalComments"),

        # --- Shares ---
        _safe_col_from(df, t.get('all_total_shares')).alias("All_TotalShares"),
        _safe_col_from(df, t.get('organic_total_shares')).alias("Organic_TotalShares"),
        _safe_col_from(df, t.get('paid_total_shares')).alias("Paid_TotalShares"),

        # --- Clicks ---
        _safe_col_from(df, t.get('all_total_clicks')).alias("All_TotalClicks"),
        _safe_col_from(df, t.get('organic_total_clicks')).alias("Organic_TotalClicks"),
        _safe_col_from(df, t.get('paid_total_clicks')).alias("Paid_TotalClicks"),

        # --- Reactions ---
        _safe_col_from(df, t.get('all_total_reactions')).alias("All_TotalReactions"),
        _safe_col_from(df, t.get('organic_total_reactions')).alias("Organic_TotalReactions"),
        _safe_col_from(df, t.get('paid_total_reactions')).alias("Paid_TotalReactions"),

        # --- Saved ---
        _safe_col_from(df, t.get('all_total_saved')).alias("All_TotalSaved"),
        _safe_col_from(df, t.get('organic_total_saved')).alias("Organic_TotalSaved"),
        _safe_col_from(df, t.get('paid_total_saved')).alias("Paid_TotalSaved"),

        # --- View duration ---
        _safe_col_from(df, t.get('all_average_view_duration')).alias("All_AverageViewDuration"),

        # --- KPIs (rounded to 2 decimals) ---
        # NOTE(review): unlike the weighted-engagement columns there is no explicit
        # cast to double here although the helper returns string columns, so the
        # rounding presumably relies on Spark's implicit cast - confirm.
        F.round(_safe_col_from(df, t.get('cpm')), 2).alias("CPM"),
        F.round(_safe_col_from(df, t.get('cpc')), 2).alias("CPC"),
        F.round(_safe_col_from(df, t.get('ctr')), 2).alias("CTR"),
        F.round(_safe_col_from(df, t.get('cpv')), 2).alias("CPV"),
        F.round(_safe_col_from(df, t.get('video_view_rate')), 2).alias("VideoViewRate"),
        F.round(_safe_col_from(df, t.get('vtr')), 2).alias("VTR"),
        F.round(_safe_col_from(df, t.get('cps')), 2).alias("CPS"),
        F.round(_safe_col_from(df, t.get('click_to_session')), 2).alias("Click_to_Session"),

        # --- Topics ---
        F.col("StrategischesThema1"),
        F.col("StrategischesThema2"),
        F.col("StrategischesThema3"),
        F.col("Themenbereich1"),
        F.col("Themenbereich2"),
        F.col("Themenbereich3"),
    )

    # Union of all sources; the first table initialises the result
    df_consolidated = df_sel if df_consolidated is None else df_consolidated.unionByName(df_sel, allowMissingColumns=True)

# Anzeige
# df_consolidated.display()


### Einheitliches PostType Mapping

In [0]:
# Lookup table (channel, source post type) -> unified post type.
# Written per channel for readability and flattened into tuple keys.
post_type_mapping = {
    (kanal, quell_typ): einheitlich
    for kanal, typen in {
        "YouTube": {
            "Video": "Video",
        },
        "Facebook": {
            "video_direct_response": "Video",
            "video_inline": "Video",
            "Ohne Post Type": "Other",
            "album": "Image",
            "share": "Other",
            "photo": "Image",
            "profile_media": "Other",
            "cover_photo": "Other",
            "multi_share": "Carousel / Document",
            "multi_share_no_end_card": "Other",
        },
        "Instagram": {
            "VIDEO": "Video",
            "IMAGE": "Image",
            "CAROUSEL_ALBUM": "Carousel / Document",
            "Story": "Story",
        },
        "LinkedIn": {
            "Article": "Other",
            "Repost": "Other",
            "Image": "Image",
            "Poll": "Poll",
            "Document": "Carousel / Document",
            "Video": "Video",
        },
    }.items()
    for quell_typ, einheitlich in typen.items()
}


def map_post_type(channel, post_type):
    """Translate a channel-specific post type into the unified post type.

    Args:
        channel: Channel name, e.g. "Facebook".
        post_type: Post type as delivered by that channel.

    Returns:
        The unified post type from ``post_type_mapping``, or
        "Keine Zuordnung möglich" when the combination is not listed.
    """
    try:
        return post_type_mapping[(channel, post_type)]
    except KeyError:
        return "Keine Zuordnung möglich"

# Spark UDF wrapper around map_post_type with a declared string return type
map_post_type_udf = udf(map_post_type, StringType())

# Compute the unified post type into a temporary column, drop the raw
# "PostType" column and rename the temporary column back to "PostType".
df_consolidated = (
    df_consolidated
    .withColumn(
        "PostTypeGeneralized",
        map_post_type_udf(F.col("Channel"), F.col("PostType")),
    )
    .drop("PostType")
    .withColumnRenamed("PostTypeGeneralized", "PostType")
)

#Write Table

In [0]:
# Write the consolidated result to the target table.
# NOTE(review): fn_overwrite_table is not defined in this notebook; presumably
# it comes from the nb_init notebook that is run at the top - confirm.
fn_overwrite_table(
    df_source=df_consolidated,
    target_schema_name=target_schema_name,
    target_table_name="consolidated_socials_paid_organic_total",
    target_path=target_path,
)