# UK - Anbindung Paid Themen 

* Um was handelt es sich hier  (Kurzbeschreibung Inhalt):
- Alle SoMe-Total-Tabellen (Paid) in einer konsolidierten Tabelle


---
* QUELLEN:  
- datif_pz_uk_{}.03_transformed.linkedin_paid_total
- datif_pz_uk_{}.03_transformed.youtube_paid_total
- datif_pz_uk_{}.03_transformed.meta_paid_total
- datif_pz_uk_{}.03_transformed.outbrain_paid_total
- datif_pz_uk_{}.03_transformed.instagram_organic_stories_total (wird in diesem Notebook aktuell nicht gelesen)


* ZIEL:  
- datif_pz_uk_{}.03_transformed.consolidated_socials_paid_total


---
* Versionen (aktuelle immer oben):
- 07.10.2025 Justin Stange-Heiduk: init

#Imports

In [0]:
from pyspark.sql.functions import col, lit, to_date, year, udf, concat_ws, when
from pyspark.sql.types import StringType, StructType, StructField
import pyspark.sql.functions as F

In [0]:
%run ../../common/nb_init

#Functions

# Target Schema

In [0]:
# Destination settings that are passed to fn_overwrite_table when the
# consolidated result is written at the end of this notebook.
target_path = "funnel"
target_schema_name = "03_transformed"

#Configure Tables

In [0]:
# One entry per paid source table. Apart from 'table_name' and 'channel',
# each value names the column in that source table which feeds the unified
# column of the consolidated table.
#
# Two placeholder values are used instead of a column name:
#   'Fehlt noch'       -> metric is not available in the source table yet
#   'Existiert nicht'  -> metric does not exist for this channel
# Both are runtime values that end up as literals in the consolidated table,
# so they must stay exactly as written.
#
# NOTE(review): the catalog 'datif_pz_uk_dev' is hard-coded while the notebook
# header documents 'datif_pz_uk_{}' — confirm whether the environment should
# be injected here.
relevant_paid_tables = [
    {
        'table_name': 'datif_pz_uk_dev.03_transformed.youtube_paid_total',
        'id_column': 'ID',
        'date': 'CreatedDate',
        'channel': 'YouTube',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'spend': 'TotalSpend',
        'impressions': 'TotalImpressions',
        'weighted_engagement': 'Fehlt noch',
        'engagement': 'TotalEngagement',
        'engagement_rating': 'Fehlt noch',
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        'video_view_rate': 'VideoViewRate',
        'vtr': 'VTR',
        'cps': 'Existiert nicht',
        'click_to_session': 'Existiert nicht',
    },
    {
        'table_name': 'datif_pz_uk_dev.03_transformed.outbrain_paid_total',
        'id_column': 'ID',
        'date': 'CreatedDate',
        'channel': 'Outbrain',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'spend': 'TotalSpend',
        'impressions': 'TotalImpressions',
        'weighted_engagement': 'Fehlt noch',
        'engagement': 'Fehlt noch',
        'engagement_rating': 'Fehlt noch',
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        'video_view_rate': 'VideoViewRate',
        'vtr': 'Existiert nicht',
        'cps': 'CPS',
        'click_to_session': 'Clicks-to-Sessions',
    },
    {
        'table_name': 'datif_pz_uk_dev.03_transformed.meta_paid_total',
        'id_column': 'ID',
        'date': 'CreatedDate',
        'channel': 'Meta',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'spend': 'TotalSpend',
        'impressions': 'TotalImpressions',
        'weighted_engagement': 'TotalWeightedEngagement',
        'engagement': 'TotalEngagement',
        'engagement_rating': 'EngagementRating',
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        'video_view_rate': 'Existiert nicht',
        'vtr': 'Existiert nicht',
        'cps': 'Existiert nicht',
        'click_to_session': 'Existiert nicht',
    },
    {
        'table_name': 'datif_pz_uk_dev.03_transformed.linkedin_paid_total',
        'id_column': 'ID',
        'date': 'CreatedDate',
        'channel': 'LinkedIn',
        'campaign_name': 'CampaignName',
        'ad_name': 'AdName',
        'spend': 'TotalSpend',
        'impressions': 'TotalImpressions',
        'weighted_engagement': 'TotalWeightedEngagement',
        'engagement': 'TotalEngagement',
        'engagement_rating': 'EngagementRating',
        'cpm': 'CPM',
        'cpc': 'CPC',
        'ctr': 'CTR',
        'cpv': 'CPV',
        # Previously 'View Rate'; the consolidation loop rewrote that value to
        # 'VideoViewRate' before use, so the effective column is stored here.
        'video_view_rate': 'VideoViewRate',
        'vtr': 'Existiert nicht',
        'cps': 'Existiert nicht',
        'click_to_session': 'Existiert nicht',
    },
]


# Combine relevant Tables

In [0]:
# Empty, all-string frame in the consolidated layout.
# The type classes are the ones imported at the top of this notebook; the
# alias `T` used before is not imported anywhere in this notebook.
# NOTE(review): the consolidation cell further down resets df_consolidated to
# None before building it, so this empty frame looks unused — confirm and
# drop the cell if so.
schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in (
        "ID",
        "CreatedDate",
        "Channel",
        "CampaignName",
        "AdName",
        "TotalSpend",
        "TotalImpressions",
        "TotalWeightedEngagement",
        "TotalEngagement",
        "EngagementRating",
        "CPM",
        "CPC",
        "CTR",
        "CPV",
        "View Rate",
    )
])
df_consolidated = spark.createDataFrame([], schema=schema)


In [0]:
from pyspark.sql import functions as F

def _safe_col_from(df, name: "str | None"):
    """Return a string-typed column expression for one mapped metric.

    Args:
        df: Source DataFrame. Not used by the current implementation; kept in
            the signature so existing callers keep working.
        name: Mapping value for the metric. Either the name of a column in
            the source table or one of the placeholders "Fehlt noch" /
            "Existiert nicht". Surrounding whitespace is ignored.

    Returns:
        - the placeholder itself as a literal when ``name`` is one of the two
          placeholders (so the two cases stay distinguishable downstream),
        - the literal "Existiert nicht" when ``name`` is None or blank, e.g.
          because the key is absent from the mapping and the caller passed
          the result of ``dict.get``,
        - otherwise the column ``name`` cast to string.

    A non-blank name that is not a column of the source table is not
    intercepted here; the error surfaces from Spark when the expression is
    used, which keeps typos in the mapping visible.
    """
    key = "" if name is None else str(name).strip()
    if not key:
        # Previously None became the column name "None" and a blank name the
        # column "", both of which fail once Spark resolves the select.
        return F.lit("Existiert nicht")
    if key in {"Fehlt noch", "Existiert nicht"}:
        return F.lit(key)
    return F.col(key).cast("string")

# Accumulator for the union of all per-table selections; it stays None until
# the first table has been processed.
df_consolidated = None

# (mapping key, output column) pairs for the metrics that are resolved through
# _safe_col_from, listed in output order.
# NOTE(review): the mapping's 'cps' and 'click_to_session' entries are not
# selected here — confirm whether that is intended.
_metric_outputs = (
    ('weighted_engagement', "TotalWeightedEngagement"),
    ('engagement', "TotalEngagement"),
    ('engagement_rating', "EngagementRating"),
    ('cpm', "CPM"),
    ('cpc', "CPC"),
    ('ctr', "CTR"),
    ('cpv', "CPV"),
    ('video_view_rate', "VideoViewRate"),
    ('vtr', "VTR"),
)

for table_cfg in relevant_paid_tables:
    source_df = spark.table(table_cfg['table_name'])

    # A mapping value of 'View Rate' is read as 'VideoViewRate'. The fix-up is
    # applied to a copy so the configuration list itself is left untouched.
    cfg = dict(table_cfg)
    if cfg.get('video_view_rate') == 'View Rate':
        cfg['video_view_rate'] = 'VideoViewRate'

    # Columns taken over directly; the channel is a constant per table and the
    # spend is rounded to two decimals.
    passthrough_cols = [
        F.col(cfg['id_column']).alias("ID"),
        F.col(cfg['date']).alias("Date"),
        F.lit(cfg['channel']).alias("Channel"),
        F.col(cfg['campaign_name']).alias("CampaignName"),
        F.col(cfg['ad_name']).alias("AdName"),
        F.round(F.col(cfg['spend']).cast("double"), 2).alias("TotalSpend"),
        F.col(cfg['impressions']).alias("TotalImpressions"),
    ]
    metric_cols = [
        _safe_col_from(source_df, cfg.get(map_key)).alias(out_name)
        for map_key, out_name in _metric_outputs
    ]
    selected = source_df.select(*passthrough_cols, *metric_cols)

    if df_consolidated is None:
        df_consolidated = selected
    else:
        df_consolidated = df_consolidated.unionByName(selected)

df_consolidated.display()


### Einheitliches PostType Mapping

#Write Table

In [0]:
# Write the consolidated frame to the target table.
# fn_overwrite_table is not defined in this notebook; presumably it comes from
# the nb_init notebook pulled in via %run — confirm.
fn_overwrite_table(
    df_source=df_consolidated,
    target_schema_name=target_schema_name,
    target_table_name="consolidated_socials_paid_total",
    target_path=target_path,
)