# UK - Anbindung Paid Themen 

* Um was handelt es sich hier  (Kurzbeschreibung Inhalt):
- Die Paid-Data-Transformation von Outbrain


---
* QUELLEN:  
- datif_dz_{}.`02_cleaned_uk_outbrain`.`010_outbrain_current_view`
- datif_dz_{}.`02_cleaned_uk_ga4`.`008_ga4_sessions_views_path_current_view`


* ZIEL:  
- datif_pz_uk_{}.03_transformed.outbrain_paid_daily

---
* Versionen (aktuelle immer oben):
- 21.08.2025 Max Mustermann: init


# 1. Initialnotebooks & Libraries

In [0]:
%run ../../common/nb_init

In [0]:
# Write target used by the table-writing cell at the end of this notebook:
# the schema name and the path argument handed to fn_overwrite_table.
# NOTE(review): "funnel" is presumably a storage sub-folder; confirm against
# the fn_overwrite_table implementation.
target_schema_name, target_path = "03_transformed", "funnel"

## 03-transformed

### Outbrain Paid Daily

In [0]:
# (Re)creates the view `03_transformed`.outbrain_paid_daily.
#
# - CTE `outbrain`: one row per source row of the Outbrain view, with typed
#   columns; numeric metrics default to 0 when NULL.
# - CTE `ga4`: GA4 sessions summed per campaign name and calendar day.
# - Final SELECT: Outbrain rows LEFT JOINed to GA4 on campaign name + day;
#   rows without a GA4 match get Sessions = 0.
#
# The GA4 CTE groups by the same day-level expression it selects. Grouping by
# the raw `Date` column instead would produce several rows per campaign/day
# whenever `Date` carries a time component, and the join would then duplicate
# Outbrain rows (inflating spend, clicks and impressions).
#
# NOTE(review): sessions are aggregated per campaign, while Outbrain rows are
# per ad (AdID). Every ad of a campaign therefore carries the full campaign
# session count for that day - confirm this is the intended attribution.
spark.sql(f"""
CREATE OR REPLACE VIEW datif_pz_uk_{env}.`03_transformed`.outbrain_paid_daily AS
WITH outbrain AS (
  SELECT
    CAST(DATE_FORMAT(Date, 'yyyy-MM-dd')       AS DATE)                 AS Date,
    CAST(Promoted_Link_ID__Outbrain            AS STRING)               AS AdID,
    CAST(Campaign_Name__Outbrain               AS STRING)               AS CampaignName,
    CAST(Ad_Name__Outbrain                     AS STRING)               AS AdName,
    CAST(Content_Title__Outbrain               AS STRING)               AS Title,
    CAST(Content_URL__Outbrain                 AS STRING)               AS Content_URL,
    CAST(Image_URL__Outbrain                   AS STRING)               AS Image_URL,
    CAST(Status__Outbrain                      AS STRING)               AS Status,
    COALESCE(ROUND(CAST(Spend__Outbrain        AS DOUBLE), 2), 0)     AS Amount_Spend,
    COALESCE(CAST(Clicks__Outbrain             AS INT), 0)              AS Clicks,
    COALESCE(CAST(Impressions__Outbrain        AS INT), 0)              AS Impressions,
    COALESCE(CAST(Clicks_on_Video__Outbrain    AS INT), 0)              AS Link_Clicks,
    COALESCE(CAST(Video_Plays__Outbrain        AS INT), 0)              AS Video_View,
    COALESCE(CAST(Video_Average_View_Duration__Outbrain AS INT), 0)     AS Video_Average_View
  FROM datif_dz_{env}.`02_cleaned_uk_outbrain`.`010_outbrain_current_view`
),
ga4 AS (
  SELECT
    CAST(Session_campaign___GA4__Google_Analytics               AS STRING)               AS CampaignName,
    CAST(DATE_FORMAT(Date, 'yyyy-MM-dd')                        AS DATE)                 AS Date,
    COALESCE(CAST(SUM(Sessions___GA4__Google_Analytics)         AS INT), 0)              AS Sessions
  FROM datif_dz_{env}.`02_cleaned_uk_ga4`.`008_ga4_sessions_views_path_current_view`
  GROUP BY
    Session_campaign___GA4__Google_Analytics,
    CAST(DATE_FORMAT(Date, 'yyyy-MM-dd') AS DATE)
)
SELECT
  o.Date,
  o.AdID,
  o.CampaignName,
  o.AdName,
  o.Title,
  o.Content_URL,
  o.Image_URL,
  o.Status,
  o.Amount_Spend,
  o.Clicks,
  o.Impressions,
  o.Link_Clicks,
  o.Video_View,
  o.Video_Average_View,
  COALESCE(g.Sessions, 0) AS Sessions
FROM outbrain o
LEFT JOIN ga4 g
  ON o.CampaignName = g.CampaignName
  AND o.Date         = g.Date
""")


### Outbrain Paid Total

In [0]:
%python
def _safe_ratio(numerator, denominator, scale=1):
    """Build a guarded KPI column: ``numerator / denominator * scale``.

    Args:
        numerator: Name of the column used as dividend.
        denominator: Name of the column used as divisor.
        scale: Optional multiplier applied to the quotient (e.g. 1000 for CPM).

    Returns:
        A Spark column expression rounded to 4 decimals. It evaluates to 0
        when the denominator is 0, and also when the result is NULL (for
        example because one of the inputs is NULL).
    """
    quotient = F.col(numerator) / F.col(denominator)
    if scale != 1:
        quotient = quotient * scale
    guarded = F.when(F.col(denominator) == 0, 0).otherwise(quotient)
    return F.coalesce(F.round(guarded, 4), F.lit(0))


# Lifetime totals per ad, derived from the daily view created above.
df_daily = spark.read.table(f"datif_pz_uk_{env}.03_transformed.outbrain_paid_daily")

df_total = (
    df_daily.groupBy("AdID")
    .agg(
        # Earliest day with data for the ad is used as its creation date.
        F.min("Date").cast("date").alias("CreatedDate"),
        # NOTE(review): F.first picks an arbitrary row of the group (row order
        # is not guaranteed after a shuffle). If a campaign or ad name changes
        # over time, which name ends up here is not deterministic.
        F.first("CampaignName").alias("CampaignName"),
        F.first("AdName").alias("AdName"),
        F.round(F.sum("Amount_Spend"), 2).cast("double").alias("TotalSpend"),
        F.sum("Impressions").cast("integer").alias("TotalImpressions"),
        F.sum("Clicks").cast("integer").alias("TotalClicks"),
        F.sum("Link_Clicks").cast("integer").alias("TotalLinkClicks"),
        F.sum("Video_View").cast("integer").alias("TotalVideoView"),
        # NOTE(review): this sums a per-day *average* view duration, so the
        # result is a sum of daily averages, not an overall average. Kept
        # as-is because the column is part of the published schema.
        F.sum("Video_Average_View").cast("integer").alias("TotalVideo_Average_View"),
        F.sum("Sessions").cast("integer").alias("TotalSessions"),
    )
    # The grouping key is published under the name "ID".
    .withColumnRenamed("AdID", "ID")
    # Derived KPIs; each one is 0 when its denominator is 0 or the result is NULL.
    .withColumn("CPM", _safe_ratio("TotalSpend", "TotalImpressions", scale=1000))
    .withColumn("CPC", _safe_ratio("TotalSpend", "TotalClicks"))
    .withColumn("CTR", _safe_ratio("TotalClicks", "TotalImpressions"))
    .withColumn("CPV", _safe_ratio("TotalSpend", "TotalVideoView"))
    .withColumn("VideoViewRate", _safe_ratio("TotalVideoView", "TotalImpressions"))
    .withColumn("CPS", _safe_ratio("TotalSpend", "TotalSessions"))
    .withColumn("Click-to-Session", _safe_ratio("TotalSessions", "TotalClicks"))
)

fn_overwrite_table(
    df_source=df_total,
    target_schema_name=target_schema_name,
    target_table_name="outbrain_paid_total",
    target_path=target_path,
)