# UK - Anbindung Paid Themen 

* Um was handelt es sich hier  (Kurzbeschreibung Inhalt):
- Die Paid-Data-Transformation von YouTube


---
* QUELLEN:  
- datif_dz_{}.`02_cleaned_uk_google_ads`.`017_google_ads_kkc_some_current_view`


* ZIEL:  
- datif_pz_uk_{}.03_transformed.youtube_paid_daily
- datif_pz_uk_{}.03_transformed.youtube_paid_total

---
* Versionen (aktuelle immer oben):
- 15.09.2025 Max Mustermann: Add Organic ID
- 21.08.2025 Max Mustermann: init



# 1. Initialnotebooks & Libraries

In [0]:
%run ../../common/nb_init

In [0]:
# Write-target settings for this notebook. Both values are handed to
# fn_overwrite_table when the youtube_paid_total table is written further down.
# NOTE(review): the exact meaning of target_path ("funnel") is defined by
# fn_overwrite_table, which is not visible in this notebook - confirm there.
target_schema_name = "03_transformed"
target_path = "funnel"

## 03-transformed

### Youtube Paid Daily

In [0]:
# (Re)creates the view youtube_paid_daily: one row per calendar day and Google Ads ad ID,
# with summed delivery metrics and an OrganicID derived from the ad name.
#
# Design notes:
# - Grouping uses the same normalised expressions that are selected (DATE-cast day,
#   STRING-cast ad ID), so a raw Date value carrying a time component cannot split
#   one day into several rows.
# - Descriptive attributes use MAX instead of FIRST: FIRST is non-deterministic in
#   Spark, MAX yields a stable value and skips NULLs.
# - Cost is rounded once after summing; summing per-row rounded doubles does not
#   guarantee a 2-decimal result.
# - OrganicID is computed in the outer query from the aggregated AdName, so the view
#   does not depend on lateral column alias resolution inside an aggregate.
# - NOTE(review): Conversions are cast to INTEGER before summing, which truncates
#   fractional values if the source delivers them - confirm this is intended.
# `env` is not defined in this notebook; presumably it is provided by nb_init.
spark.sql(f"""
CREATE OR REPLACE VIEW datif_pz_uk_{env}.`03_transformed`.youtube_paid_daily AS
WITH daily AS (
    SELECT
        CAST(DATE_FORMAT(Date, 'yyyy-MM-dd') AS DATE) AS Date,
        CAST(Ad_ID__Google_Ads AS STRING) AS AdID,
        MAX(CAST(Campaign__Google_Ads AS STRING)) AS CampaignName,
        MAX(CAST(Ad_Group_Name__Google_Ads AS STRING)) AS AdSetName,
        MAX(CAST(Ad_Name__Google_Ads AS STRING)) AS AdName,
        MAX(CAST(Ad_Type__Google_Ads AS STRING)) AS Campaign_Objective,
        ROUND(SUM(COALESCE(CAST(Cost__Google_Ads AS DOUBLE), 0)), 2) AS Amount_Spend,
        SUM(COALESCE(CAST(Clicks__Google_Ads AS INTEGER), 0)) AS Clicks,
        SUM(COALESCE(CAST(Impressions__Google_Ads AS INTEGER), 0)) AS Impressions,
        SUM(COALESCE(CAST(Engagements__Google_Ads AS INTEGER), 0)) AS Engagements,
        SUM(COALESCE(CAST(Conversions__Google_Ads AS INTEGER), 0)) AS Conversions,
        SUM(COALESCE(CAST(Video_views__Google_Ads AS INTEGER), 0)) AS Video_View,
        SUM(COALESCE(CAST(Views_100__Google_Ads AS INTEGER), 0)) AS Video_100
    FROM datif_dz_{env}.`02_cleaned_uk_google_ads`.`017_google_ads_kkc_some_current_view`
    GROUP BY
        CAST(DATE_FORMAT(Date, 'yyyy-MM-dd') AS DATE),
        CAST(Ad_ID__Google_Ads AS STRING)
)
SELECT
    Date,
    AdID,
    CampaignName,
    AdSetName,
    AdName,
    Campaign_Objective,
    Amount_Spend,
    Clicks,
    Impressions,
    Engagements,
    Conversions,
    Video_View,
    Video_100,

    -- OrganicID logic: everything after the last '!' in the ad name.
    -- REGEXP_EXTRACT returns '' when nothing matches (no '!' at all, or the name
    -- ends with '!'); a NULL name stays NULL. All of these become 'Dark Post'.
    COALESCE(
        NULLIF(REGEXP_EXTRACT(AdName, '!([^!]+)$', 1), ''),
        'Dark Post'
    ) AS OrganicID
FROM daily
""")


### Youtube Paid Total

In [0]:
def _safe_ratio(numerator, denominator, scale=None):
    """Build a zero-safe KPI column expression, rounded to 4 decimals.

    Args:
        numerator: name of the column used as dividend.
        denominator: name of the column used as divisor.
        scale: optional factor the quotient is multiplied with (e.g. 1000 for CPM).

    Returns:
        A Column that is 0 when the denominator is 0 or when the result is NULL,
        otherwise round(numerator / denominator [* scale], 4).
    """
    quotient = F.col(numerator) / F.col(denominator)
    if scale is not None:
        quotient = quotient * scale
    guarded = F.when(F.col(denominator) == 0, 0).otherwise(quotient)
    return F.coalesce(F.round(guarded, 4), F.lit(0))


# Lifetime totals per ad, built from the daily view.
# `spark`, `F`, `env` and `fn_overwrite_table` are not defined in this notebook;
# presumably they are provided by nb_init.
df_daily = spark.read.table(f"datif_pz_uk_{env}.03_transformed.youtube_paid_daily")

df_total = (
    df_daily.groupBy("AdID")
    .agg(
        # The grouping key is re-exposed as "ID"; the original "AdID" column is dropped below.
        F.first("AdID").alias("ID"),
        # NOTE(review): F.first is non-deterministic; if an ad's name (and therefore its
        # OrganicID) changes between days, the value picked here is arbitrary.
        F.first("OrganicID").alias("OrganicID"),
        # Earliest day with data for the ad.
        F.min("Date").cast("date").alias("CreatedDate"),
        F.first("CampaignName").alias("CampaignName"),
        F.first("AdName").alias("AdName"),
        # Casts are applied before the alias so the output name never depends on Spark
        # recovering it from an alias nested under a cast.
        F.round(F.sum("Amount_Spend"), 2).cast("double").alias("TotalSpend"),
        F.sum("Impressions").cast("integer").alias("TotalImpressions"),
        F.sum("Clicks").cast("integer").alias("TotalClicks"),
        F.sum("Video_View").cast("integer").alias("TotalVideoView"),
        F.sum("Video_100").cast("integer").alias("TotalVideo_100"),
        F.sum("Engagements").cast("integer").alias("TotalEngagement"),
    )
    .drop("AdID")
    # AdSetName is intentionally not aggregated: the previous version computed it and
    # dropped it again, so it was never part of the written table. The other names in
    # that drop (Clicks, Video_View, Video_100) did not exist after the aggregation.
    .withColumn("CPM", _safe_ratio("TotalSpend", "TotalImpressions", scale=1000))  # cost per 1000 impressions
    .withColumn("CPC", _safe_ratio("TotalSpend", "TotalClicks"))                   # cost per click
    .withColumn("CTR", _safe_ratio("TotalClicks", "TotalImpressions"))             # clicks per impression
    .withColumn("CPV", _safe_ratio("TotalSpend", "TotalVideoView"))                # cost per video view
    .withColumn("VideoViewRate", _safe_ratio("TotalVideoView", "TotalImpressions"))  # views per impression
    .withColumn("VTR", _safe_ratio("TotalVideo_100", "TotalVideoView"))            # 100% views per view
)

fn_overwrite_table(df_source=df_total, target_schema_name=target_schema_name, target_table_name="youtube_paid_total", target_path=target_path)