# 03-transformed: Transformation der Paid Eco Journal Daten

* Um was handelt es sich hier  (Kurzbeschreibung Inhalt):  
Die Paid-Eco-Journal-Daten transformieren, um sie für die Marts vorzubereiten.


---
## QUELLEN:  
- Unity-Catalog: 
  - datif_dz_dev.02_cleaned_uk_facebook.019_meta_ads_c_kkc_eco_journal
  - datif_dz_dev.02_cleaned_uk_facebook.019_meta_ads_c_kkc_eco_journal_current_view
  - datif_dz_dev.02_cleaned_uk_facebook.019_meta_ads_c_kkc_eco_journal_scd2_view
  
## ZIEL  
- Unity-Catalog: 
  - datif_pz_uk_dev.03_transformed.paid_eco_journal_total
  - datif_pz_uk_dev.03_transformed.paid_eco_journal_daily
  - datif_pz_uk_dev.03_transformed.paid_eco_journal_scd2_view

  
---
* Versionen (aktuelle immer oben):
  - 26.08.2025 Minh Hieu Le: Init
  - 03.09.2025 Minh Hieu Le: Add metrics

In [0]:
%run ../../common/nb_init

In [0]:
# Schema and sub-path handed to fn_overwrite_table when the result tables are written.
target_schema_name = "03_transformed"
target_path = "paid_data"
# Location of the paid eco journal data below the "03_transformed" endpoint.
# NOTE(review): sta_endpoint_pz_uk is presumably provided by ../../common/nb_init — confirm.
source_path = "/".join([sta_endpoint_pz_uk["03_transformed"], "paid_eco_journal"])

In [0]:
from pyspark.sql.functions import col, when, round, sum, first

## Paid Eco Journal Daily View

In [0]:
# Build the daily paid eco journal table from the current view of the cleaned
# Meta Ads data: shorten the column names, enforce column types and derive the
# engagement metrics. `round` below is pyspark.sql.functions.round (see the
# import cell), not the Python builtin.
df_daily = spark.read.table(f"datif_dz_{env}.02_cleaned_uk_facebook.019_meta_ads_c_kkc_eco_journal_current_view")

# Source column name -> short name used in the transformed layer.
daily_renames = {
    "Ad_ID__Facebook_Ads": "Ad_ID",
    "Campaign_Name__Facebook_Ads": "Campaign_Name",
    "Ad_Name__Facebook_Ads": "Ad_Name",
    "Ad_Set_Name__Facebook_Ads": "Ad_Set_Name",
    "Amount_Spent__Facebook_Ads": "Amount_Spent",
    "Impressions__Facebook_Ads": "Impressions",
    "Clicks_all__Facebook_Ads": "Clicks",
    "Link_Clicks__Facebook_Ads": "Link_Clicks",
    "Post_Saves__Facebook_Ads": "Post_Saves",
    "Post_Shares__Facebook_Ads": "Post_Shares",
    "Post_Comments__Facebook_Ads": "Post_Comments",
    "Post_Reactions__Facebook_Ads": "Post_Reactions",
    "n_3_Second_Video_Views__Facebook_Ads": "Second_Video_Views",
}
for source_name, short_name in daily_renames.items():
    df_daily = df_daily.withColumnRenamed(source_name, short_name)

# Column name -> Spark type enforced on the (renamed) columns.
daily_casts = {
    "Date": "date",
    "Amount_Spent": "double",
    "Impressions": "int",
    "Clicks": "int",
    "Link_Clicks": "int",
    "Post_Saves": "int",
    "Post_Shares": "int",
    "Post_Comments": "int",
    "Post_Reactions": "int",
    "Second_Video_Views": "int",
}
for column_name, spark_type in daily_casts.items():
    df_daily = df_daily.withColumn(column_name, col(column_name).cast(spark_type))

# Weighted sum of the interaction counts (comments 0.4, shares 0.1,
# link clicks 0.3, reactions 0.2), rounded to two decimals.
df_daily = df_daily.withColumn(
    "WeightedEngagement",
    round(
        col("Post_Comments") * 0.4
        + col("Post_Shares") * 0.1
        + col("Link_Clicks") * 0.3
        + col("Post_Reactions") * 0.2,
        2,
    ),
)

# Engagement per impression as a percentage, 0 when there were no impressions.
# The factor 100 keeps this column on the same scale as EngagementRate in
# paid_eco_journal_total; a bare ratio rounded to two decimals would collapse
# almost every realistic value to 0.0 or 0.01.
df_daily = df_daily.withColumn(
    "EngagementRate",
    round(
        when(col("Impressions") == 0, 0).otherwise(col("WeightedEngagement") / col("Impressions")) * 100,
        2,
    ),
)

display(df_daily)
fn_overwrite_table(df_source=df_daily, target_schema_name=target_schema_name, target_table_name="paid_eco_journal_daily", target_path=target_path)

## Paid Eco Journal Total

In [0]:
# Aggregate the daily rows to one row per ad and derive the KPI columns.
# `sum` and `round` below are the pyspark.sql.functions versions (see the
# import cell), not the Python builtins.


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a Column, or 0 where the denominator equals 0."""
    return when(denominator == 0, 0).otherwise(numerator / denominator)


# Metrics that are summed over all daily rows of an ad (order = output column order).
summed_metrics = [
    "Impressions",
    "Clicks",
    "Link_Clicks",
    "Post_Saves",
    "Post_Shares",
    "Post_Comments",
    "Post_Reactions",
    "Second_Video_Views",
    "Amount_Spent",
]

# NOTE(review): first() returns the value of whichever row Spark sees first in
# each group, so Load_Timestamp (and the names, should they ever change for an
# Ad_ID) are not deterministic — confirm whether e.g. the latest LOAD_TS is wanted.
df_total = df_daily.groupby("Ad_ID").agg(
    first("Campaign_Name").alias("Campaign_Name"),
    first("Ad_Name").alias("Ad_Name"),
    first("Ad_Set_Name").alias("Ad_Set_Name"),
    first("LOAD_TS").alias("Load_Timestamp"),
    *[sum(col(metric)).alias(metric) for metric in summed_metrics],
)

df_total = (
    df_total
    # Round the spend first: every cost KPI below is based on the rounded amount.
    .withColumn("Amount_Spent", round(col("Amount_Spent"), 2))
    # Weighted sum of the interaction counts (comments 0.4, shares 0.1,
    # link clicks 0.3, reactions 0.2).
    .withColumn(
        "WeightedEngagement",
        round(
            col("Post_Comments") * 0.4
            + col("Post_Shares") * 0.1
            + col("Link_Clicks") * 0.3
            + col("Post_Reactions") * 0.2,
            2,
        ),
    )
    # Weighted engagement per impression, in percent.
    .withColumn("EngagementRate", round(_safe_ratio(col("WeightedEngagement"), col("Impressions")) * 100, 2))
    # Cost per 1000 impressions.
    .withColumn("CPM", round(_safe_ratio(col("Amount_Spent"), col("Impressions") / 1000), 2))
    # Cost per click.
    .withColumn("CPC", round(_safe_ratio(col("Amount_Spent"), col("Clicks")), 2))
    # Clicks per impression, in percent.
    .withColumn("CTR", round(_safe_ratio(col("Clicks"), col("Impressions")) * 100, 2))
    # Cost per 3-second video view. The guard must look at Second_Video_Views
    # (the actual divisor), not Impressions: ads with impressions but no video
    # views would otherwise divide by zero (NULL, or an error in ANSI mode).
    .withColumn("CPV", round(_safe_ratio(col("Amount_Spent"), col("Second_Video_Views")), 2))
    # 3-second video views per impression, in percent.
    .withColumn("View_Rate", round(_safe_ratio(col("Second_Video_Views"), col("Impressions")) * 100, 2))
)

display(df_total)
fn_overwrite_table(df_source=df_total, target_schema_name=target_schema_name, target_table_name="paid_eco_journal_total", target_path=target_path)

In [0]:
# Disabled legacy variant of the "total" build (kept as a note only; the full
# code is available in version control):
#   - read datif_dz_{env}.02_cleaned_uk_facebook.019_meta_ads_c_kkc_eco_journal directly,
#   - applied the same "__Facebook_Ads" column renames and the same type casts
#     (Date -> date, Amount_Spent -> double, count columns -> int) as the daily cell,
#   - displayed the result and overwrote paid_eco_journal_total with it,
#     without any aggregation per Ad_ID and without derived metrics.
# The active "Paid Eco Journal Total" cell derives that table from df_daily instead.

## Paid Eco Journal SCD2 View

In [0]:
# Build the SCD2 table: same column renames and type casts as the daily table,
# applied to the SCD2 view of the cleaned Meta Ads data; no derived metrics.
df_scd2 = spark.read.table(f"datif_dz_{env}.02_cleaned_uk_facebook.019_meta_ads_c_kkc_eco_journal_scd2_view")

# (source column, short column) pairs, renamed one after the other.
scd2_renames = [
    ("Ad_ID__Facebook_Ads", "Ad_ID"),
    ("Campaign_Name__Facebook_Ads", "Campaign_Name"),
    ("Ad_Name__Facebook_Ads", "Ad_Name"),
    ("Ad_Set_Name__Facebook_Ads", "Ad_Set_Name"),
    ("Amount_Spent__Facebook_Ads", "Amount_Spent"),
    ("Impressions__Facebook_Ads", "Impressions"),
    ("Clicks_all__Facebook_Ads", "Clicks"),
    ("Link_Clicks__Facebook_Ads", "Link_Clicks"),
    ("Post_Saves__Facebook_Ads", "Post_Saves"),
    ("Post_Shares__Facebook_Ads", "Post_Shares"),
    ("Post_Comments__Facebook_Ads", "Post_Comments"),
    ("Post_Reactions__Facebook_Ads", "Post_Reactions"),
    ("n_3_Second_Video_Views__Facebook_Ads", "Second_Video_Views"),
]
for old_name, new_name in scd2_renames:
    df_scd2 = df_scd2.withColumnRenamed(old_name, new_name)

# (column, Spark type) pairs, cast in place in the same order as before.
scd2_casts = [
    ("Date", "date"),
    ("Amount_Spent", "double"),
    ("Impressions", "int"),
    ("Clicks", "int"),
    ("Link_Clicks", "int"),
    ("Post_Saves", "int"),
    ("Post_Shares", "int"),
    ("Post_Comments", "int"),
    ("Post_Reactions", "int"),
    ("Second_Video_Views", "int"),
]
for cast_column, cast_type in scd2_casts:
    df_scd2 = df_scd2.withColumn(cast_column, col(cast_column).cast(cast_type))

display(df_scd2)
fn_overwrite_table(df_source=df_scd2, target_schema_name=target_schema_name, target_table_name="paid_eco_journal_scd2_view", target_path=target_path)