## Gold Table
The gold table aggregates data from the silver table and keeps only the columns we need

In [0]:
from pyspark.sql import functions as F

In [0]:
# Declare one text widget (empty default) per job parameter, then read the
# values that the Databricks workflow supplied for this run.
for widget_name in (
    "billing_period",
    "silver_table_name",
    "target_catalog_name",
    "target_schema_name",
    "gold_table_name",
    "tracker_table_name",
):
    dbutils.widgets.text(widget_name, "")

# Where the tables live.
target_catalog_name = dbutils.widgets.get("target_catalog_name")
target_schema_name = dbutils.widgets.get("target_schema_name")

# Which tables to read from / write to.
silver_table_name = dbutils.widgets.get("silver_table_name")
gold_table_name = dbutils.widgets.get("gold_table_name")
tracker_table_name = dbutils.widgets.get("tracker_table_name")

# Billing period to process; surrounding whitespace is removed.
billing_period = dbutils.widgets.get("billing_period").strip()

In [0]:
# Build the fully qualified <catalog>.<schema>.<table> name of each table.
# All three tables share the same catalog and schema.
namespace_prefix = f'{target_catalog_name}.{target_schema_name}'

silver_tbl = f'{namespace_prefix}.{silver_table_name}'
target_tbl = f'{namespace_prefix}.{gold_table_name}'
tracker_tbl = f'{namespace_prefix}.{tracker_table_name}'

In [0]:
# Ensure the gold table exists before anything is written to it.
# IF NOT EXISTS makes this statement a no-op when the table is already there.
# The column list matches the aliases produced by the aggregation query that
# populates the table.
create_gold_table_sql = f"""
          CREATE TABLE IF NOT EXISTS {target_tbl} (
            cloud_account_id STRING,
            billing_period STRING,
            usage_start_time TIMESTAMP,
            usage_end_time TIMESTAMP,
            usage_date TIMESTAMP,
            charge_type STRING,
            unblended_cost DOUBLE,
            net_unblended_cost DOUBLE,
            amortized_cost DOUBLE,
            net_amortized_cost DOUBLE,
            currency_code STRING,
            usage_metadata STRUCT<cluster_id: STRING, warehouse_id: STRING, instance_pool_id: STRING, job_id: STRING>)
            CLUSTER BY AUTO
"""
spark.sql(create_gold_table_sql)

In [0]:
# Aggregate one billing period from the silver table into the gold table and
# record the outcome (SUCCEEDED / FAILED) in the tracker table.
#
# Values are passed to spark.sql() through named parameter markers
# (:billing_period, :last_error) instead of being spliced into the SQL text.
# This removes the need for hand-rolled quote escaping, which could produce
# malformed SQL (e.g. when truncation cut an escaped quote pair in half or the
# message contained a backslash) and thereby prevent the FAILED status from
# being recorded. Table names are identifiers, so they are still interpolated.
# NOTE(review): passing plain Python values in `args` relies on PySpark 3.5+
# behavior; the CLUSTER BY AUTO clause used for the gold table suggests a
# recent Databricks Runtime, but confirm the cluster version.
period_args = {"billing_period": billing_period}

try:
    # Read the silver table for this billing period and aggregate the cost
    # columns; GROUP BY ALL groups by every non-aggregated SELECT expression.
    # Rows that match none of the CASE branches get a NULL charge_type.
    df = spark.sql(f"""
        SELECT
            aws.aws_account_id as cloud_account_id,
            billing_period,
            aws.usage_start_time,
            aws.usage_end_time,
            DATE_TRUNC('day',aws.usage_start_time) as usage_date,
            CASE
            WHEN product_code = 'AmazonEC2' AND usage_type LIKE '%EBS:%'
                THEN 'Storage'
            WHEN product_code = 'AmazonEC2' AND product_family = 'Compute Instance'
                THEN 'Compute'
            WHEN product_code = 'AmazonEC2' AND product_family = 'Data Transfer'
                THEN 'Networking'
            WHEN product_code = 'AmazonVPC'
                THEN 'Networking'
            END as charge_type,
            SUM(unblended_cost) as unblended_cost,
            SUM(net_unblended_cost) as net_unblended_cost,
            SUM(amortized_cost) as amortized_cost,
            SUM(net_amortized_cost) as net_amortized_cost,
            currency_code,
            struct(cluster_id, warehouse_id, instance_pool_id, job_id) AS usage_metadata
        FROM {silver_tbl} aws
        WHERE billing_period = :billing_period
        GROUP BY ALL
    """, args=period_args)

    # Overwrite only the rows of this billing period in the target Delta table.
    # replaceWhere takes a predicate string, so the value is interpolated here.
    (df.write.mode("overwrite")
       .option("mergeSchema","true")
       .option("replaceWhere", f"billing_period = '{billing_period}'")
       .saveAsTable(target_tbl))

    # Update the tracker table to mark this billing period as SUCCEEDED.
    spark.sql(f"""
      UPDATE {tracker_tbl}
      SET status='SUCCEEDED',
          completed_at=current_timestamp(),
          last_error=NULL,
          last_update=current_timestamp()
      WHERE billing_period = :billing_period
    """, args=period_args)
except Exception as e:
    # Keep at most 2000 characters of the error message. It is bound as a
    # parameter, so quotes and backslashes in the message need no escaping.
    err = str(e)[:2000]
    spark.sql(f"""
      UPDATE {tracker_tbl}
      SET status='FAILED',
          last_error=:last_error,
          last_update=current_timestamp()
      WHERE billing_period = :billing_period
    """, args={**period_args, "last_error": err})
    # Re-raise so the notebook run itself is reported as failed.
    raise