In [0]:
%run ../config/init_config


In [0]:
from pyspark.sql.functions import from_json, to_date, lit, col, row_number, broadcast, count, sum
from pyspark.sql.window import Window

# Location of the country JSON file that is loaded below.
# NOTE(review): this is an absolute path inside one user's workspace folder —
# consider sourcing it from the shared config notebook instead.
country_path = "file:/Workspace/Users/joelledidanera@gmail.com/03_Databricks_Medallion/includes/country.json"

# Batch-read the JSON file into a DataFrame (schema is inferred by the reader).
country_df = spark.read.format("json").load(country_path)

In [0]:
def process_sales_silver():
    """Parse raw sales events from ``bronze_raw`` into the ``sales_silver`` table.

    Streams ``bronze_raw``, keeps rows where ``source_type = 'sales'``, parses
    the JSON held in ``value`` against a fixed schema, converts ``sale_date``
    to a DATE and writes the result to ``sales_silver``. The query runs with an
    ``availableNow`` trigger and this function blocks until it terminates.

    Uses the notebook-level names ``spark`` and ``silver_checkpoint_path``
    (presumably provided by the ``%run`` of the config notebook — confirm).

    Returns:
        None
    """
    sales_schema = "sale_id STRING, order_id STRING, vehicle_id STRING, sale_date STRING, amount FLOAT"

    sales_parsed = (spark.readStream
                        .format("delta")
                        .table("bronze_raw")
                        .filter("source_type = 'sales'")
                        .select(from_json("value", sales_schema).alias("v"), "processed_time")
                        # Flatten the parsed struct into top-level columns.
                        .select("v.*", "processed_time")
                        # Spark datetime patterns are case-sensitive: "MM" is
                        # month-of-year while "mm" is minute-of-hour. The previous
                        # "yyyy-mm-dd" pattern never parsed the month.
                        # NOTE(review): rows already committed to sales_silver with
                        # the old pattern are not rewritten by this change; a
                        # backfill is needed if those dates must be corrected.
                        .withColumn("sale_date", to_date("sale_date", "yyyy-MM-dd"))
                    )

    query = (sales_parsed.writeStream
                .format("delta")
                .option("mergeSchema", "true")
                .option("checkpointLocation", silver_checkpoint_path + "/sales")
                .trigger(availableNow=True)
                .table("sales_silver")
                )

    # Block until the availableNow run has finished.
    query.awaitTermination()

In [0]:
# Run the bronze_raw -> sales_silver load; the call blocks until the
# availableNow streaming query has terminated.
process_sales_silver()

In [0]:
def process_sales_vehicle():
    """Join streamed sales with vehicle data and write them to ``sales_vehicle``.

    Reads ``sales_silver`` as a stream and inner-joins it on ``vehicle_id``
    with a batch read of ``current_vehicle_silver``, so only sales that have a
    matching vehicle row are written. The sales-side key is renamed before the
    join so the two ``vehicle_id`` columns do not collide, then removed again
    together with ``expired_at`` and ``is_current`` (presumably history
    bookkeeping columns of the vehicle table — confirm).

    The query runs with an ``availableNow`` trigger and this function blocks
    until it terminates. Uses the notebook-level names ``spark`` and
    ``silver_checkpoint_path``.

    Returns:
        None
    """
    # Static (non-streaming) side of the join.
    vehicles = spark.table("current_vehicle_silver")

    # Streaming side, with its join key renamed to keep it distinguishable.
    sales_stream = (
        spark.readStream
        .table("sales_silver")
        .withColumnRenamed("vehicle_id", "vehicle_id_left")
    )

    sales_with_vehicle = (
        sales_stream
        .join(vehicles, col("vehicle_id_left") == col("vehicle_id"), "inner")
        .drop("vehicle_id_left", "expired_at", "is_current")
    )

    stream_writer = (
        sales_with_vehicle.writeStream
        .format("delta")
        .option("mergeSchema", "true")
        .option("checkpointLocation", silver_checkpoint_path + "/sales_vehicle")
        .trigger(availableNow=True)
    )

    running_query = stream_writer.table("sales_vehicle")

    # Block until the availableNow run has finished.
    running_query.awaitTermination()

In [0]:
# Run the sales_silver -> sales_vehicle enrichment; the call blocks until the
# availableNow streaming query has terminated.
process_sales_vehicle()