In [0]:
# DLT works with three types of Datasets
# Streaming Tables (Permanent/Temporary) -- used as Append Data Sources, Incremental Data
# Materialized Views - used for transformations, aggregations or computations
# Views - used for intermediate Transformations, not stored in target schema

import dlt

In [0]:
# create a streaming table for orders
@dlt.table(
    comment="Order bronze table",
    table_properties={"quality": "bronze"},
)
def orders_bronze():
    """Bronze orders dataset, read as a stream from `dev.bronze.orders_raw`.

    Returns:
        The streaming DataFrame produced by `spark.readStream.table(...)`,
        passed through unchanged.
    """
    # readStream (rather than read) is what makes this a streaming source.
    return spark.readStream.table("dev.bronze.orders_raw")



# Create a Materialized View for customer

@dlt.table(
    table_properties={"quality": "bronze"},
    comment="Customer bronze table",
    name="customer_bronze",
)
def cust_bronze():
    """Bronze customer dataset, batch-read from `dev.bronze.customer_raw`.

    The dataset is published under the explicit name `customer_bronze`
    (set via the decorator's `name` argument), not under this function's
    name, so downstream readers must reference `customer_bronze`.

    Returns:
        The DataFrame produced by `spark.read.table(...)`, passed through
        unchanged.
    """
    # Batch read (spark.read, not readStream): the whole source table is
    # read each time the dataset is computed.
    return spark.read.table("dev.bronze.customer_raw")

In [0]:
# create a view to join orders with customer
@dlt.view(comment="joined view")
def joined_vw():
    """Intermediate view joining orders to customers on the customer key.

    Orders are the left side of a left outer join, so every order row is
    kept even when no customer row matches its `o_custKey`.

    Returns:
        DataFrame holding the columns of `LIVE.orders_bronze` followed by
        the columns of `LIVE.customer_bronze`.
    """
    customers = spark.read.table("LIVE.customer_bronze")
    orders = spark.read.table("LIVE.orders_bronze")

    # Join condition: customer key on each side must match.
    same_customer = customers.c_custKey == orders.o_custKey
    return orders.join(customers, on=same_customer, how="left_outer")

In [0]:
# create MV( Materialized View ) to add new column

from pyspark.sql.functions import current_timestamp

@dlt.table(
    name="joined_silver",
    comment="joined table",
    table_properties={"quality": "silver"},
)
def joined_silver():
    """Silver dataset: the joined view plus a load-timestamp column.

    Returns:
        All columns of `LIVE.joined_vw` with an added `__insert_date`
        column populated by `current_timestamp()`.
    """
    joined = spark.read.table("LIVE.joined_vw")
    # Stamp each row with the time this dataset was computed.
    return joined.withColumn("__insert_date", current_timestamp())

In [0]:
# Aggregate based on c_mktSegment and find the count of order (o_orderKey)

@dlt.table(
    table_properties={"quality": "gold"},
    comment="orders Aggregated table",
)
def orders_agg_gold():
    """Gold dataset: number of orders per customer market segment.

    Groups `LIVE.joined_silver` by `c_mktSegment` and counts the non-null
    `o_orderKey` values in each group.

    Note: the count column is aliased `sum_orders` even though it holds a
    count, not a sum. The name is kept as-is because it is part of the
    published schema of this table.

    Returns:
        DataFrame with columns `c_mktSegment`, `sum_orders` and
        `__insert_date` (populated by `current_timestamp()`).
    """
    # `count` is not imported anywhere else in the notebook; import it here
    # so this cell does not fail with a NameError.
    from pyspark.sql.functions import count

    silver = spark.read.table("LIVE.joined_silver")
    orders_per_segment = silver.groupBy("c_mktSegment").agg(
        count("o_orderKey").alias("sum_orders")
    )
    return orders_per_segment.withColumn("__insert_date", current_timestamp())