# ###**DLT Pipelines**

In [0]:
import dlt
from pyspark.sql.functions import *

####**Streaming view**

In [0]:
# Data-quality expectations: maps each expectation name to the SQL
# constraint a row must satisfy.
my_rules = dict(
    rule_1="Product_id is not null",
    rule_2="Product_name is not null",
)

In [0]:
# Streaming view over the Change Data Feed (CDF) of the silver products table
@dlt.view
@dlt.expect_all_or_drop(my_rules)
def DimProducts_changes():
    """Return the streaming change rows of ``databricks_catalog.silver.products``.

    The change feed is read as a stream starting at table version 16. Only
    rows whose ``_change_type`` is ``insert`` or ``update_postimage`` are
    kept. An ``ingest_ts`` column is derived from ``_commit_timestamp``, and
    the three change-feed metadata columns are renamed with a ``src_`` prefix
    so the target table does not contain the reserved names. The
    expectations in ``my_rules`` are attached through the decorator above.
    """
    change_feed = (
        spark.readStream
        .format("delta")
        .option("readChangeFeed", "true")
        .option("startingVersion", 16)
        .table("databricks_catalog.silver.products")
    )

    return (
        change_feed
        # keep only the new state of each row: inserts and post-update images
        .filter(col("_change_type").isin("insert", "update_postimage"))
        # ingest_ts must be derived before _commit_timestamp is renamed below
        .withColumn("ingest_ts", col("_commit_timestamp").cast("timestamp"))
        # move the reserved change-feed column names out of the way
        .withColumnRenamed("_change_type", "src_change_type")
        .withColumnRenamed("_commit_version", "src_commit_version")
        .withColumnRenamed("_commit_timestamp", "src_commit_timestamp")
    )

####DimProducts

In [0]:
# Declare the streaming table that the CDC flow targets.
dlt.create_streaming_table(name="DimProducts")

In [0]:
# Apply the change rows from the DimProducts_changes view to DimProducts as
# an SCD type 2 dimension, keyed on Product_id and ordered by ingest_ts.
#
# The src_* columns and ingest_ts are change-feed bookkeeping carried over
# from the source view; their values differ from one source commit to the
# next. They are excluded from history tracking so that a new history row is
# opened only when a product attribute changes, not merely because the row
# was rewritten by a later commit. The columns are still stored in the
# target table.
dlt.create_auto_cdc_flow(
  target = "DimProducts",
  source = "DimProducts_changes",
  keys = ["Product_id"],
  sequence_by = "ingest_ts",
  stored_as_scd_type = 2,
  track_history_except_column_list = [
    "src_change_type",
    "src_commit_version",
    "src_commit_timestamp",
    "ingest_ts",
  ]
)