In [0]:
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Preview of the bronze flights Delta data: discard the _rescued_data column,
# convert flight_date with to_date, and stamp every row with the current
# timestamp as modifiedDate before displaying the result.
df = (
    spark.read.format("delta")
    .load("/Volumes/workspace/bronze/bronzevolume/flights/data/")
    .drop("_rescued_data")
    .withColumn("flight_date", to_date(col("flight_date")))
    .withColumn("modifiedDate", current_timestamp())
)
display(df)

In [0]:
# Preview of the bronze passengers Delta data, displayed exactly as loaded.
df = (
    spark.read
    .format("delta")
    .load("/Volumes/workspace/bronze/bronzevolume/passengers/data/")
)
# Disabled transformation, kept for reference.
# NOTE(review): it references flight_date -- confirm that column applies to
# passengers before enabling.
# df = df.withColumn("modifiedDate", current_timestamp())\
#     .withColumn("flight_date", to_date(col("flight_date")))\
#     .drop("_rescued_data")
display(df)

In [0]:
# Staging layer: a dlt streaming table named "stage_bookings" that is loaded
# incrementally from the bronze bookings Delta data. No transformations are
# applied here, since it's the staging step.
@dlt.table(name="stage_bookings")
def stage_bookings():
  # Hand the raw streaming read straight back to dlt.
  return (
    spark.readStream
    .format("delta")
    .load("/Volumes/workspace/bronze/bronzevolume/bookings/data/")
  )

In [0]:
# Transformation layer: a dlt streaming view named "trans_bookings" that reads
# the stage_bookings stream and cleans it up.
@dlt.view(name="trans_bookings")
def trans_bookings():
  staged = spark.readStream.table("stage_bookings")
  return (
    staged
    # Remove the _rescued_data column.
    .drop("_rescued_data")
    # Cast amount to double and convert booking_date with to_date.
    .withColumn("amount", col("amount").cast(DoubleType()))
    .withColumn("booking_date", to_date(col("booking_date")))
    # Stamp each row with the current timestamp.
    .withColumn("modifiedDate", current_timestamp())
  )

In [0]:
# Expectations for the silver table, keyed by rule name; each value is a SQL
# condition. The dict is passed to dlt.expect_all on silver_bookings.
rules = dict(
    rule1="booking_id IS NOT NULL",
    rule2="passenger_id IS NOT NULL",
    # Currently disabled rules:
    # rule3="flight_id IS NOT NULL",
    # rule4="airport_id IS NOT NULL",
)

In [0]:
# Silver layer: a dlt streaming table named "silver_bookings", fed from the
# trans_bookings stream, with every entry of `rules` attached through
# dlt.expect_all.
@dlt.table(name="silver_bookings")
@dlt.expect_all(rules)
def silver_bookings():
  # No further transformation; the stream is returned as read.
  return spark.readStream.table("trans_bookings")