In [0]:
# Notebook parameter: a dropdown offering dev / uat / prod, preselected to dev.
dbutils.widgets.dropdown(
    name='environment',
    defaultValue='dev',
    choices=['dev', 'uat', 'prod'],
    label='select your environment',
)
# Current dropdown selection; used further down as the catalog-name prefix.
environment = dbutils.widgets.get('environment')

In [0]:
%run "/Workspace/Users/dakuikui_outlook.com#ext#@dakuikuioutlook.onmicrosoft.com/04_common"

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Explicit column names and types applied to the raw traffic CSV files.
# Each field is added with the default nullable=True, the same default
# StructField uses when no nullability is given.
schema_traffic = (
    StructType()
    .add("Record_ID", IntegerType())
    .add("Count_point_id", IntegerType())
    .add("Direction_of_travel", StringType())
    .add("Year", IntegerType())
    .add("Count_date", StringType())
    .add("hour", IntegerType())
    .add("Region_id", IntegerType())
    .add("Region_name", StringType())
    .add("Local_authority_name", StringType())
    .add("Road_name", StringType())
    .add("Road_Category_ID", IntegerType())
    .add("Start_junction_road_name", StringType())
    .add("End_junction_road_name", StringType())
    .add("Latitude", DoubleType())
    .add("Longitude", DoubleType())
    .add("Link_length_km", DoubleType())
    .add("Pedal_cycles", IntegerType())
    .add("Two_wheeled_motor_vehicles", IntegerType())
    .add("Cars_and_taxis", IntegerType())
    .add("Buses_and_coaches", IntegerType())
    .add("LGV_Type", IntegerType())
    .add("HGV_Type", IntegerType())
    .add("EV_Car", IntegerType())
    .add("EV_Bike", IntegerType())
)

In [0]:
# Explicit column names and types applied to the raw roads CSV files.
# Each field is added with the default nullable=True, the same default
# StructField uses when no nullability is given.
schema_roads = (
    StructType()
    .add('Road_ID', IntegerType())
    .add('Road_Category_Id', IntegerType())
    .add('Road_Category', StringType())
    .add('Region_ID', IntegerType())
    .add('Region_Name', StringType())
    .add('Total_Link_Length_Km', DoubleType())
    .add('Total_Link_Length_Miles', DoubleType())
    .add('All_Motor_Vehicles', DoubleType())
)

In [0]:
def load_raw_traffic(schema):
    """Open an Auto Loader (cloudFiles) CSV stream over the raw traffic landing path.

    Relies on ``spark``, ``checkpoints`` and ``landing_raw_traffic`` existing at
    notebook scope (presumably provided by the ``04_common`` notebook - confirm).

    Args:
        schema: Schema applied to the incoming CSV files via ``.schema(...)``.

    Returns:
        The streaming DataFrame, with an extra ``Extract_Time`` column set to
        ``current_timestamp()``.
    """
    # Join with exactly one '/' regardless of whether `checkpoints` carries a
    # trailing slash; plain concatenation produced '//' when it did.
    schema_location = checkpoints.rstrip('/') + '/rawTraffic_load/schema'

    rawTraffic_stream = (
        spark.readStream
        .format('cloudFiles')
        # NOTE(review): a bare 'cloudFiles' key does not look like a documented
        # Auto Loader option; kept unchanged - confirm whether it can be dropped.
        .option('cloudFiles', 'true')
        .option('cloudFiles.format', 'csv')
        .option('cloudFiles.schemaLocation', schema_location)
        .option('header', 'true')
        .schema(schema)
        .load(landing_raw_traffic)
        .withColumn('Extract_Time', current_timestamp())
    )
    print('read raw traffic successfully')
    return rawTraffic_stream

In [0]:
def load_raw_roads(schema):
    """Open an Auto Loader (cloudFiles) CSV stream over the raw roads landing path.

    Relies on ``spark``, ``checkpoints`` and ``landing_raw_roads`` existing at
    notebook scope (presumably provided by the ``04_common`` notebook - confirm).

    Args:
        schema: Schema applied to the incoming CSV files via ``.schema(...)``.

    Returns:
        The streaming DataFrame returned by ``.load(landing_raw_roads)``.
    """
    # Join with exactly one '/' regardless of whether `checkpoints` carries a
    # trailing slash; plain concatenation produced '//' when it did.
    schema_location = checkpoints.rstrip('/') + '/rawRoads_load/schema'

    rawRoads_stream = (
        spark.readStream
        .format('cloudFiles')
        # NOTE(review): a bare 'cloudFiles' key does not look like a documented
        # Auto Loader option; kept unchanged - confirm whether it can be dropped.
        .option('cloudFiles', 'true')
        .option('cloudFiles.format', 'csv')
        .option('cloudFiles.schemaLocation', schema_location)
        .option('header', 'true')
        .schema(schema)
        .load(landing_raw_roads)
    )
    print('read raw roads successfully')
    return rawRoads_stream

In [0]:
def write_raw_traffic(rawTraffic_stream, environment):
    """Append the raw traffic stream to ``<environment>_catalog.bronze.raw_traffic``.

    Starts a Delta append query with an ``availableNow`` trigger and blocks
    until that query has terminated, so the success message is only printed
    once the write has actually finished.

    Relies on ``checkpoints`` existing at notebook scope (presumably provided
    by the ``04_common`` notebook - confirm).

    Args:
        rawTraffic_stream: Streaming DataFrame to persist.
        environment: Prefix of the target catalog, used as
            ``{environment}_catalog``.

    Raises:
        Whatever ``awaitTermination()`` raises when the streaming query fails.
    """
    # Join with exactly one '/' so the path is valid whether or not
    # `checkpoints` ends with a slash (bare concatenation fused the root and
    # the folder name when it did not).
    # NOTE(review): if `checkpoints` has no trailing slash today, this points
    # at a different directory than the old concatenation did - confirm.
    checkpoint_location = checkpoints.rstrip('/') + '/rawTraffic_load/checkpoint'

    query = (
        rawTraffic_stream.writeStream
        .format('delta')
        .option('checkpointLocation', checkpoint_location)
        .outputMode('append')
        .trigger(availableNow=True)
        .toTable(f'`{environment}_catalog`.`bronze`.`raw_traffic`')
    )
    # toTable() only starts the query; wait for it so failures surface here
    # instead of after a premature success message.
    query.awaitTermination()
    print('successfully write raw traffic to bronze table')


In [0]:
def write_raw_roads(rawRoads_stream,environment):
    """Append the raw roads stream to ``<environment>_catalog.bronze.raw_roads``.

    Starts a Delta append query with an ``availableNow`` trigger and blocks
    until that query has terminated, so the success message is only printed
    once the write has actually finished.

    Relies on ``checkpoints`` existing at notebook scope (presumably provided
    by the ``04_common`` notebook - confirm).

    Args:
        rawRoads_stream: Streaming DataFrame to persist.
        environment: Prefix of the target catalog, used as
            ``{environment}_catalog``.

    Raises:
        Whatever ``awaitTermination()`` raises when the streaming query fails.
    """
    # Join with exactly one '/' so the path is valid whether or not
    # `checkpoints` ends with a slash (bare concatenation fused the root and
    # the folder name when it did not).
    # NOTE(review): if `checkpoints` has no trailing slash today, this points
    # at a different directory than the old concatenation did - confirm.
    checkpoint_location = checkpoints.rstrip('/') + '/rawRoads_load/checkpoint'

    query = (
        rawRoads_stream.writeStream
        .format('delta')
        .option('checkpointLocation', checkpoint_location)
        .outputMode('append')
        .trigger(availableNow=True)
        .toTable(f'`{environment}_catalog`.`bronze`.`raw_roads`')
    )
    # toTable() only starts the query; wait for it so failures surface here
    # instead of after a premature success message.
    query.awaitTermination()
    print('successfully write raw roads to bronze table')

In [0]:
# Create both streaming readers first, then run the bronze writes
# (traffic, then roads) against the catalog of the selected environment.
rawTraffic_stream = load_raw_traffic(schema=schema_traffic)
rawRoads_stream = load_raw_roads(schema=schema_roads)
write_raw_traffic(rawTraffic_stream=rawTraffic_stream, environment=environment)
write_raw_roads(rawRoads_stream=rawRoads_stream, environment=environment)
