In [1]:
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from pyspark.sql.functions import create_map, lit

In [2]:
# Notebook parameters (added for ADF). Each entry maps a text widget name to its
# default value: the run-tracking fields default to an empty string and
# `catalog` defaults to the dev catalog.
widget_defaults = {
    "pipeline_id": "",
    "run_id": "",
    "task_id": "",
    "processed_timestamp": "",
    "catalog": "unikargo_dev",
}

# Declare the widgets in the order listed above.
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

To use databricks widgets interactively in your notebook, please install databricks sdk using:
	pip install 'databricks-sdk[notebook]'
Falling back to default_value_only implementation for databricks widgets.


In [3]:
# Read the current value of every notebook widget into a notebook-level
# variable. The names are fetched in the same order they are unpacked.
(
    pipeline_id,
    run_id,
    task_id,
    processed_timestamp,
    catalog,
) = [
    dbutils.widgets.get(widget_name)
    for widget_name in (
        "pipeline_id",
        "run_id",
        "task_id",
        "processed_timestamp",
        "catalog",
    )
]

In [4]:
# Explicit schema for the airports CSV: (column name, Spark type) pairs in
# column order. Every column is declared nullable.
# NOTE(review): the second column is named "airline" although this is the
# airports schema -- possibly a copy-paste slip for "airport". Confirm against
# the source file and downstream readers before renaming.
airport_columns = [
    ("iata_code", StringType()),
    ("airline", StringType()),
    ("city", StringType()),
    ("state", StringType()),
    ("country", StringType()),
    ("latitude", DoubleType()),
    ("longitude", DoubleType()),
]

airports_schema = StructType(
    [
        StructField(column_name, column_type, True)
        for column_name, column_type in airport_columns
    ]
)

In [5]:
# Location of the raw airports CSV in ADLS Gen2 (added for ADF).
# The previously used Unity Catalog volume path was:
#   /Volumes/{catalog}/00_raw/source_unicargo_data/airports.csv
# NOTE(review): the storage account is hard-coded here while the target catalog
# is a notebook parameter -- confirm this is intended for non-dev runs.
airports_csv_path = "abfss://medallion@adlsunikarrgodev.dfs.core.windows.net/raw/volumes/airports.csv"

# Read with the explicit schema instead of inferring one; header=true makes the
# reader treat the first line of the file as a header rather than as data.
csv_reader = spark.read.schema(airports_schema).option("header", "true")
df = csv_reader.csv(airports_csv_path)

# Uncomment to preview a few rows while debugging:
# df.show(5)

In [6]:
# Run-tracking values to attach to every row, keyed by the name they are
# stored under in the `metadata` map column.
metadata_values = {
    "pipeline_id": pipeline_id,
    "run_id": run_id,
    "task_id": task_id,
    "processed_timestamp": processed_timestamp,
}

# create_map expects alternating key/value columns, so flatten the dict into
# [lit(key1), lit(value1), lit(key2), lit(value2), ...] preserving its order.
metadata_entries = [
    lit(entry)
    for key_value_pair in metadata_values.items()
    for entry in key_value_pair
]

df = df.withColumn("metadata", create_map(*metadata_entries))

In [None]:
# Fully qualified bronze table inside the catalog chosen via the `catalog` widget.
bronze_table_name = f"`{catalog}`.`01_bronze`.`unikargo_airports_bronze`"

# Save as a Delta table, replacing any existing contents on each run;
# overwriteSchema is enabled so the overwrite may also replace the table schema.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(bronze_table_name)
)
