### Ingest circuits data from bronze to silver

0. Import configuration notebook and widgets

In [0]:
%run ../Includes/Configuration

In [0]:
# Expose a free-text notebook parameter so the caller (job or parent notebook)
# can state where this load originated. The default is an empty string.
dbutils.widgets.text(
    name='p_data_source',
    defaultValue='',
    label='Data Source',
)

# Capture the widget's current value; it is stamped onto every row later on
# as the `data_source` column.
v_data_source = dbutils.widgets.get('p_data_source')

1. Imports

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType
from pyspark.sql.functions import col, current_timestamp, lit

2. Create schema

In [0]:
# Explicit schema for circuits.csv, declared column by column in file order.
# Column names follow the source file's camelCase headers; they are renamed to
# snake_case in the transform step. `url` is parsed here but not carried into
# the transformed dataframe.
# NOTE(review): lat/lng are single-precision (FloatType); consider DoubleType
# if downstream consumers need more coordinate precision — confirm before
# changing, since it alters the silver column types.
# NOTE(review): Spark file sources may relax nullable=False on read — verify
# before relying on these flags as a data-quality guarantee.
circuits_schema = (
    StructType()
    .add("circuitId", IntegerType(), nullable=False)
    .add("circuitRef", StringType(), nullable=False)
    .add("name", StringType(), nullable=False)
    .add("location", StringType(), nullable=True)
    .add("country", StringType(), nullable=True)
    .add("lat", FloatType(), nullable=True)
    .add("lng", FloatType(), nullable=True)
    .add("alt", IntegerType(), nullable=True)
    .add("url", StringType(), nullable=True)
)

3. Read circuits.csv file

In [0]:
# Load the raw circuits file from the bronze container. The first line of the
# file is treated as a header, and the explicit schema is applied rather than
# inferring column types.
circuits_raw_df = (
    spark.read
    .schema(circuits_schema)
    .option("header", True)
    .csv(f"{bronze_container_path}/circuits.csv")
)

4. Transform the circuits DataFrame

In [0]:
# Shape the raw data for the silver layer in a single projection:
#   * keep the needed source columns, renaming camelCase/abbreviated names
#     to descriptive snake_case (the `url` column is left out);
#   * append audit columns: the ingestion timestamp and the data source
#     value supplied through the notebook widget.
circuits_df = circuits_raw_df.select(
    col("circuitId").alias("circuit_id"),
    col("circuitRef").alias("circuit_ref"),
    col("name"),
    col("location"),
    col("country"),
    col("lat").alias("latitude"),
    col("lng").alias("longitude"),
    col("alt").alias("altitude"),
    current_timestamp().alias("ingestion_date"),
    lit(v_data_source).alias("data_source"),
)

5. Write the dataframe to silver layer

In [0]:
# Persist the transformed circuits as Parquet in the silver container.
# 'overwrite' replaces any data already at the target path.
circuits_df.write.parquet(
    f"{silver_container_path}/circuits",
    mode='overwrite',
)