## Ingest pit_stops.json file

1. Read the multi-line JSON file using the Spark DataFrame reader

In [0]:
from pyspark.sql.functions import col, lit
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

In [0]:
# Declare the "p_data_source" text widget (default: empty string) and read its
# current value; the value is later written to the "data_source" column.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Declare the "p_file_date" text widget (default: "2021-03-21") and read its
# current value. The value selects the raw sub-folder to read from and is also
# stored on every row as the "file_date" column.
dbutils.widgets.text("p_file_date", "2021-03-21")
file_date = dbutils.widgets.get("p_file_date")

In [0]:
# Explicit schema for pit_stops.json, built field by field.
# Only "raceId" is declared non-nullable; "stop", "time" and "duration" are
# kept as strings, the remaining fields are integers.
pit_stops_schema = (
    StructType()
    .add("raceId", IntegerType(), False)
    .add("driverId", IntegerType(), True)
    .add("stop", StringType(), True)
    .add("lap", IntegerType(), True)
    .add("time", StringType(), True)
    .add("duration", StringType(), True)
    .add("milliseconds", IntegerType(), True)
)

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Load pit_stops.json from the raw folder for the selected file date.
# The explicit schema is applied instead of inference, and the "multiline"
# option lets the reader parse JSON records that span several lines.
pit_stops_df = (
    spark.read
    .schema(pit_stops_schema)
    .option("multiline", True)
    .json(f'{raw_folder_path}/{file_date}/pit_stops.json')
)
#display(pit_stops_df)

2. Process the DataFrame: rename columns and add metadata columns

In [0]:
# Rename the camelCase source columns to snake_case and tag every row with the
# data source and file date supplied through the notebook widgets.
# The rename keys use the exact casing declared in pit_stops_schema ("raceId",
# "driverId") so the renames still apply when spark.sql.caseSensitive is on.
# No column is dropped: every field in pit_stops_schema is kept.
pit_stops_final_df = (
    pit_stops_df
    .withColumnRenamed("raceId", "race_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumn("data_source", lit(v_data_source))
    .withColumn("file_date", lit(file_date))
)
# add_ingestion_date is not defined in this notebook; presumably it comes from
# ../includes/common_functions and appends an ingestion timestamp — confirm there.
pit_stops_final_df = add_ingestion_date(pit_stops_final_df)
#display(pit_stops_final_df)

3. Write data to the data lake by merging into the `f1_processed.pit_stops` table

In [0]:
#pit_stops_final_df.write.mode("overwrite").parquet(f"{processed_folder_path}/pit_stops")
# Disabled alternative: overwrite the managed table f1_processed.pit_stops

#pit_stops_final_df.write.mode("overwrite").format("parquet").saveAsTable("f1_processed.pit_stops")

In [0]:
#save_table(pit_stops_final_df, "f1_processed", "pit_stops", "race_id")
# A target row matches a source row when race, driver and stop all agree;
# each predicate is listed exactly once.
merge_condition = "tgt.race_id = src.race_id AND tgt.driver_id = src.driver_id AND tgt.stop = src.stop"
# merge_table is not defined in this notebook (presumably it comes from
# ../includes/common_functions). The "race_id" argument looks like the
# partition column — confirm against the helper's signature.
merge_table(pit_stops_final_df, "f1_processed", "pit_stops", "race_id", merge_condition)

In [0]:
# %fs
# ls mnt/formula1jf/processed/pit_stops

In [0]:
# End the notebook run and hand "Success" back as its exit value, so a calling
# notebook or job can check the outcome.
dbutils.notebook.exit("Success")