### Ingest qualifying JSON files

In [0]:
# Notebook parameter: label for where the data came from. Defaults to an empty
# string; the value is stamped on every row as the `data_source` column below.
dbutils.widgets.text("p_data_source","")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Notebook parameter: which dated sub-folder of the raw layer to ingest.
# Defaults to "2021-03-21"; the value is used both to build the input path and
# as the `file_date` column on every row below.
dbutils.widgets.text("p_file_date","2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

##### Step 1 - Read the multi-line JSON split files using the Spark DataFrame reader

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

In [0]:
# Explicit schema for the qualifying files, one entry per column:
# (source field name, Spark data type, nullable).
column_specs = [
    ("qualifyId",     IntegerType(), False),
    ("raceId",        IntegerType(), True),
    ("driverId",      IntegerType(), True),
    ("constructorId", IntegerType(), True),
    ("number",        IntegerType(), True),
    ("position",      IntegerType(), True),
    ("q1",            StringType(),  True),
    ("q2",            StringType(),  True),
    ("q3",            StringType(),  True),
]

# Turn each spec into a StructField, preserving the column order above.
fields = [
    StructField(column_name, data_type, nullable)
    for column_name, data_type, nullable in column_specs
]
qualifying_schema = StructType(fields=fields)

In [0]:
# Glob matching every qualifying split file under the requested file date.
# raw_folder_path is not defined in this notebook section (presumably it comes
# from ../includes/configuration - confirm).
qualifying_path = f"{raw_folder_path}/{v_file_date}/qualifying/qualifying_split_*.json"

# Read with the explicit schema and the multiLine option enabled.
qualifying_df = (
    spark.read
    .schema(qualifying_schema)
    .option("multiLine", True)
    .json(qualifying_path)
)

##### Step 2 - Rename and add columns

In [0]:
from pyspark.sql.functions import lit

# Stage 1: rename the camelCase id columns to snake_case.
qualifying_renamed_df = (
    qualifying_df
    .withColumnRenamed("qualifyId", "qualify_id")
    .withColumnRenamed("raceId", "race_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("constructorId", "constructor_id")
)

# Stage 2: stamp every row with the two notebook parameters as literal columns.
qualifying_tagged_df = (
    qualifying_renamed_df
    .withColumn("data_source", lit(v_data_source))
    .withColumn("file_date", lit(v_file_date))
)

# Stage 3: add_ingestion_date is defined outside this notebook section
# (presumably in ../includes/common_functions - confirm what it appends).
qualifying_final_df = add_ingestion_date(qualifying_tagged_df)

##### Step 3 - Merge dataframe into the processed table (Delta merge)

In [0]:
# Incoming (source) rows are matched to existing (target) rows on both the
# qualifying id and the race id.
merge_condition = "target.qualify_id = source.qualify_id and target.race_id = source.race_id"

# merge_delta_data is defined outside this notebook section. The arguments are
# presumably (dataframe, database, table, folder path, merge condition,
# partition column) - confirm against the helper's definition.
merge_delta_data(
    qualifying_final_df,
    "f1_processed",
    "qualifying",
    processed_folder_path,
    merge_condition,
    "race_id",
)

In [0]:
# End the notebook run and hand the string "Success" back as its exit value.
dbutils.notebook.exit("Success")

Success

In [0]:
%sql
-- Ad-hoc sanity check (left commented out): row count per race_id in
-- f1_processed.qualifying, newest race_id first.
-- NOTE(review): this cell sits after dbutils.notebook.exit, so it is presumably
-- meant for interactive use only - confirm.
/*select race_id,count(1) 
  from f1_processed.qualifying
 group by race_id
 order by race_id desc*/

race_id,count(1)
1053,20
1052,20
1047,20
1046,20
1045,20
1044,20
1043,20
1042,20
1041,20
1040,20
