In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Notebook parameter "p_data_source": declare a text widget (default: empty string) and read
# its current value. The value is later attached to every row as the data_source column.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Notebook parameter "p_file_date": declare a text widget (default "2021-03-21") and read its
# current value. The value selects the dated sub-folder that is read below and is also
# attached to every row as the file_date column.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")


In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

In [0]:
# Explicit schema for the qualifying JSON files, in source column order:
#   * qualifyId  - integer, declared non-nullable
#   * raceId, driverId, constructorId, number, position - nullable integers
#   * q1, q2, q3 - nullable strings
qualifying_schema = StructType(
    fields=[StructField("qualifyId", IntegerType(), False)]
    + [
        StructField(int_column, IntegerType(), True)
        for int_column in ("raceId", "driverId", "constructorId", "number", "position")
    ]
    + [StructField(text_column, StringType(), True) for text_column in ("q1", "q2", "q3")]
)

In [0]:
# Load the qualifying JSON found under <incraw_folder_path>/<v_file_date>/qualifying using the
# explicit schema above. The multiLine option is on, so one JSON record may span several lines.
# incraw_folder_path is not defined in this notebook (presumably provided by the %run of
# ../includes/configuration - confirm).
qualifying_df = (
    spark.read
    .schema(qualifying_schema)
    .option("multiLine", True)
    .json(f"{incraw_folder_path}/{v_file_date}/qualifying")
)

In [0]:
# Pass the raw frame through add_ingestion_date. The helper is not defined in this notebook
# (presumably it comes from the %run of ../includes/common_functions and adds an
# ingestion-timestamp column - confirm there).
qualifying_with_ingestion_date_df = add_ingestion_date(qualifying_df)

In [0]:
from pyspark.sql.functions import current_timestamp, lit

In [0]:
# Shape the rows for the processed table:
#   1. rename the camelCase source columns to snake_case;
#   2. stamp every row with the ingestion time, the data source and the file date
#      (the latter two come from the notebook widgets).
# current_timestamp and lit are both taken from this notebook's own pyspark.sql.functions
# import, so the cell does not depend on names leaked by a %run notebook.
final_df = (
    qualifying_with_ingestion_date_df
    .withColumnRenamed("qualifyId", "qualify_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("raceId", "race_id")
    .withColumnRenamed("constructorId", "constructor_id")
    # NOTE(review): add_ingestion_date() upstream presumably already sets this column; the
    # assignment is kept so the output is unchanged - confirm and drop one of the two.
    .withColumn("ingestion_date", current_timestamp())
    .withColumn("data_source", lit(v_data_source))
    .withColumn("file_date", lit(v_file_date))
)

In [0]:
# Merge key: a target row (tgt) matches a source row (src) when both qualify_id and race_id
# are equal.
merge_condition = "tgt.qualify_id = src.qualify_id AND tgt.race_id = src.race_id"

# merge_delta_data is not defined in this notebook. The positional arguments presumably mean
# (frame, database, table, storage root, merge condition, partition column) - verify against
# its definition before reordering anything.
merge_delta_data(
    final_df,
    'f1_inc_processed',
    'qualifying_dt',
    incprocessed_folder_path,
    merge_condition,
    'race_id',
)

In [0]:
# Sanity check after the merge: query the processed table and print a sample of its rows.
spark.sql("SELECT * FROM f1_inc_processed.qualifying_dt").show()

+----------+-------+---------+--------------+------+--------+--------+--------+--------+--------------------+-----------+----------+
|qualify_id|race_id|driver_id|constructor_id|number|position|      q1|      q2|      q3|      ingestion_date|data_source| file_date|
+----------+-------+---------+--------------+------+--------+--------+--------+--------+--------------------+-----------+----------+
|      8735|   1052|      830|             9|    33|       1|1:30.499|1:30.318|1:28.997|2024-12-21 05:54:...|     Ergast|2021-03-28|
|      8736|   1052|        1|           131|    44|       2|1:30.617|1:30.085|1:29.385|2024-12-21 05:54:...|     Ergast|2021-03-28|
|      8737|   1052|      822|           131|    77|       3|1:31.200|1:30.186|1:29.586|2024-12-21 05:54:...|     Ergast|2021-03-28|
|      8738|   1052|      844|             6|    16|       4|1:30.691|1:30.010|1:29.678|2024-12-21 05:54:...|     Ergast|2021-03-28|
|      8739|   1052|      842|           213|    10|       5|1:30.848

In [0]:
# End the notebook run and hand "Success" back as its exit value to whatever invoked it.
dbutils.notebook.exit("Success")