In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Notebook parameter: name of the upstream data source (empty string when not supplied).
dbutils.widgets.text(name="p_data_source", defaultValue="")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Notebook parameter: date of the incremental drop to ingest; it is used as a
# folder name when building the input path and is stored in the file_date column.
dbutils.widgets.text(name="p_file_date", defaultValue="2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

In [0]:
# Explicit schema for the lap_times CSV files, listed in column order.
# NOTE(review): raceId is declared non-nullable; Spark's file readers may relax
# this to nullable on read — confirm if the constraint matters downstream.
lap_times_fields = [
    StructField("raceId", IntegerType(), False),
    StructField("driverId", IntegerType(), True),
    StructField("lap", IntegerType(), True),
    StructField("position", IntegerType(), True),
    StructField("time", StringType(), True),
    StructField("milliseconds", IntegerType(), True),
]
lap_times_schema = StructType(fields=lap_times_fields)


In [0]:
# Read the lap_times CSV files for the requested file date using the explicit schema.
# incraw_folder_path is not defined in this notebook — presumably it comes from
# the %run'd configuration notebook; confirm.
lap_times_path = f"{incraw_folder_path}/{v_file_date}/lap_times"
lap_times_df = spark.read.schema(lap_times_schema).csv(lap_times_path)

In [0]:
# Pass the raw lap times through the shared add_ingestion_date helper.
# The helper is not defined in this notebook — presumably it comes from the
# %run'd common_functions notebook and adds an ingestion timestamp column; confirm.
lap_times_with_ingestion_date_df = add_ingestion_date(lap_times_df)

In [0]:
from pyspark.sql.functions import lit

In [0]:
# current_timestamp is imported here explicitly: this notebook otherwise only
# imports `lit`, so the cell would depend on the name leaking in from a %run'd
# notebook and would raise NameError if that notebook stopped importing it.
from pyspark.sql.functions import current_timestamp

# Build the frame that is merged into the processed table:
#   * rename the camelCase key columns to snake_case
#   * stamp ingestion_date with the current timestamp
#   * record the data source and file date passed in as notebook parameters
# NOTE(review): add_ingestion_date was already applied upstream and presumably
# sets the same ingestion_date column — confirm whether this re-stamp is needed.
final_df = (
    lap_times_with_ingestion_date_df
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("raceId", "race_id")
    .withColumn("ingestion_date", current_timestamp())
    .withColumn("data_source", lit(v_data_source))
    .withColumn("file_date", lit(v_file_date))
)


In [0]:
# A lap time row is identified by (race_id, driver_id, lap); each key column
# appears exactly once in the merge predicate.
merge_condition = "tgt.race_id = src.race_id AND tgt.driver_id = src.driver_id AND tgt.lap = src.lap"

# Merge the prepared frame into f1_inc_processed.lap_times_dt via the shared
# merge_delta_data helper; 'race_id' is passed as its final argument.
# NOTE(review): the helper is defined outside this notebook — presumably the last
# argument is the partition column; confirm against its definition.
merge_delta_data(final_df, 'f1_inc_processed', 'lap_times_dt', incprocessed_folder_path, merge_condition, 'race_id')

In [0]:
# Sanity check: print the first rows of the merged table.
spark.table("f1_inc_processed.lap_times_dt").show()

+-------+---------+---+--------+--------+------------+--------------------+-----------+----------+
|race_id|driver_id|lap|position|    time|milliseconds|      ingestion_date|data_source| file_date|
+-------+---------+---+--------+--------+------------+--------------------+-----------+----------+
|   1052|      830|  1|       1|1:58.245|      118245|2024-12-21 05:54:...|     Ergast|2021-03-28|
|   1052|      830|  2|       1|2:22.406|      142406|2024-12-21 05:54:...|     Ergast|2021-03-28|
|   1052|      830|  3|       1|2:38.001|      158001|2024-12-21 05:54:...|     Ergast|2021-03-28|
|   1052|      830|  4|       1|1:44.343|      104343|2024-12-21 05:54:...|     Ergast|2021-03-28|
|   1052|      830|  5|       1|1:44.629|      104629|2024-12-21 05:54:...|     Ergast|2021-03-28|
|   1052|      830|  6|       1|1:35.982|       95982|2024-12-21 05:54:...|     Ergast|2021-03-28|
|   1052|      830|  7|       1|1:35.902|       95902|2024-12-21 05:54:...|     Ergast|2021-03-28|
|   1052| 

In [0]:
# End the notebook run and hand the string "Success" back as the exit value,
# so a calling notebook or job can check the outcome.
dbutils.notebook.exit("Success")