## Ingest the 2 JSON files in the qualifying folder

In [0]:
# Declare a text widget named "p_data_source" (empty-string default) so a caller
# can pass the data source in as a notebook parameter.
dbutils.widgets.text("p_data_source", "")
# Read the widget's current value; it is attached to every row later on as the
# 'data_source' column.
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
%run "../includes/configurations"

In [0]:
%run "../includes/common_functions"

In [0]:
# List the contents of the raw 'qualifying' folder before reading from it.
# NOTE(review): raw_folder_path is presumably defined by ../includes/configurations - confirm.
display(dbutils.fs.ls(f'{raw_folder_path}/qualifying'))

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

# Explicit schema for the qualifying records: integer ids/number/position, with
# q1-q3 kept as strings. Only 'qualifyId' is declared non-nullable.
qualifying_schema = (
    StructType()
    .add('qualifyId', IntegerType(), False)
    .add('raceId', IntegerType(), True)
    .add('driverId', IntegerType(), True)
    .add('constructorId', IntegerType(), True)
    .add('number', IntegerType(), True)
    .add('position', IntegerType(), True)
    .add('q1', StringType(), True)
    .add('q2', StringType(), True)
    .add('q3', StringType(), True)
)

# Read every file matching qualifying_split*.json in the raw qualifying folder,
# applying the schema above with the multiLine JSON option enabled.
qualifying_df = spark.read.json(
    f'{raw_folder_path}/qualifying/qualifying_split*.json',
    schema=qualifying_schema,
    multiLine=True,
)

# Preview a handful of rows only.
display(qualifying_df.limit(5))

In [0]:
# Show how many rows were read across all matched qualifying files.
display(qualifying_df.count())

In [0]:
from pyspark.sql.functions import col, current_timestamp, lit

# Convert the camelCase id columns to snake_case and tag each row with the
# data source supplied through the notebook widget.
qualifying_renamed_df = (
    qualifying_df
    .withColumnRenamed('qualifyId', 'qualify_id')
    .withColumnRenamed('raceId', 'race_id')
    .withColumnRenamed('driverId', 'driver_id')
    .withColumnRenamed('constructorId', 'constructor_id')
    .withColumn('data_source', lit(v_data_source))
)

display(qualifying_renamed_df)

# ingestion_date comes from the shared includes.
# NOTE(review): presumably it appends an ingestion timestamp column - confirm in common_functions.
qualifying_final_df = ingestion_date(qualifying_renamed_df)

# Persist the result as the parquet-backed table f1_processed.qualifying,
# replacing any existing contents.
(
    qualifying_final_df.write
    .format("parquet")
    .mode('overwrite')
    .saveAsTable("f1_processed.qualifying")
)

In [0]:
# Verify the write by reading back through the same table name that was written
# with saveAsTable. Reading from f'{processed_folder_path}/qualifying' only works
# when the f1_processed database happens to be located at that folder, and could
# otherwise fail or show stale files from an earlier path-based write.
display(spark.table("f1_processed.qualifying"))

In [0]:
# End the notebook run and hand the string "Success" back to whatever invoked it.
dbutils.notebook.exit("Success")