### Ingest constructors.json file

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Declare the "p_data_source" text widget (default: empty string) and read its
# current value. The value is written to the data_source column further down.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Declare the "p_file_date" text widget (default: "2021-03-21") and read its
# current value. The value picks the raw sub-folder that is read below and is
# also stored in the file_date column of the output.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

##### Step 1 - Read the JSON file using the Spark DataFrame reader

In [0]:
# Schema expressed as a DDL string: one "<column> <type>" pair per line,
# joined by implicit string-literal concatenation into a single string.
constructors_schema = (
    "constructorId INT, "
    "constructorRef STRING, "
    "name STRING, "
    "nationality STRING, "
    "url STRING"
)

In [0]:
# Load constructors.json from the sub-folder named after the file date,
# applying the explicit DDL schema defined in the previous cell.
# NOTE(review): raw_folder_path is not defined in this notebook; presumably it
# comes from the "../includes/configuration" notebook run at the top - confirm.
constructors_df = spark.read.json(
    f"{raw_folder_path}/{v_file_date}/constructors.json",
    schema=constructors_schema,
)

In [0]:
# constructors_df.printSchema()

root
 |-- constructorId: integer (nullable = true)
 |-- constructorRef: string (nullable = true)
 |-- name: string (nullable = true)
 |-- nationality: string (nullable = true)
 |-- url: string (nullable = true)



In [0]:
# display(constructors_df.limit(5))

constructorId,constructorRef,name,nationality,url
1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Prix_Engineering
4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formula_One
5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso


##### Step 2 - Drop unwanted columns from the dataframe

In [0]:
# Remove the unwanted url column. drop() is only one way to do this; selecting
# just the columns to keep would work as well.
constructors_dropped_df = constructors_df.drop("url")

##### Step 3 - Rename columns and add ingestion date

In [0]:
from pyspark.sql.functions import lit

In [0]:
# Build the final dataframe: ingestion date first, then snake_case renames of
# the id/ref columns, then the two run parameters added as constant columns.
# NOTE(review): add_ingestion_date is not defined in this notebook; presumably
# it comes from the "../includes/common_functions" notebook - confirm.
constructors_final_df = (
    add_ingestion_date(constructors_dropped_df)
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumnRenamed("constructorRef", "constructor_ref")
    .withColumn("data_source", lit(v_data_source))
    .withColumn("file_date", lit(v_file_date))
)

##### Step 4 - Write output data to processed container

In [0]:
# Persist the result as the Delta table f1_processed.constructors, using
# overwrite mode so that existing table contents are replaced.
(
    constructors_final_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("f1_processed.constructors")
)

In [0]:
%sql
-- Preview five rows of the f1_processed.constructors table.
SELECT *
FROM f1_processed.constructors
LIMIT 5;

constructor_id,constructor_ref,name,nationality,ingestion_date,data_source,file_date
1,mclaren,McLaren,British,2023-12-22T11:00:50.346Z,Ergast,2021-04-18
2,bmw_sauber,BMW Sauber,German,2023-12-22T11:00:50.346Z,Ergast,2021-04-18
3,williams,Williams,British,2023-12-22T11:00:50.346Z,Ergast,2021-04-18
4,renault,Renault,French,2023-12-22T11:00:50.346Z,Ergast,2021-04-18
5,toro_rosso,Toro Rosso,Italian,2023-12-22T11:00:50.346Z,Ergast,2021-04-18


In [0]:
# End the notebook run with the exit value "Success".
# NOTE(review): presumably a calling notebook or job inspects this value - confirm.
dbutils.notebook.exit("Success")