
## Ingest constructors.json file


#### Step 1 - Read the JSON file using the spark dataframe reader

In [0]:
# DDL-formatted schema string for the constructors JSON source, one column per line.
# The adjacent literals are concatenated by Python into a single string.
constructor_schema = (
    "constructorId INT, "
    "constructorRef STRING, "
    "name STRING, "
    "nationality STRING, "
    "url STRING"
)

In [0]:
# Load the raw constructors JSON file, applying the explicit schema declared
# in `constructor_schema` to the reader before the file is read.
constructor_df = (
    spark.read
    .schema(constructor_schema)
    .json("/mnt/tideformula1dl/raw/constructors.json")
)


#### Step 2 - Drop unwanted columns from the dataframe

In [0]:
from pyspark.sql.functions import col

In [0]:
# Remove the `url` column; every other column of constructor_df is kept as-is.
constructor_dropped_df = constructor_df.drop("url")


#### Step 3 - Rename columns and add ingestion date

In [0]:
from pyspark.sql.functions import current_timestamp

# Build the final dataframe:
#   - constructorId  -> constructor_id
#   - constructorRef -> constructor_ref
#   - add an `ingestion_date` column populated from current_timestamp()
constructor_final_df = (
    constructor_dropped_df
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumnRenamed("constructorRef", "constructor_ref")
    .withColumn("ingestion_date", current_timestamp())
)


#### Step 4 - Write output to the f1_processed.constructors table in parquet format

In [0]:
# Persist the final dataframe as the table f1_processed.constructors,
# stored as parquet and overwriting any existing contents.
constructor_final_df.write.saveAsTable(
    "f1_processed.constructors",
    format="parquet",
    mode="overwrite",
)

In [0]:
# End the notebook run here and hand the string "Success" back as its exit value
# (presumably consumed by a parent notebook/job that invokes this one — verify against the caller).
dbutils.notebook.exit("Success")