## Ingest circuits.csv file

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType
from pyspark.sql.functions import col, current_timestamp

In [0]:
# Explicit schema describing the columns of circuits.csv.
# Each tuple is (column name, Spark data type, nullable).
circuits_schema = StructType(
    fields=[
        StructField(field_name, field_type, is_nullable)
        for field_name, field_type, is_nullable in [
            ("circuitId", IntegerType(), False),
            ("circuitRef", StringType(), True),
            ("name", StringType(), True),
            ("location", StringType(), True),
            ("country", StringType(), True),
            ("lat", DoubleType(), True),
            ("lng", DoubleType(), True),
            ("alt", IntegerType(), True),
            ("url", StringType(), True),
        ]
    ]
)

In [0]:
# Read the raw circuits CSV: the first row is a header, and the explicit
# schema is applied to the columns.
df_circuits = spark.read.csv(
    "/mnt/formula1datalake280199/raw/circuits.csv",
    header=True,
    schema=circuits_schema,
)

## Selecting the Required Columns

In [0]:
# Keep every column from the schema except "url".
df_circuits_selected = df_circuits.select(
    *[
        col(column_name)
        for column_name in (
            "circuitId",
            "circuitRef",
            "name",
            "location",
            "country",
            "lat",
            "lng",
            "alt",
        )
    ]
)

## Renaming Columns

In [0]:
# Rename the camelCase / abbreviated source columns; the remaining columns
# keep their original names.
df_circuits_renamed = df_circuits_selected
for source_name, target_name in (
    ("circuitId", "circuit_id"),
    ("circuitRef", "circuit_ref"),
    ("lat", "latitude"),
    ("lng", "longitude"),
    ("alt", "altitude"),
):
    df_circuits_renamed = df_circuits_renamed.withColumnRenamed(source_name, target_name)

## Adding Timestamp Column for Auditing

In [0]:
# Append an "ingestion_date" audit column holding the current timestamp.
df_circuits_final = (
    df_circuits_renamed
    .withColumn("ingestion_date", current_timestamp())
)

## Writing Data in Parquet Format

In [0]:
# Write the final DataFrame as Parquet, overwriting any existing output.
# The path must be absolute (leading "/"), matching the "/mnt/..." mount used
# when reading the raw file; a relative "mnt/..." path would not resolve to
# the mounted storage location.
df_circuits_final.write.mode("overwrite").parquet(
    "/mnt/formula1datalake280199/processed/circuits"
)