In [0]:
# List the DBFS mount points currently registered, as a quick check before
# reading anything through the /mnt paths.
dbutils.fs.mounts()

Out[38]: [MountInfo(mountPoint='/databricks-datasets', source='databricks-datasets', encryptionType=''),
 MountInfo(mountPoint='/mnt/formula1hh/processed', source='abfss://processed@formula1hh.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/mnt/formula1hh/raw', source='abfss://raw@formula1hh.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-tracking', source='databricks/mlflow-tracking', encryptionType=''),
 MountInfo(mountPoint='/databricks-results', source='databricks-results', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-registry', source='databricks/mlflow-registry', encryptionType=''),
 MountInfo(mountPoint='/mnt/formula1hh/presentation', source='abfss://presentation@formula1hh.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/mnt/formula1hh/demo', source='abfss://demo@formula1hh.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/', source='DatabricksRoot', encryptionType=''

In [0]:
%fs
ls /mnt/formula1hh/raw

path,name,size,modificationTime
dbfs:/mnt/formula1hh/raw/circuits.csv,circuits.csv,10044,1682445327000
dbfs:/mnt/formula1hh/raw/constructors.json,constructors.json,30415,1682445327000
dbfs:/mnt/formula1hh/raw/drivers.json,drivers.json,180812,1682445327000
dbfs:/mnt/formula1hh/raw/lap_times/,lap_times/,0,1682445346000
dbfs:/mnt/formula1hh/raw/pit_stops.json,pit_stops.json,1369387,1682445327000
dbfs:/mnt/formula1hh/raw/qualifying/,qualifying/,0,1682445359000
dbfs:/mnt/formula1hh/raw/races.csv,races.csv,116847,1682445327000
dbfs:/mnt/formula1hh/raw/results.json,results.json,7165641,1682445328000


In [0]:
# Notebook parameter: free-text label naming where the data came from.
# The value is stamped onto every row as the `data_source` column further down.
# The default is an empty string (not a single space), and the value is
# stripped, so a run without the parameter never persists stray whitespace.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source").strip()

In [0]:
# Notebook parameter: the file date to ingest. Falls back to "2021-03-21" when
# the caller supplies no value. It is used below to build the input path and is
# added to each row as the `file_date` column.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
# Echo the file date the widget actually returned for this run.
v_file_date

Out[10]: '2021-03-22'

In [0]:
%run "../includes/configuration"

In [0]:
# Echo raw_folder_path to confirm it is in scope.
# NOTE(review): presumably defined by the ../includes/configuration notebook — confirm.
raw_folder_path

Out[17]: '/mnt/formula1hh/raw/'

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType

# Explicit schema for circuits.csv, built one column at a time as
# (name, Spark type, nullable). Supplying it up front means the reader does not
# have to infer column types.
# NOTE(review): the schema printed after the read reports circuitId as
# nullable = true, so the False flag here is apparently not enforced by the
# CSV reader — confirm if non-null ids are a requirement.
circuits_schema = (
    StructType()
    .add("circuitId", IntegerType(), False)
    .add("circuitRef", StringType(), True)
    .add("name", StringType(), True)
    .add("location", StringType(), True)
    .add("country", StringType(), True)
    .add("lat", DoubleType(), True)
    .add("lng", DoubleType(), True)
    .add("alt", IntegerType(), True)
    .add("url", StringType(), True)
)

In [0]:
# Load circuits.csv from the folder for the requested file date. The first row
# is treated as the header and the explicit schema above is applied.
circuits_df = (
    spark.read
    .option("header", True)
    .schema(circuits_schema)
    .csv(f'{raw_folder_path}{v_file_date}/circuits.csv')
)

In [0]:
# Preview the rows exactly as read from the CSV, before any transformation.
display(circuits_df)

circuitId,circuitRef,name,location,country,lat,lng,alt,url
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_International_Circuit
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_International_Circuit
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park
6,monaco,Circuit de Monaco,Monte-Carlo,Monaco,43.7347,7.42056,7,http://en.wikipedia.org/wiki/Circuit_de_Monaco
7,villeneuve,Circuit Gilles Villeneuve,Montreal,Canada,45.5,-73.5228,13,http://en.wikipedia.org/wiki/Circuit_Gilles_Villeneuve
8,magny_cours,Circuit de Nevers Magny-Cours,Magny Cours,France,46.8642,3.16361,228,http://en.wikipedia.org/wiki/Circuit_de_Nevers_Magny-Cours
9,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,http://en.wikipedia.org/wiki/Silverstone_Circuit
10,hockenheimring,Hockenheimring,Hockenheim,Germany,49.3278,8.56583,103,http://en.wikipedia.org/wiki/Hockenheimring


In [0]:
# Print the column names, types and nullability Spark applied to the frame.
circuits_df.printSchema()

root
 |-- circuitId: integer (nullable = true)
 |-- circuitRef: string (nullable = true)
 |-- name: string (nullable = true)
 |-- location: string (nullable = true)
 |-- country: string (nullable = true)
 |-- lat: double (nullable = true)
 |-- lng: double (nullable = true)
 |-- alt: integer (nullable = true)
 |-- url: string (nullable = true)



In [0]:
# Columns carried forward from the raw frame; `url` is the only one left out.
columns_to_keep = [
    "circuitId",
    "circuitRef",
    "name",
    "location",
    'country',
    'lat',
    'lng',
    'alt',
]
selected_df = circuits_df.select(*columns_to_keep)

In [0]:
# Preview the frame after the column selection.
display(selected_df)

circuitId,circuitRef,name,location,country,lat,lng,alt
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130
6,monaco,Circuit de Monaco,Monte-Carlo,Monaco,43.7347,7.42056,7
7,villeneuve,Circuit Gilles Villeneuve,Montreal,Canada,45.5,-73.5228,13
8,magny_cours,Circuit de Nevers Magny-Cours,Magny Cours,France,46.8642,3.16361,228
9,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153
10,hockenheimring,Hockenheimring,Hockenheim,Germany,49.3278,8.56583,103


In [0]:
from pyspark.sql.functions import lit

In [0]:
# Switch the camelCase / abbreviated source columns to snake_case names, then
# tag every row with the two notebook parameters as literal columns.
circuits_renamed = (
    selected_df
    .withColumnRenamed('circuitId', 'circuit_id')
    .withColumnRenamed('circuitRef', 'circuit_ref')
    .withColumnRenamed('lat', 'latitude')
    .withColumnRenamed('lng', 'longitude')
    .withColumnRenamed('alt', 'altitude')
    .withColumn('data_source', lit(v_data_source))
    .withColumn('file_date', lit(v_file_date))
)


In [0]:
# Preview the renamed frame, including the new data_source and file_date columns.
display(circuits_renamed)

circuit_id,circuit_ref,name,location,country,latitude,longitude,altitude,data_source,file_date
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,testing,2021-03-21
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,testing,2021-03-21
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,testing,2021-03-21
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,testing,2021-03-21
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,testing,2021-03-21
6,monaco,Circuit de Monaco,Monte-Carlo,Monaco,43.7347,7.42056,7,testing,2021-03-21
7,villeneuve,Circuit Gilles Villeneuve,Montreal,Canada,45.5,-73.5228,13,testing,2021-03-21
8,magny_cours,Circuit de Nevers Magny-Cours,Magny Cours,France,46.8642,3.16361,228,testing,2021-03-21
9,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,testing,2021-03-21
10,hockenheimring,Hockenheimring,Hockenheim,Germany,49.3278,8.56583,103,testing,2021-03-21


In [0]:
%run "../includes/common_functions"

In [0]:
# Pass the renamed frame through the ingest_time helper.
# NOTE(review): ingest_time is not defined in this notebook — presumably it
# comes from ../includes/common_functions and appends the ingestion_time
# column seen in the output below; confirm against that notebook.
circuits_final_df = ingest_time(circuits_renamed)

In [0]:
# Preview the final frame that will be written out.
display(circuits_final_df)

circuit_id,circuit_ref,name,location,country,latitude,longitude,altitude,data_source,file_date,ingestion_time
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,testing,2021-03-21,2023-05-04T00:36:17.745+0000
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,testing,2021-03-21,2023-05-04T00:36:17.745+0000
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,testing,2021-03-21,2023-05-04T00:36:17.745+0000
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,testing,2021-03-21,2023-05-04T00:36:17.745+0000
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,testing,2021-03-21,2023-05-04T00:36:17.745+0000
6,monaco,Circuit de Monaco,Monte-Carlo,Monaco,43.7347,7.42056,7,testing,2021-03-21,2023-05-04T00:36:17.745+0000
7,villeneuve,Circuit Gilles Villeneuve,Montreal,Canada,45.5,-73.5228,13,testing,2021-03-21,2023-05-04T00:36:17.745+0000
8,magny_cours,Circuit de Nevers Magny-Cours,Magny Cours,France,46.8642,3.16361,228,testing,2021-03-21,2023-05-04T00:36:17.745+0000
9,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,testing,2021-03-21,2023-05-04T00:36:17.745+0000
10,hockenheimring,Hockenheimring,Hockenheim,Germany,49.3278,8.56583,103,testing,2021-03-21,2023-05-04T00:36:17.745+0000


In [0]:
# Persist the final frame as the Parquet-backed table f1_processed.circuits.
# 'overwrite' mode replaces whatever the table held before, so re-running the
# notebook does not append duplicate rows.
# The table name is a plain string: it has no placeholders, so the f-prefix
# was dropped.
(
    circuits_final_df.write
    .mode('overwrite')
    .format("parquet")
    .saveAsTable('f1_processed.circuits')
)

In [0]:
# Read the written Parquet files back by path for verification.
# NOTE(review): assumes the f1_processed.circuits table files live under
# processed_folder_path (defined outside this notebook) — confirm.
df = spark.read.format("parquet").load(f'{processed_folder_path}circuits')

In [0]:
%sql
-- Read back every row and column of the f1_processed.circuits table.
SELECT *
FROM f1_processed.circuits

circuit_id,circuit_ref,name,location,country,latitude,longitude,altitude,data_source,file_date,ingestion_time
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,testing,2021-03-21,2023-05-04T00:36:25.642+0000
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,testing,2021-03-21,2023-05-04T00:36:25.642+0000
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,testing,2021-03-21,2023-05-04T00:36:25.642+0000
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,testing,2021-03-21,2023-05-04T00:36:25.642+0000
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,testing,2021-03-21,2023-05-04T00:36:25.642+0000
6,monaco,Circuit de Monaco,Monte-Carlo,Monaco,43.7347,7.42056,7,testing,2021-03-21,2023-05-04T00:36:25.642+0000
7,villeneuve,Circuit Gilles Villeneuve,Montreal,Canada,45.5,-73.5228,13,testing,2021-03-21,2023-05-04T00:36:25.642+0000
8,magny_cours,Circuit de Nevers Magny-Cours,Magny Cours,France,46.8642,3.16361,228,testing,2021-03-21,2023-05-04T00:36:25.642+0000
9,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,testing,2021-03-21,2023-05-04T00:36:25.642+0000
10,hockenheimring,Hockenheimring,Hockenheim,Germany,49.3278,8.56583,103,testing,2021-03-21,2023-05-04T00:36:25.642+0000


In [0]:
# Show the frame read back from the Parquet path.
display(df)

circuit_id,circuit_ref,name,location,country,latitude,longitude,altitude,data_source,file_date,ingestion_time
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,testing,2021-03-21,2023-05-04T00:36:25.642+0000
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,testing,2021-03-21,2023-05-04T00:36:25.642+0000
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,testing,2021-03-21,2023-05-04T00:36:25.642+0000
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,testing,2021-03-21,2023-05-04T00:36:25.642+0000
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,testing,2021-03-21,2023-05-04T00:36:25.642+0000
6,monaco,Circuit de Monaco,Monte-Carlo,Monaco,43.7347,7.42056,7,testing,2021-03-21,2023-05-04T00:36:25.642+0000
7,villeneuve,Circuit Gilles Villeneuve,Montreal,Canada,45.5,-73.5228,13,testing,2021-03-21,2023-05-04T00:36:25.642+0000
8,magny_cours,Circuit de Nevers Magny-Cours,Magny Cours,France,46.8642,3.16361,228,testing,2021-03-21,2023-05-04T00:36:25.642+0000
9,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,testing,2021-03-21,2023-05-04T00:36:25.642+0000
10,hockenheimring,Hockenheimring,Hockenheim,Germany,49.3278,8.56583,103,testing,2021-03-21,2023-05-04T00:36:25.642+0000


In [0]:
# End the notebook run here, handing the string 'Success!' back as its exit value.
dbutils.notebook.exit('Success!')

Success!