##### 1. Read dataframes : Constructors, Circuits, Races, Results, Drivers
##### 2. race_circuits_df: Join on (race_df, circuit_df) circuit_id: race_circuits_df
##### 3. race_results_df: Joins on (race_circuits_df, results_df): Race_id (race_circuits), driver_id (drivers_df), constructor_id (constructors_df)

## Incremental load changes
##### 1. Add
%run "../includes/common_functions"
##### 2. Add
dbutils.widgets.text("p_file_date", "2021-03-21")\
v_file_date = dbutils.widgets.get("p_file_date")


##### 3. results_df 
###### &nbsp;a. Add a filter of a file date
.filter(f"file_date = '{v_file_date}'")
###### &nbsp;b. change column name 'race_id' to 'result_race_id'
.withColumnRenamed("race_id", "result_race_id")

##### 4. race_results_df
###### change results_df.race_id to results_df.result_race_id

##### 5. final_df
###### add 'race_id' column to the selected columns

##### 6. Overwrite:
overwrite_partition(final_df, 'f1_presentation', 'race_results', 'race_id')

##### 7. Run p_file_date 3 times (2021-03-21, 2021-03-28, 2021-04-18)



## Amendments for Incremental load of driver standings
##### 1. DROP TABLE f1_presentation.race_results;
##### 2. results_df: Rename "file_date" to "result_file_date"
.withColumnRenamed("file_date", "result_file_date")
##### 3. final_df
###### &nbsp;a. Add 'result_file_date' to the selected columns
###### &nbsp;b. change column name 'result_file_date' to 'file_date'
.withColumnRenamed("result_file_date", "file_date")
##### 4. Run p_file_date 3 times (2021-03-21, 2021-03-28, 2021-04-18)




In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Notebook parameter: the file date whose results this run processes.
# The widget defaults to "2021-03-21"; the value read back here is used to
# filter the results data further down.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
# Bare expression: shows the resolved file date as this cell's output.
v_file_date

#### Constructors

In [0]:
# Read the processed constructors data; `name` is exposed as `team`, the column
# name the final selection uses.
constructors_df = (
    spark.read.parquet(f"{processed_folder_path}/constructors")
    .withColumnRenamed("name", "team")
)
display(constructors_df)

#### Circuits

In [0]:
# Read the processed circuits data, exposing `location` as `circuit_location`.
circuits_df = (
    spark.read.parquet(f"{processed_folder_path}/circuits")
    .withColumnRenamed("location", "circuit_location")
)
display(circuits_df)

#### Races

In [0]:
# Read the processed races data; `name` becomes `race_name` and
# `race_timestamp` becomes `race_date`.
races_df = (
    spark.read.parquet(f"{processed_folder_path}/races")
    .withColumnRenamed("name", "race_name")
    .withColumnRenamed("race_timestamp", "race_date")
)
display(races_df)

#### Results

In [0]:
# Read only the results rows whose file_date matches the notebook parameter
# (incremental load). `race_id` and `file_date` are renamed here: the join
# further down matches `result_race_id` against the races' `race_id`, and
# `result_file_date` is renamed back to `file_date` in the final selection.
results_df = (
    spark.read.parquet(f"{processed_folder_path}/results")
    .filter(f"file_date = '{v_file_date}'")
    .withColumnRenamed("time", "race_time")
    .withColumnRenamed("race_id", "result_race_id")
    .withColumnRenamed("file_date", "result_file_date")
)
display(results_df)

#### Drivers

In [0]:
# Read the processed drivers data and prefix the generic column names
# (`number`, `name`, `nationality`) with `driver_`.
drivers_df = (
    spark.read.parquet(f"{processed_folder_path}/drivers")
    .withColumnRenamed("number", "driver_number")
    .withColumnRenamed("name", "driver_name")
    .withColumnRenamed("nationality", "driver_nationality")
)
display(drivers_df)

In [0]:
# Inner-join races to circuits on circuit_id, keeping the race attributes
# and the circuit location only.
race_circuits_df = (
    races_df
    .join(circuits_df, races_df.circuit_id == circuits_df.circuit_id, "inner")
    .select(
        races_df.race_id,
        races_df.race_year,
        races_df.race_name,
        races_df.race_date,
        circuits_df.circuit_location,
    )
)
display(race_circuits_df)

In [0]:
# Attach race/circuit, driver and constructor attributes to each result row.
# No join type is passed, so Spark's default join type applies to all three joins.
race_results_df = (
    results_df
    .join(race_circuits_df, results_df.result_race_id == race_circuits_df.race_id)
    .join(drivers_df, results_df.driver_id == drivers_df.driver_id)
    .join(constructors_df, results_df.constructor_id == constructors_df.constructor_id)
)
display(race_results_df)

In [0]:
from pyspark.sql.functions import current_timestamp

In [0]:
# Project the presentation columns, stamp each row with the load time, and
# rename `result_file_date` back to `file_date`.
final_df = (
    race_results_df
    .select(
        "race_id",
        "race_year",
        "race_name",
        "race_date",
        "circuit_location",
        "driver_name",
        "driver_number",
        "driver_nationality",
        "team",
        "grid",
        "fastest_lap",
        "race_time",
        "points",
        "position",
        "result_file_date",
    )
    .withColumn("created_date", current_timestamp())
    .withColumnRenamed("result_file_date", "file_date")
)
display(final_df)

In [0]:
# Write the results partitioned by race_id, as the incremental-load notes at the
# top of this notebook specify. Each run only carries the races of one file date,
# so partitioning by race_year would let a later race of the same season replace
# the earlier ones if overwrite_partition overwrites whole partitions
# (presumably it does -- verify in includes/common_functions).
# NOTE(review): if f1_presentation.race_results already exists partitioned by
# race_year, drop it before the first run with this setting.
overwrite_partition(final_df, 'f1_presentation', 'race_results', 'race_id')

In [0]:
# %sql
# SELECT  * FROM f1_presentation.race_results;

In [0]:
# %sql
# DROP TABLE f1_presentation.race_results;