## 2020 Abu Dhabi Grand Prix Results

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

#### Step 1: Obtain races and filter by year 2020

In [0]:
# Load every processed race, then narrow the frame to the 2020 season.
races_df = spark.read.parquet(f"{processed_folder_path}/races")
is_2020_season = races_df["race_year"] == '2020'
races_2020_df = races_df.where(is_2020_season)

#### Step 2: Obtain circuits and join to races

In [0]:
# Load the processed circuits and keep only the Abu Dhabi circuit.
circuits_df = spark.read.parquet(f"{processed_folder_path}/circuits")
circuits_abu_dhabi_df = circuits_df.filter(circuits_df["location"] == 'Abu Dhabi')

# Join the *filtered* frames so that only the 2020 race held at Abu Dhabi
# survives. Joining the unfiltered races_df / circuits_df here would carry
# every race of every season into the final output.
race_circuits_df = (
    races_2020_df
    .join(
        circuits_abu_dhabi_df,
        races_2020_df["circuit_id"] == circuits_abu_dhabi_df["circuit_id"],
        'inner',
    )
    .select(
        races_2020_df["race_year"],
        races_2020_df["race_id"],  # kept as the join key for the results table
        races_2020_df["name"].alias('race_name'),
        races_2020_df["race_timestamp"].alias('race_date'),
        circuits_abu_dhabi_df["location"].alias('circuit_location'),
    )
)

#### Step 3: Obtain constructors

In [0]:
# Read the processed constructors and keep just the join key plus the
# constructor name, exposed downstream as `team`.
constructors_raw_df = spark.read.parquet(f"{processed_folder_path}/constructors")
constructors_df = constructors_raw_df.select(
    constructors_raw_df["constructor_id"],
    constructors_raw_df["name"].alias('team'),
)

#### Step 4: Obtain drivers

In [0]:
# Read the processed drivers and keep the join key plus the descriptive
# columns, renamed with a `driver_` prefix for the presentation table.
drivers_raw_df = spark.read.parquet(f"{processed_folder_path}/drivers")
drivers_df = drivers_raw_df.select(
    drivers_raw_df["driver_id"],
    drivers_raw_df["name"].alias('driver_name'),
    drivers_raw_df["number"].alias('driver_number'),
    drivers_raw_df["nationality"].alias('driver_nationality'),
)

#### Step 5: Obtain results and join all together

In [0]:
# Read the processed results and keep the measures needed for the output
# together with the three keys used by the joins below.
results_raw_df = spark.read.parquet(f'{processed_folder_path}/results')
results_selected_df = results_raw_df.select(
    results_raw_df["grid"],
    results_raw_df["position"],
    results_raw_df["fastest_lap"],
    results_raw_df["time"].alias('race_time'),
    results_raw_df["points"],
    results_raw_df["driver_id"],
    results_raw_df["race_id"],
    results_raw_df["constructor_id"],
)

# Enrich each result with its driver, race/circuit and team.
# constructors_df is joined exactly once: a single inner join both restricts
# the rows to known constructors and attaches the `team` column, so an extra
# join whose columns are immediately discarded is unnecessary.
results_df = (
    results_selected_df
    .join(drivers_df, results_selected_df["driver_id"] == drivers_df["driver_id"], 'inner')
    .join(race_circuits_df, results_selected_df["race_id"] == race_circuits_df["race_id"], 'inner')
    .join(constructors_df, 'constructor_id', 'inner')
    # The surrogate keys are not part of the presentation table.
    .drop('driver_id', 'constructor_id')
)

#### Step 6: Select final columns, add ingestion date, and order by points

In [0]:
# Columns kept for the output, listed in their final left-to-right order.
final_columns = [
    'race_year',
    'race_name',
    'race_date',
    'circuit_location',
    'driver_name',
    'driver_number',
    'driver_nationality',
    'team',
    'grid',
    'fastest_lap',
    'race_time',
    'points',
    'position',
]

# add_ingestion_date is not defined in this notebook; presumably it comes from
# the ../includes/common_functions include and appends an ingestion timestamp
# column -- verify against that notebook.
results_with_ingestion_df = add_ingestion_date(results_df.select(*final_columns))

# Highest points first.
results_final_df = results_with_ingestion_df.orderBy(
    results_with_ingestion_df["points"].desc()
)
    

In [0]:
# Render the final frame in the notebook for a visual check before it is written out.
display(results_final_df)

#### Step 7: Save to presentation storage layer in parquet

In [0]:
# Persist the final frame as parquet under the presentation folder,
# replacing whatever a previous run left at that path.
race_results_path = f'{presentation_folder_path}/race_results'
(
    results_final_df.write
    .mode('overwrite')
    .parquet(race_results_path)
)