### Driver Standings Metric Transformation

0. Import Configuration

In [0]:
%run ../Includes/Configuration

1. Imports

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import col, current_timestamp, when, lit, sum, desc, rank

2. Read Appropriate Datasets
- Required Datasets are
  - Drivers
  - Constructors
  - Races
  - Results

In [0]:
# Drivers dataset (silver layer).
# Keep only the driver key plus the descriptive columns, prefixing the
# descriptive ones with `driver_` so they stay unambiguous after later joins.

drivers_df = (
    spark.read.parquet(f'{silver_container_path}/drivers')
    .select(
        'driver_id',
        col('name').alias('driver_name'),
        col('nationality').alias('driver_nationality'),
    )
)

In [0]:
# Constructors dataset (silver layer).
# Only the key and the constructor name are needed; the name is exposed as `team`.

constructor_df = (
    spark.read.parquet(f'{silver_container_path}/constructors')
    .select(
        'constructor_id',
        col('name').alias('team'),
    )
)

In [0]:
# Races dataset (silver layer).
# Used to attach the race year to each result row via `race_id`.

races_df = (
    spark.read.parquet(f'{silver_container_path}/races')
    .select(
        'race_id',
        'race_year',
    )
)


In [0]:
# Results dataset (silver layer).
# Carries the join keys for races, drivers and constructors together with
# the finishing order and the points of each result row.

results_df = (
    spark.read.parquet(f'{silver_container_path}/results')
    .select(
        'result_id',
        'race_id',
        'driver_id',
        'constructor_id',
        'position_order',
        'points',
    )
)

3. Join Dataframes to get the desired data

In [0]:
# Enrich every result row with its race year, driver details and team name.
# All three joins are inner joins on a single shared key column, so result
# rows without a matching race, driver or constructor are dropped.
driver_standings_raw_df = (
    results_df
    .join(races_df, on='race_id', how='inner')
    .join(drivers_df, on='driver_id', how='inner')
    .join(constructor_df, on='constructor_id', how='inner')
    .select(
        'driver_id',
        'driver_name',
        'driver_nationality',
        'team',
        'race_year',
        'position_order',
        'points',
    )
)

4. Aggregations

In [0]:
# Rank within each season: highest points first, ties broken by number of wins.
driver_rank_spec = (
    Window
    .partitionBy('race_year')
    .orderBy(col('points').desc(), col('wins').desc())
)

# A result counts as a win when its finishing order is 1; every other row
# (including a missing position_order) contributes 0 to the win total.
win_flag = when(col('position_order') == 1, lit(1)).otherwise(lit(0))

# One row per driver / team / season with total wins and points.
# driver_id takes part in the grouping but is removed from the output
# before the rank column is added.
driver_standings_df = (
    driver_standings_raw_df
    .groupBy('driver_id', 'driver_name', 'driver_nationality', 'team', 'race_year')
    .agg(
        sum(win_flag).alias('wins'),
        sum('points').alias('points'),
    )
    .drop('driver_id')
    .withColumn('rank', rank().over(driver_rank_spec))
)

5. Write the DataFrame to the gold layer

In [0]:
# Persist the standings as Parquet in the gold layer, replacing any previous output.
driver_standings_df.write.parquet(
    f'{gold_container_path}/driver_standings',
    mode='overwrite',
)