### Constructor Standings Transformation

#### Importing Configuration

In [0]:
%run ../Includes/Configuration

0. Imports

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import col, when, lit, desc, sum, rank

1. Read Appropriate Datasets
- Required Datasets
  - Constructors
  - Races
  - Results

In [0]:
# Constructors Data
# Load the silver-layer constructors table and keep only the join key and
# the constructor name, exposing the name under the column 'team'.

constructor_df = (
    spark.read.parquet(f'{silver_container_path}/constructors')
    .select('constructor_id', col('name').alias('team'))
)

In [0]:
# Races Data
# Only the race key and its season are needed: race_id joins to results,
# race_year is the season the standings are computed for.

races_df = (
    spark.read
    .parquet(f'{silver_container_path}/races')
    .select('race_id', 'race_year')
)

In [0]:
# Results Data
# Load the silver-layer results and recode position_order into a win flag:
# 1 when the result finished first, 0 for everything else (including nulls,
# which fall through to the otherwise branch).

results_df = (
    spark.read.parquet(f'{silver_container_path}/results')
    .select('result_id', 'race_id', 'constructor_id', 'position_order', 'points')
    .withColumn(
        'position_order',
        when(col('position_order') == 1, lit(1)).otherwise(lit(0)),
    )
)

2. Join the datasets, aggregate points and wins per constructor and season, and rank the constructors within each season

In [0]:
# Ranking window: one partition per season, ordered by total points
# (highest first) with total wins as the tie-breaker.
constructor_standing_spec = (
    Window
    .partitionBy('race_year')
    .orderBy(col('points').desc(), col('wins').desc())
)

# Attach the season and team name to every result, total the points and
# wins per constructor per season, then rank constructors inside each season.
# Note: `sum` here is pyspark.sql.functions.sum (imported above), not the builtin.
constructor_standings_df = (
    results_df
    .join(races_df, on='race_id')
    .join(constructor_df, on='constructor_id')
    .groupBy('constructor_id', 'team', 'race_year')
    .agg(
        sum('points').alias('points'),
        # position_order was recoded to a 1/0 win flag when results were
        # loaded, so summing it counts the wins.
        sum('position_order').alias('wins'),
    )
    .withColumn('rank', rank().over(constructor_standing_spec))
)

3. Write the dataframe to the gold layer

In [0]:
# Persist the standings as parquet under the gold container, replacing any
# output left there by a previous run.
(
    constructor_standings_df.write
    .parquet(f'{gold_container_path}/constructor_standings', mode='overwrite')
)