#### Produce driver standings

In [0]:
%run "../includes/configuration"

In [0]:
# Load the per-race results dataset that the standings are built from.
# `presentation_folder_path` is presumably defined by the configuration
# notebook pulled in via %run above -- verify there if the path looks wrong.
race_results_df = (
    spark.read
    .parquet(f"{presentation_folder_path}/race_results")
)


In [0]:
from pyspark.sql.functions import sum, when, count, col


In [0]:
# Aggregate race results into one row per (year, driver, nationality, team).
# - total_points: sum of `points` over the group (`sum` is the Spark column
#   function imported above, not the Python builtin).
# - wins: `when(...)` without an otherwise-branch yields NULL for rows that do
#   not match, and `count` skips NULLs, so only rows with position == 1 count.
# NOTE(review): `team` is part of the grouping key, so a driver with results
# for more than one team in the same year produces one row per team.
driver_standings_df = (
    race_results_df
    .groupBy("race_year", "driver_name", "driver_nationality", "team")
    .agg(
        sum("points").alias("total_points"),
        count(when(col("position") == 1, True)).alias("wins"),
    )
)

In [0]:
from pyspark.sql.functions import desc


In [0]:
# Sanity-check the aggregation: 2020 rows, highest points total first.
display(
    driver_standings_df
    .filter("race_year = 2020")
    .orderBy(desc("total_points"))
)


race_year,driver_name,driver_nationality,team,total_points,wins
2020,Lewis Hamilton,British,Mercedes,347.0,11
2020,Valtteri Bottas,Finnish,Mercedes,223.0,2
2020,Max Verstappen,Dutch,Red Bull,214.0,2
2020,Sergio Pérez,Mexican,Racing Point,125.0,1
2020,Daniel Ricciardo,Australian,Renault,119.0,0
2020,Carlos Sainz,Spanish,McLaren,105.0,0
2020,Alexander Albon,Thai,Red Bull,105.0,0
2020,Charles Leclerc,Monegasque,Ferrari,98.0,0
2020,Lando Norris,British,McLaren,97.0,0
2020,Lance Stroll,Canadian,Racing Point,75.0,0


In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import desc, rank, asc

In [0]:
# Rank drivers within each race_year: most points first, with number of wins
# as the tie-breaker. `rank()` gives equal rows the same rank and leaves a gap
# afterwards (e.g. 6, 6, 8).
driver_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(desc("total_points"), desc("wins"))
)
final_df = driver_standings_df.withColumn(
    "rank",
    rank().over(driver_rank_spec),
)

In [0]:
# Preview the ranked 2020 standings. Sort explicitly by rank: the row order of
# a DataFrame is not guaranteed unless an orderBy is applied, so without it the
# table could come back in any order.
display(
    final_df
    .filter("race_year = 2020")
    .orderBy("rank")
)


race_year,driver_name,driver_nationality,team,total_points,wins,rank
2020,Lewis Hamilton,British,Mercedes,347.0,11,1
2020,Valtteri Bottas,Finnish,Mercedes,223.0,2,2
2020,Max Verstappen,Dutch,Red Bull,214.0,2,3
2020,Sergio Pérez,Mexican,Racing Point,125.0,1,4
2020,Daniel Ricciardo,Australian,Renault,119.0,0,5
2020,Carlos Sainz,Spanish,McLaren,105.0,0,6
2020,Alexander Albon,Thai,Red Bull,105.0,0,6
2020,Charles Leclerc,Monegasque,Ferrari,98.0,0,8
2020,Lando Norris,British,McLaren,97.0,0,9
2020,Pierre Gasly,French,AlphaTauri,75.0,1,10


In [0]:
# Persist the ranked standings as parquet; "overwrite" replaces any existing
# data at the target path, so re-running the notebook is safe.
(
    final_df.write
    .mode("overwrite")
    .parquet(f"{presentation_folder_path}/driver_standings")
)
