## Produce the driver standings

In [0]:
%run "../includes/configurations"

In [0]:
# Load the race_results dataset (parquet) from the presentation folder.
# presentation_folder_path is not defined in this notebook — presumably it comes
# from the %run of ../includes/configurations above; verify there.
race_results_df = (
    spark.read
    .format("parquet")
    .load(f"{presentation_folder_path}/race_results")
)

In [0]:
# desc is not used in this cell but is kept in the import: the display cell
# right after this one calls desc() before any other cell imports it.
from pyspark.sql.functions import col, count, desc, when
# Imported under an alias so Python's built-in sum() is not shadowed for the
# rest of the notebook.
from pyspark.sql.functions import sum as spark_sum

# Aggregate race results into one standings row per
# (race_year, driver_name, driver_nationality, team):
#   total_points - sum of "points" over the group
#   wins         - number of rows in the group whose "position" equals 1
# NOTE(review): "team" is part of the grouping key, so a driver with results
# for more than one team in the same race_year gets one row per team —
# confirm this is the intended standings granularity.
driver_standings_df = (
    race_results_df
    .groupBy("race_year", "driver_name", "driver_nationality", "team")
    .agg(
        spark_sum("points").alias("total_points"),
        # when() without otherwise() yields NULL for non-matching rows and
        # count() skips NULLs, so only position == 1 rows are counted.
        count(when(col("position") == 1, True)).alias("wins"),
    )
)

In [0]:
# Spot-check the aggregation: 2000 season standings, highest points first.
display(
    driver_standings_df
    .where("race_year = 2000")
    .orderBy(desc("total_points"))
)

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import desc, rank

# Window over each season: rows ordered by total_points descending, with
# wins descending as the tie-breaker.
driver_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(desc("total_points"), desc("wins"))
)

# Add a "rank" column computed with rank() over the per-season window.
final_df = driver_standings_df.withColumn(
    "rank",
    rank().over(driver_rank_spec),
)

In [0]:
# Spot-check the ranking on the 2015 season.
display(
    final_df.where("race_year = 2015")
)

In [0]:
# Save the ranked standings as the table f1_presentation.driver_standings in
# parquet format; "overwrite" mode replaces existing data on each run.
(
    final_df.write
    .format("parquet")
    .mode("overwrite")
    .saveAsTable("f1_presentation.driver_standings")
)