### Produce Constructor Standings

In [0]:
# Notebook parameter: the file date whose race years are recomputed further down.
# The second argument is the widget's default value, used when no value is supplied.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Collect the distinct race years that appear in the race_results rows whose
# file_date equals v_file_date; later cells restrict their work to these years.
from pyspark.sql.functions import col  # imported here so this cell runs on a fresh kernel

race_year_rows = (
    spark.read.format('delta')
    .load(f"{presentation_folder_path}/race_results")
    # Compare through a Column expression instead of splicing the widget value
    # into SQL text, so a stray quote in the parameter cannot alter the predicate.
    .filter(col('file_date') == v_file_date)
    .select('race_year')
    .distinct()
    .collect()
)

# Unwrap the collected Row objects into a plain list of years.
race_results_list = [row.race_year for row in race_year_rows]

In [0]:
from pyspark.sql.functions import sum, count, when, col
# Re-read race_results and keep every row whose race_year is in race_results_list,
# i.e. all rows of the selected seasons, not only those carrying v_file_date.
race_results_path = f"{presentation_folder_path}/race_results"
race_results_df = (
    spark.read.format('delta')
    .load(race_results_path)
    .filter(col('race_year').isin(race_results_list))
)
display(race_results_df)

In [0]:
# Aggregate per (race_year, team): total points and number of wins.
# NOTE: `sum` here is pyspark.sql.functions.sum (imported in an earlier cell), not the Python builtin.
total_points_expr = sum("points").alias('total_points')
# when(...) yields NULL for rows that are not position 1, and count() skips NULLs,
# so this counts only the first-place rows.
wins_expr = count(when(col("position") == 1, True)).alias('wins')

constructor_standings_df = (
    race_results_df
    .groupBy("race_year", "team")
    .agg(total_points_expr, wins_expr)
)
display(constructor_standings_df)

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import rank, desc

# Rank teams within each race_year: highest total_points first, then most wins.
# rank() assigns equal ranks to ties and leaves a gap after them.
standings_order = [desc('total_points'), desc('wins')]
constructor_rank_spec = Window.partitionBy('race_year').orderBy(*standings_order)

constructors_rank_df = constructor_standings_df.withColumn(
    'rank',
    rank().over(constructor_rank_spec),
)
display(constructors_rank_df)

In [0]:
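# Disabled: earlier load call, kept for reference. The next cell writes the same
# table through merge_delta_data instead. TODO: delete once confirmed unused.
# incremental_load(constructors_rank_df, 'f1_presentation', 'constructor_standings', 'race_year')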

In [0]:
# Write the ranked standings to f1_presentation.constructor_standings via the shared
# merge_delta_data helper (brought in by the earlier %run of common_functions —
# presumably an upsert keyed on merge_condition; verify against that notebook).
#
# The target folder is built from presentation_folder_path, the same root the read
# cells use, instead of a hard-coded mount path, so reads and writes stay in one
# storage location when the configuration changes.
# NOTE(review): assumes presentation_folder_path resolves to
# /mnt/formula1dl244/presentation, the previously hard-coded root — confirm.
folder_path = f"{presentation_folder_path}/constructor_standings"
# Source and target rows are matched on team and race_year.
merge_condition = "tgt.team = src.team AND tgt.race_year = src.race_year"
partition_column = 'race_year'
merge_delta_data(constructors_rank_df, 'f1_presentation', 'constructor_standings', folder_path, merge_condition, partition_column)

In [0]:
%sql
-- Preview f1_presentation.constructor_standings after the merge step.
SELECT *
FROM f1_presentation.constructor_standings