In [0]:
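# Notes
# Columns needed from bronze (the Rally column is skipped because it is null):
# match_id, SetNo, GameNo, PointNumber, PointWinner, RallyCount, SetWinner (to determine match winner)
# NOTE(review): SetWinner is listed here but is not in the column list selected below;
# the match winner is currently taken from the PointWinner of each match's last point.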

In [0]:
from pyspark.sql.functions import col

# Fully qualified location of the bronze source table, kept as separate parts.
catalog_name = 'workspace'
schema_name = 'bronze'
table_name = 'tennis_points_raw'

# Only these point-level columns are read from bronze.
columns = [
    'match_id',
    'SetNo',
    'GameNo',
    'PointNumber',
    'PointWinner',
    'RallyCount',
]
select_columns = ', '.join(columns)

query = f"SELECT {select_columns} FROM {catalog_name}.{schema_name}.{table_name}"

raw_df = spark.sql(query)
raw_df.printSchema()

# PointNumber is compared and sorted numerically further down, so convert it
# to int here; try_cast produces NULL for values that cannot be converted
# rather than raising.
df = raw_df.withColumn("PointNumber", col("PointNumber").try_cast("int"))


# df.printSchema()
# df.count()
# display(df.limit(10))


In [0]:
# Keep only the rows whose PointNumber is 1 (the opening point of each match).
first_point_df = df.where(col("PointNumber") == 1)

# display(first_point_df.limit(10))
# first_point_df.count()

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number, col

# Within each match, order points from the highest PointNumber downwards so
# that row_number() assigns 1 to the last point played.
window_spec = (
    Window
    .partitionBy('match_id')
    .orderBy(col('PointNumber').desc())
)

df_with_rank = df.withColumn('point_rank', row_number().over(window_spec))
rank_df = df_with_rank

# point_rank == 1 selects exactly one row per match: its final point.
last_point_df = rank_df.where(col('point_rank') == 1)

# The winner of the final point is taken as the winner of the match.
match_winner_df = last_point_df.select(
    'match_id',
    last_point_df['PointWinner'].alias('match_winner'),
)

In [0]:
# Rename PointWinner on the first-point rows so it stays distinguishable from
# the match_winner column brought in by the join.
first_point_df = first_point_df.withColumnRenamed("PointWinner", "first_point_winner")

# Left join: every first-point row is kept, and match_winner is NULL when no
# matching match_id exists in match_winner_df.
final_df = first_point_df.join(match_winner_df, on="match_id", how="left")


In [0]:
# Create the silver schema and table if they are missing, then insert any
# matches that are not yet present in the table.
spark.sql("CREATE SCHEMA IF NOT EXISTS workspace.silver")

from delta.tables import DeltaTable

silver_table_name = "workspace.silver.match_first_point_winner"

# DeltaTable.forName raises when the table does not exist, so a first run
# (or a run against a fresh workspace) would fail before reaching the merge.
# Writing zero rows with mode("ignore") creates an empty Delta table carrying
# final_df's schema when the table is missing and does nothing when it
# already exists.
(
    final_df.limit(0)
    .write
    .format("delta")
    .mode("ignore")
    .saveAsTable(silver_table_name)
)

silver_table = DeltaTable.forName(spark, silver_table_name)

# Insert-only merge keyed on match_id: source rows whose match_id is absent
# from the target are inserted; rows for existing match_ids are left untouched
# because no whenMatched clause is defined.
(
    silver_table.alias("target")
    .merge(
        final_df.alias("source"),
        """
        target.match_id = source.match_id
        """
    )
    .whenNotMatchedInsertAll()
    .execute()
)



In [0]:
# %sql
# select count(*) from workspace.silver.match_first_point_winner