In [0]:
from pyspark.sql import functions as F, types as T
from delta.tables import DeltaTable
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.types import *

# Functions

In [0]:
def write_to_table(
    df: DataFrame,
    table_name: str,
    mode: str = "overwrite",
    merge_schema: bool = True,
    partition_by: list[str] = None,
    path: str = None,
    save_as_table: bool = True
) -> None:
    """
    Generalised Delta write helper.

    Appends a ``last_updated`` column holding the current timestamp to ``df``
    and writes the result in Delta format, either via ``saveAsTable`` or to a
    path.

    Parameters:
    - df (DataFrame): Spark DataFrame to write.
    - table_name (str): Name of the Delta table (used if save_as_table=True).
    - mode (str): Write mode ('overwrite', 'append', 'ignore', 'error', etc.).
    - merge_schema (bool): If True, sets the ``mergeSchema`` write option.
      If False and mode is 'overwrite', sets ``overwriteSchema`` instead so the
      existing table schema is replaced by the schema of ``df``.
    - partition_by (list[str] | str, optional): Column(s) to partition by.
      A single column name may be passed as a plain string.
    - path (str, optional): Path to save the Delta table (used only if
      save_as_table=False; ignored otherwise).
    - save_as_table (bool): If True, writes with saveAsTable(table_name);
      else saves to path.

    Raises:
    - ValueError: If save_as_table is False and no path is provided.
    """

    # Fail fast: reject an unusable destination before any writer is built.
    if not save_as_table and not path:
        raise ValueError("Either save_as_table must be True or a path must be provided.")

    # A bare string would otherwise be unpacked character by character by
    # partitionBy(*partition_by), producing one bogus column per letter.
    if isinstance(partition_by, str):
        partition_by = [partition_by]

    df_with_ts = df.withColumn("last_updated", F.current_timestamp())

    writer = df_with_ts.write.format("delta").mode(mode)

    # Schema handling: merge when asked; otherwise, for overwrites only, allow
    # the incoming schema to replace the existing one.
    if merge_schema:
        writer = writer.option("mergeSchema", "true")
    elif mode == "overwrite":
        writer = writer.option("overwriteSchema", "true")

    if partition_by:
        writer = writer.partitionBy(*partition_by)

    if save_as_table:
        writer.saveAsTable(table_name)
    else:
        # path is guaranteed non-empty here by the check at the top.
        writer.save(path)

In [0]:
# Environments this notebook may run against.
valid_envs = {"dev", "test", "prod"}

# Read the target environment from the "ENV" widget; if the lookup raises
# for any reason, fall back to "dev".
try:
    ENV = dbutils.widgets.get("ENV")
except Exception:
    ENV = "dev"

# Report an unrecognised environment and ask Databricks to exit the notebook
# before any schema name is built from it.
env_is_valid = ENV in valid_envs
if not env_is_valid:
    print(f"Invalid ENV: {ENV}. Must be one of {valid_envs}. Exiting notebook.")
    dbutils.notebook.exit("Invalid ENV")

# Environment-specific schema names for the bronze and silver layers.
bronze_schema = f"fpl_bronze_{ENV}"
silver_schema = f"fpl_silver_{ENV}"

# Source table for the dimension tables built in the cells below.
fixtures_df = spark.table(f"{silver_schema}.fixtures")

# Write Seasons

Derive the seasons table from the fixtures table in the silver layer.

The table currently holds the season key (e.g. 202526), a short season label `season_short` (e.g. 25_26), and the first and last kick-off times of each season.

Possible future addition: season results, e.g. the winner, Champions League qualifiers and relegated teams.

In [0]:
# Short season label built from season_key: characters 3-4 and 5-6 joined
# with "_" (substr is 1-based), e.g. '202021' -> '20_21'.
season_key_col = F.col("season_key")
season_short_col = F.concat_ws(
    "_",
    season_key_col.substr(3, 2),
    season_key_col.substr(5, 2),
)

# One row per season with its earliest and latest kickoff time.
seasons_df = (
    fixtures_df
    .select("season_key", "kickoff_time")
    .withColumn("season_short", season_short_col)
    .groupBy("season_key", "season_short")
    .agg(
        F.min("kickoff_time").alias("season_start"),
        F.max("kickoff_time").alias("season_end"),
    )
)

# Overwrite the silver seasons table; merge_schema=False with mode
# "overwrite" makes write_to_table set the overwriteSchema option.
write_to_table(
    seasons_df,
    f"{silver_schema}.seasons",
    mode="overwrite",
    merge_schema=False,
)

# Write Gameweeks

Derive the gameweeks table from the fixtures table in the silver layer, with the first and last kick-off times of each gameweek.

In [0]:
# Columns read from fixtures, and the keys that identify a gameweek.
gameweek_source_cols = ["season_key", "gameweek_key", "kickoff_time", "gameweek"]
gameweek_group_cols = ["gameweek_key", "season_key", "gameweek"]

# One row per gameweek with its earliest and latest kickoff time.
gameweek_df = (
    fixtures_df
    .select(*gameweek_source_cols)
    .groupBy(*gameweek_group_cols)
    .agg(
        F.min("kickoff_time").alias("gameweek_start"),
        F.max("kickoff_time").alias("gameweek_end"),
    )
)

# Overwrite the silver gameweeks table; merge_schema=False with mode
# "overwrite" makes write_to_table set the overwriteSchema option.
write_to_table(
    gameweek_df,
    f"{silver_schema}.gameweeks",
    mode="overwrite",
    merge_schema=False,
)