In [0]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window

#Get all teams across seasons

Get all teams from players_raw and current teams in bootstrap-static. 

Manually identify teams for ids between seasons. "team_code" is common across seasons. Load to csv in fpl_raw to ingest to bronze and silver.

In [0]:
bronze_schema = "fpl_bronze_dev"
seasons = ['16_17', '17_18', '18_19', '19_20', '20_21', '21_22', '22_23', '23_24', '24_25']

#Load players_raw from all seasons
players_dfs = []
for season in seasons:
    df = spark.table(f"{bronze_schema}.players_raw_{season}"
                ).select(
                  F.lit(season).alias("season"),
                  F.col("team_code").alias("team_code"),
                  F.col("team").alias("team_id"),
                  F.col("first_name"),
                  F.col("second_name")
              )
    players_dfs.append(df)

players_all = players_dfs[0]
for df in players_dfs[1:]:
    players_all = players_all.unionByName(df)

#Extract one random player name per team per season for identification
window_spec = Window.partitionBy("season", "team_code", "team_id").orderBy(F.rand())

teams_base_df = players_all.withColumn(
        "full_name", 
        F.concat_ws(" ", F.col("first_name"), F.col("second_name"))
    ).withColumn(
        "row_num", 
        F.row_number().over(window_spec)
    ).filter(F.col("row_num") == 1
    ).select(
        "season", 
        "team_code", 
        "team_id", 
        "full_name"
    ).withColumn(
        "team_name", 
        F.lit(None).cast("string")
    ).withColumn(
        "team_name_short", 
        F.lit(None).cast("string")
    ).withColumn(
        "is_promoted", 
        F.lit(None).cast("boolean")
    ).withColumn(
        "is_relegated", 
        F.lit(None).cast("boolean")
    )

#Load teams_25_26 (not historic) and align schema
teams_25_26_df = spark.table(f"{bronze_schema}.teams_25_26"
).select(
        F.lit("25_26").alias("season"),
        F.col("code").alias("team_code"),
        F.col("id").alias("team_id"),
        F.lit(None).cast("string").alias("full_name"),  # No player name available
        F.col("name").alias("team_name"),
        F.col("short_name").alias("team_name_short"),
        F.lit(None).cast("boolean").alias("is_promoted"),
        F.lit(None).cast("boolean").alias("is_relegated")
    )

#Union all seasons + 25_26
teams_df = teams_base_df.unionByName(teams_25_26_df)

display(teams_df)

season,team_code,team_id,full_name,team_name,team_name_short,is_promoted,is_relegated
16_17,1,11,Anthony Martial,,,,
16_17,11,6,Joel Robles,,,,
16_17,110,14,Saido Berahino,,,,
16_17,13,8,Nampalys Mendy,,,,
16_17,14,9,Dejan Lovren,,,,
16_17,20,13,Florin Gardos,,,,
16_17,21,20,Aaron Cresswell,,,,
16_17,25,12,Adam Clayton,,,,
16_17,3,1,Héctor Bellerín,,,,
16_17,31,5,Ezekiel Fryers,,,,
