In [0]:
# Step 1: Find the latest gameweek already present in team_features, together with its season_key
team_features_table = "fpl_feature_dev.team_features"
team_features_df = spark.read.table(team_features_table)

# Ordering by gameweek_key first selects the same value a plain F.max("gameweek_key") would;
# season_key is only a tie-breaker so the chosen row (and thus season_key) is deterministic.
# NOTE(review): this assumes gameweek_key keeps increasing across seasons — confirm against the table.
max_gameweek_row = (
    team_features_df
    .filter(F.col("gameweek_key").isNotNull())
    .orderBy(F.col("gameweek_key").desc(), F.col("season_key").desc())
    .select(F.col("gameweek_key").alias("max_gameweek_key"), "season_key")
    .first()
)
if max_gameweek_row is None:
    # Fail with a clear message instead of the TypeError that `None + 1` would raise.
    raise ValueError(
        f"{team_features_table} has no rows with a gameweek_key; cannot determine the next gameweek."
    )

next_gameweek_key = max_gameweek_row["max_gameweek_key"] + 1
season_key = max_gameweek_row["season_key"]

# Step 2: Find the fixture(s) of the next gameweek in fpl_silver_dev.fixtures
fixtures_table = "fpl_silver_dev.fixtures"
fixtures_df = spark.read.table(fixtures_table)

# Only gameweek_key is used to select the fixtures; no season filter is applied to this table.
next_gameweek_df = fixtures_df.filter(
    F.col("gameweek_key") == next_gameweek_key
)
# Bind the same DataFrame to next_fixture_df as well, so both names are available.
next_fixture_df = next_gameweek_df

display(next_gameweek_df)

# next_fixture_df and season_key can now be used to generate features for prediction

In [0]:
from functools import reduce

# Get next fixture(s) info from the next-gameweek fixtures DataFrame
next_fixtures = next_gameweek_df.select("fixture_key", "home_team_key", "away_team_key").collect()

# season_key of the most recent gameweek present in team_features; history is restricted to it.
# NOTE(review): assumes gameweek_key keeps increasing across seasons — confirm against the table.
latest_features_row = (
    team_features_df
    .filter(F.col("gameweek_key").isNotNull())
    .orderBy(F.col("gameweek_key").desc(), F.col("season_key").desc())
    .select("season_key")
    .first()
)
if latest_features_row is None:
    raise ValueError("team_features has no rows with a gameweek_key; cannot compute rolling features.")
season_key = latest_features_row["season_key"]

rolling_window = 5
rolling_features = []

# Key columns are identifiers, not statistics, so they are excluded from the averages.
key_columns = ["team_key", "fixture_key", "season_key", "gameweek_key"]
# filter/orderBy/limit do not change the column set, so the expressions are built once up front.
agg_exprs = [
    F.mean(c).alias(f"rolling_{c}_mean")
    for c in team_features_df.columns
    if c not in key_columns
]

for row in next_fixtures:
    fixture_key = row["fixture_key"]
    for team_col in ["home_team_key", "away_team_key"]:
        team_key = row[team_col]
        # Last `rolling_window` games for this team in the season, strictly before the next fixture
        team_recent = team_features_df.filter(
            (F.col("team_key") == team_key) &
            (F.col("fixture_key") < fixture_key) &
            (F.col("season_key") == season_key)
        ).orderBy(F.col("fixture_key").desc()).limit(rolling_window)
        # A global aggregate always yields one row; a team with no history gets null means.
        team_rolling = (
            team_recent.agg(*agg_exprs)
            .withColumn("team_key", F.lit(team_key))
            .withColumn("fixture_key", F.lit(fixture_key))
        )
        rolling_features.append(team_rolling)

# Union all team rolling features into a single DataFrame
if rolling_features:
    team_rolling_features_df = reduce(lambda a, b: a.unionByName(b), rolling_features)
    display(team_rolling_features_df)
else:
    print("No rolling features found for next fixtures.")