# FPL Points Predictor

Generates predictions for the upcoming gameweek automatically.


In [1]:
# ============================================================================
# STEP 1: UPDATE DATA - Get next GW fixtures and players
# ============================================================================
import sys
sys.path.insert(0, '..')

from scripts.update_data import main as update_data

# Run update to get next GW info
update_result = update_data()

CURRENT_GW = update_result['current_gw']
TARGET_GW = update_result['next_gw']
TARGET_SEASON = '2025-26'

print(f'\nðŸŽ¯ Will generate predictions for GW{TARGET_GW}')


FPL DATA UPDATE SCRIPT
Timestamp: 2025-12-12 15:55:15
Checking FPL API for current gameweek...
  Current GW: 15, Next GW: 16

Current GW: 15 (latest completed)
Next GW: 16 (to predict)

[1] Fetching GW16 fixtures from FPL API...
  Found 10 fixtures for GW16:
    Chelsea vs Everton
    Liverpool vs Brighton
    Burnley vs Fulham
    Arsenal vs Wolves
    Crystal Palace vs Man City
    Nott'm Forest vs Spurs
    Sunderland vs Newcastle
    West Ham vs Aston Villa
    Brentford vs Leeds
    Man Utd vs Bournemouth

  Teams playing in GW16: 20

[2] Finding players for 20 teams...
  Found 294 unique players

[3] Checking data status for GW15...
  âœ“ Found 10 matches, 20 team datasets

[4] Creating prediction list for GW16...
  âœ“ Created 294 prediction rows
    Teams: 15
    Players: 294

âœ“ Saved fixtures to: C:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj\data\upcoming\gw16_fixtures.csv
âœ“ Saved prediction list to: C:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj\data\upcoming\gw16_play

In [2]:
# ============================================================================
# STEP 2: RUN PREDICTION PIPELINE
# ============================================================================
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline

# Run the full pipeline
pipeline = FPLPredictionPipeline(data_dir='../data')
predictions = pipeline.run(target_gw=TARGET_GW, target_season=TARGET_SEASON, verbose=True)

# Store for analysis
test_df = predictions
print(f'\nâœ“ Generated {len(test_df)} predictions for GW{TARGET_GW}')


FPL PREDICTION PIPELINE - GW16 2025-26
[1] Loading data from raw CSV files...
  Loaded 49282 player-match records from 3334 team-matches
  Seasons: ['2021-22', '2022-23', '2023-24', '2024-25', '2025-26']
[2] Computing rolling features...
  Computed 105 rolling features
[3] Splitting train/test data...
  Mode: Prediction (GW16 is upcoming)
  Building prediction set for GW16...
    Using historical data up to GW15
    Found 10 fixtures, 20 teams
    Found 464 players in prior GW data
    Teams matched: 20/20
    Final test set: 464 players with features
  Train: 49282 records, Test: 464 records
[4] Training minutes model...
Training MinutesModel on 49282 samples (players who played 1+ min)...
  Actual distribution:
    Mean: 66.9, Median: 89.0
    90 min: 24940 (50.6%)
    60+ min: 34504 (70.0%)
    <30 min: 10289 (20.9%)

  Model predictions:
    Mean: 77.1, Median: 82.7
    MAE: 17.3
    RÂ²: 0.254
    Predicted 90 min: 3300 (6.7%)
    Predicted 60+ min: 42712 (86.7%)
[5] Training goal

In [None]:
# STEP 3: VIEW TOP 40 PLAYERS
import pandas as pd

cols = ['player_name', 'team', 'position', 'opponent', 'pred_minutes',
        'pred_exp_goals', 'pred_exp_assists', 'pred_cs_prob', 
        'pred_defcon_prob', 'pred_bonus', 'exp_total_pts']

top40 = test_df.nlargest(40, 'exp_total_pts')[cols].copy()
top40['pred_minutes'] = top40['pred_minutes'].round(0).astype(int)
top40['pred_exp_goals'] = (top40['pred_exp_goals']*top40["pred_minutes"]).round(2)
top40['pred_exp_assists'] = (top40['pred_exp_assists']*top40["pred_minutes"]).round(2)
top40['pred_cs_prob'] = (top40['pred_cs_prob'] * 100).round(0).astype(str) + '%'
top40['pred_defcon_prob'] = (top40['pred_defcon_prob'] * 100).round(0).astype(str) + '%'
top40['pred_bonus'] = top40['pred_bonus'].round(2)
top40['exp_total_pts'] = top40['exp_total_pts'].round(2)

top40.columns = ['Player', 'Team', 'Pos', 'Opp', 'Mins', 'xG', 'xA', 'CS', 'DC', 'Bonus', 'Pts']
print(f'TOP 40 PLAYERS - GW{TARGET_GW}')
print('=' * 100)
print(top40.to_string(index=False))


TOP 40 PLAYERS - GW16
               Player                    Team      Pos            Opp  Mins    xG    xA    CS    DC  Bonus   Pts
      Bruno Fernandes       manchester_united       CM    Bournemouth    88 60.72 54.56 48.0% 48.0%   3.00 11.73
        Matheus Cunha       manchester_united    FW,AM    Bournemouth    88 41.36 34.32 48.0% 22.0%   0.41  8.74
          Declan Rice                 arsenal       LM         Wolves    88 29.92 28.16 66.0% 66.0%   1.27  7.94
          Xavi Simons       tottenham_hotspur       AM  Nott'm Forest    59 32.45 17.11 21.0% 18.0%   2.62  7.82
       Anthony Gordon        newcastle_united       LW     Sunderland    65 44.85  3.25 38.0%  0.0%   1.54  7.53
        Mohamed Salah               liverpool    RW,RM       Brighton    90 54.00 30.60 46.0%  1.0%   0.78  7.26
    Gabriel MagalhÃ£es                 arsenal       CB         Wolves    89 10.68  8.01 66.0% 55.0%   0.30  7.00
        Patrick Dorgu       manchester_united       WB    Bournemouth    

In [4]:

test_df.loc[test_df['player_name']=="Erling Haaland"][['player_name', 'team', 'position', 'opponent', 'pred_minutes',
        'pred_exp_goals', 'pred_exp_assists', 'pred_cs_prob', 
        'pred_defcon_prob', 'pred_bonus', 'exp_total_pts']]

Unnamed: 0,player_name,team,position,opponent,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
123,Erling Haaland,manchester_city,FW,Crystal Palace,88.0,0.397007,0.153452,0.362259,0.003017,1.178515,5.232935


In [5]:

test_df.loc[test_df['player_name']=="Harry Wilson"][['player_name', 'team', 'position', 'opponent', 'pred_minutes',
        'pred_exp_goals', 'pred_exp_assists', 'pred_cs_prob', 
        'pred_defcon_prob', 'pred_bonus', 'exp_total_pts']]

Unnamed: 0,player_name,team,position,opponent,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
158,Harry Wilson,fulham,"RW,AM",Burnley,81.936394,0.428639,0.189457,0.101851,0.070418,0.673826,5.62808


In [6]:
# STEP 4: DETAILED TOP 10
print(f'DETAILED PREDICTIONS - TOP 10 FOR GW{TARGET_GW}')
print('=' * 100)

for i, (_, row) in enumerate(test_df.nlargest(10, 'exp_total_pts').iterrows()):
    home_away = 'H' if row.get('is_home', 0) == 1 else 'A'
    print(f'\n{i+1}. {row["player_name"]} ({row["team"]}) {home_away} vs {row["opponent"]}')
    print(f'   Position: {row["position"]} | Predicted Minutes: {row["pred_minutes"]:.0f}')
    print(f'   Goals:   xG={row["pred_exp_goals"]:.2f} -> {row["exp_goals_pts"]:.2f} pts')
    print(f'   Assists: xA={row["pred_exp_assists"]:.2f} -> {row["exp_assists_pts"]:.2f} pts')
    print(f'   CS: {row["pred_cs_prob"]*100:.0f}% -> {row["exp_cs_pts"]:.2f} pts')
    print(f'   Defcon: {row["pred_defcon_prob"]*100:.0f}% -> {row["exp_defcon_pts"]:.2f} pts')
    print(f'   Bonus: {row.get("pred_bonus", 0):.2f} pts')
    print(f'   Appearance: {row["exp_appearance_pts"]:.0f} pts')
    print(f'   TOTAL: {row["exp_total_pts"]:.2f} pts')


DETAILED PREDICTIONS - TOP 10 FOR GW16

1. Bruno Fernandes (manchester_united) H vs Bournemouth
   Position: CM | Predicted Minutes: 88
   Goals:   xG=0.69 -> 3.44 pts
   Assists: xA=0.62 -> 1.86 pts
   CS: 48% -> 0.48 pts
   Defcon: 48% -> 0.95 pts
   Bonus: 3.00 pts
   Appearance: 2 pts
   TOTAL: 11.73 pts

2. Matheus Cunha (manchester_united) H vs Bournemouth
   Position: FW,AM | Predicted Minutes: 88
   Goals:   xG=0.47 -> 2.82 pts
   Assists: xA=0.39 -> 1.17 pts
   CS: 48% -> 1.90 pts
   Defcon: 22% -> 0.44 pts
   Bonus: 0.41 pts
   Appearance: 2 pts
   TOTAL: 8.74 pts

3. Declan Rice (arsenal) H vs Wolves
   Position: LM | Predicted Minutes: 88
   Goals:   xG=0.34 -> 1.72 pts
   Assists: xA=0.32 -> 0.97 pts
   CS: 66% -> 0.66 pts
   Defcon: 66% -> 1.32 pts
   Bonus: 1.27 pts
   Appearance: 2 pts
   TOTAL: 7.94 pts

4. Xavi Simons (tottenham_hotspur) A vs Nott'm Forest
   Position: AM | Predicted Minutes: 59
   Goals:   xG=0.55 -> 2.77 pts
   Assists: xA=0.29 -> 0.86 pts
   CS: 21

In [7]:
# STEP 5: BREAKDOWN BY POSITION
print(f'\nTOP PLAYERS BY POSITION - GW{TARGET_GW}')
print('=' * 80)

def get_fpl_pos(pos):
    if pd.isna(pos): return 'MID'
    pos = str(pos).upper()
    if 'GK' in pos: return 'GK'
    elif any(p in pos for p in ['CB', 'LB', 'RB', 'WB', 'DF']): return 'DEF'
    elif any(p in pos for p in ['FW', 'CF', 'ST', 'LW', 'RW']): return 'FWD'
    return 'MID'

test_df['fpl_pos'] = test_df['position'].apply(get_fpl_pos)

for pos in ['GK', 'DEF', 'MID', 'FWD']:
    print(f'\n--- {pos} ---')
    pos_df = test_df[test_df['fpl_pos'] == pos].nlargest(5, 'exp_total_pts')
    for _, row in pos_df.iterrows():
        print(f"  {row['player_name']:20s} ({row['team']:15s}) vs {row['opponent']:15s} -> {row['exp_total_pts']:.2f} pts")



TOP PLAYERS BY POSITION - GW16

--- GK ---
  David Raya           (arsenal        ) vs Wolves          -> 4.78 pts
  Senne Lammens        (manchester_united) vs Bournemouth     -> 4.33 pts
  Altay BayÄ±ndÄ±r       (manchester_united) vs Bournemouth     -> 4.24 pts
  Alisson              (liverpool      ) vs Brighton        -> 4.10 pts
  Giorgi Mamardashvili (liverpool      ) vs Brighton        -> 4.09 pts

--- DEF ---
  Gabriel MagalhÃ£es    (arsenal        ) vs Wolves          -> 7.00 pts
  Patrick Dorgu        (manchester_united) vs Bournemouth     -> 6.92 pts
  Nico Oâ€™Reilly        (manchester_city) vs Crystal Palace  -> 6.76 pts
  Diogo Dalot          (manchester_united) vs Bournemouth     -> 6.46 pts
  JurriÃ«n Timber       (arsenal        ) vs Wolves          -> 6.31 pts

--- MID ---
  Bruno Fernandes      (manchester_united) vs Bournemouth     -> 11.73 pts
  Declan Rice          (arsenal        ) vs Wolves          -> 7.94 pts
  Xavi Simons          (tottenham_hotspur) vs Not

In [8]:
# STEP 6: SAVE PREDICTIONS
from pathlib import Path

output_dir = Path('../data/predictions')
output_dir.mkdir(exist_ok=True)

output_path = output_dir / f'gw{TARGET_GW}_{TARGET_SEASON}_predictions.csv'
test_df.to_csv(output_path, index=False)

print(f'Saved predictions to: {output_path}')
print(f'Total players: {len(test_df)}')


Saved predictions to: ..\data\predictions\gw16_2025-26_predictions.csv
Total players: 464


In [9]:
# Show Arsenal's CS prediction input features for GW16
from models.clean_sheet_model import CleanSheetModel

features = CleanSheetModel.FEATURES

# Find Arsenal in test_df
arsenal = test_df[test_df['team'].str.contains('manchester_united', case=False, na=False)].iloc[0]

print(f"fulham vs {arsenal['opponent']} - CS Prob: {arsenal['pred_cs_prob']:.1%}")
print(f"\n{'='*60}")
print("CS MODEL INPUT FEATURES:")
print(f"{'='*60}")

for feat in features:
    if feat in arsenal.index:
        val = arsenal[feat]
        print(f"  {feat}: {val}")
    else:
        print(f"  {feat}: NOT IN DATA")

fulham vs Bournemouth - CS Prob: 47.5%

CS MODEL INPUT FEATURES:
  team_goals_conceded_last1: 1.0
  team_goals_conceded_roll3: 1.0
  team_goals_conceded_roll5: 1.2
  team_goals_conceded_roll10: 1.4
  team_goals_conceded_roll30: 1.4
  team_xga_roll5: 1.1400000000000001
  team_clean_sheets_roll5: 0.0
  team_clean_sheets_roll10: 1.0
  team_clean_sheets_roll30: 5.0
  team_xga_roll5_home: 1.1400000000000001
  team_xga_roll5_away: 1.92
  opp_goals_scored_last1: 0.0
  opp_goals_scored_roll3: 0.6666666666666666
  opp_goals_scored_roll5: 0.8
  opp_goals_scored_roll10: 0.8
  opp_goals_scored_roll30: 0.8
  opp_xg_roll5: NOT IN DATA
  opp_xg_roll5_home: 0.7200000000000001
  opp_xg_roll5_away: 1.4600000000000002
  xga_xg_ratio: 0.7307692307692307
  defensive_advantage: 0.32000000000000006
  is_home: 1
  team_encoded: 16
  opponent_encoded: 2


In [10]:
# Show Arsenal's CS prediction input features for GW16
from models.clean_sheet_model import CleanSheetModel

features = CleanSheetModel.FEATURES

# Find Arsenal in test_df
arsenal = test_df[test_df['team'].str.contains('arsenal', case=False, na=False)].iloc[0]

print(f"Arsenal vs {arsenal['opponent']} - CS Prob: {arsenal['pred_cs_prob']:.1%}")
print(f"\n{'='*60}")
print("CS MODEL INPUT FEATURES:")
print(f"{'='*60}")

for feat in features:
    if feat in arsenal.index:
        val = arsenal[feat]
        print(f"  {feat}: {val}")
    else:
        print(f"  {feat}: NOT IN DATA")

Arsenal vs Wolves - CS Prob: 65.5%

CS MODEL INPUT FEATURES:
  team_goals_conceded_last1: 2.0
  team_goals_conceded_roll3: 1.0
  team_goals_conceded_roll5: 1.2
  team_goals_conceded_roll10: 0.6
  team_goals_conceded_roll30: 0.6666666666666666
  team_xga_roll5: 0.36
  team_clean_sheets_roll5: 2.0
  team_clean_sheets_roll10: 5.0
  team_clean_sheets_roll30: 14.0
  team_xga_roll5_home: 0.36
  team_xga_roll5_away: 0.4800000000000001
  opp_goals_scored_last1: 1.0
  opp_goals_scored_roll3: 0.3333333333333333
  opp_goals_scored_roll5: 0.2
  opp_goals_scored_roll10: 0.2
  opp_goals_scored_roll30: 0.2
  opp_xg_roll5: NOT IN DATA
  opp_xg_roll5_home: 0.9599999999999997
  opp_xg_roll5_away: 1.5
  xga_xg_ratio: 0.22499999999999998
  defensive_advantage: 1.1400000000000001
  is_home: 1
  team_encoded: 0
  opponent_encoded: 26


# FPL Points Predictor

Uses predicted probabilities to calculate expected FPL points:
- P(Goal) Ã— Goal Points (by position)
- P(Assist) Ã— 3
- P(Clean Sheet) Ã— CS Points (by position)
- P(Defcon) Ã— 2 (hitting defensive action threshold)
- Predicted Bonus Points (from BonusModel)
- Expected Minutes â†’ Appearance Points

## FPL Scoring Rules
| Action | GK | DEF | MID | FWD |
|--------|----|----|-----|-----|
| Goal | 6 | 6 | 5 | 4 |
| Assist | 3 | 3 | 3 | 3 |
| Clean Sheet | 4 | 4 | 1 | 0 |
| Defcon (10+ DEF / 12+ MID,FWD) | 0 | 2 | 2 | 2 |
| 60+ mins | 2 | 2 | 2 | 2 |
| 1-59 mins | 1 | 1 | 1 | 1 |
| Bonus | 1-3 | 1-3 | 1-3 | 1-3 |
