# FPL Points Predictor

Generates predictions for the upcoming gameweek automatically.


In [1]:
# ============================================================================
# STEP 1: UPDATE DATA - Get next GW fixtures and players
# ============================================================================
import sys
sys.path.insert(0, '..')

from scripts.update_data import main as update_data

# Run update to get next GW info
update_result = update_data()

CURRENT_GW = update_result['current_gw']
TARGET_GW = update_result['next_gw']
TARGET_SEASON = '2025-26'

print(f'\nüéØ Will generate predictions for GW{TARGET_GW}')


FPL DATA UPDATE SCRIPT
Timestamp: 2025-12-17 19:32:28
Checking FPL API for current gameweek...
  Current GW: 16, Next GW: 17

Current GW: 16 (latest completed)
Next GW: 17 (to predict)

[1] Fetching GW17 fixtures from FPL API...
  Found 10 fixtures for GW17:
    Newcastle vs Chelsea
    Bournemouth vs Burnley
    Brighton vs Sunderland
    Man City vs West Ham
    Wolves vs Brentford
    Spurs vs Liverpool
    Everton vs Arsenal
    Leeds vs Crystal Palace
    Aston Villa vs Man Utd
    Fulham vs Nott'm Forest

  Teams playing in GW17: 20

[2] Finding players for 20 teams...
  Found 295 unique players

[3] Checking data status for GW16...
  ‚úì Found 10 matches, 20 team datasets

[4] Creating prediction list for GW17...
  ‚úì Created 295 prediction rows
    Teams: 15
    Players: 295

‚úì Saved fixtures to: C:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj\data\upcoming\gw17_fixtures.csv
‚úì Saved prediction list to: C:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj\data\upcoming\gw17_play

In [2]:
# ============================================================================
# STEP 2: RUN PREDICTION PIPELINE
# ============================================================================
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline

# Run the full pipeline
pipeline = FPLPredictionPipeline(data_dir='../data')
predictions = pipeline.run(target_gw=TARGET_GW, target_season=TARGET_SEASON, verbose=True)

# Store for analysis
test_df = predictions
print(f'\n‚úì Generated {len(test_df)} predictions for GW{TARGET_GW}')


FPL PREDICTION PIPELINE - GW17 2025-26
[1] Loading data from raw CSV files...
  Loaded 49583 player-match records from 3354 team-matches
  Seasons: ['2021-22', '2022-23', '2023-24', '2024-25', '2025-26']
[2] Computing rolling features...
  Computed 119 rolling features
[3] Splitting train/test data...
  Mode: Prediction (GW17 is upcoming)
  Building prediction set for GW17...
    Using historical data up to GW16
    Found 10 fixtures, 20 teams
    Found 468 players in prior GW data
    Teams matched: 20/20
    Final test set: 468 players with features
  Train: 49583 records, Test: 468 records
[4] Training minutes model...
Training MinutesModel on 49583 samples (players who played 1+ min)...
  Actual distribution:
    Mean: 66.8, Median: 89.0
    90 min: 25088 (50.6%)
    60+ min: 34713 (70.0%)
    <30 min: 10355 (20.9%)

  Model predictions:
    Mean: 77.1, Median: 82.7
    MAE: 17.3
    R¬≤: 0.254
    Predicted 90 min: 3262 (6.6%)
    Predicted 60+ min: 42928 (86.6%)
[5] Training goal

In [3]:
# STEP 3: VIEW TOP 40 PLAYERS
import pandas as pd

cols = ['player_name', 'team', 'position', 'opponent', 'pred_minutes',
        'pred_exp_goals', 'pred_exp_assists', 'pred_cs_prob', 
        'pred_defcon_prob', 'pred_bonus', 'exp_total_pts']

top40 = test_df.nlargest(40, 'exp_total_pts')[cols].copy()
top40['pred_minutes'] = top40['pred_minutes'].round(0).astype(int)
top40['pred_exp_goals'] = (top40['pred_exp_goals']*(top40["pred_minutes"]/90)).round(2)
top40['pred_exp_assists'] = (top40['pred_exp_assists']*(top40["pred_minutes"]/90)).round(2)
top40['pred_cs_prob'] = (top40['pred_cs_prob'] * 100).round(0).astype(str) + '%'
top40['pred_defcon_prob'] = (top40['pred_defcon_prob'] * 100).round(0).astype(str) + '%'
top40['pred_bonus'] = top40['pred_bonus'].round(2)
top40['exp_total_pts'] = top40['exp_total_pts'].round(2)

top40.columns = ['Player', 'Team', 'Pos', 'Opp', 'Mins', 'xG', 'xA', 'CS', 'DC', 'Bonus', 'Pts']
print(f'TOP 40 PLAYERS - GW{TARGET_GW}')
print('=' * 100)
top40
#print(top40.to_string(index=False))


TOP 40 PLAYERS - GW17


Unnamed: 0,Player,Team,Pos,Opp,Mins,xG,xA,CS,DC,Bonus,Pts
57,Bruno Fernandes,manchester_united,CM,Aston Villa,89,0.53,0.57,13.0%,48.0%,2.89,10.37
124,Erling Haaland,manchester_city,FW,West Ham,88,1.12,0.26,38.0%,0.0%,2.89,10.29
60,Bukayo Saka,arsenal,RW,Everton,82,0.39,0.48,46.0%,22.0%,2.84,9.45
166,Hugo Ekitike,liverpool,FW,Spurs,76,0.76,0.15,36.0%,1.0%,2.53,8.69
304,Matheus Cunha,manchester_united,"FW,AM",Aston Villa,88,0.44,0.29,13.0%,23.0%,1.96,8.52
373,Phil Foden,manchester_city,AM,West Ham,88,0.81,0.22,38.0%,10.0%,1.05,8.46
160,Harry Wilson,fulham,RW,Nott'm Forest,81,0.44,0.32,30.0%,1.0%,1.75,7.56
333,Morgan Rogers,aston_villa,LW,Man Utd,89,0.46,0.31,36.0%,2.0%,1.77,7.44
328,Mohamed Salah,liverpool,"RW,FW",Spurs,88,0.29,0.84,36.0%,0.0%,0.88,7.29
379,Ra√∫l Jim√©nez,fulham,FW,Nott'm Forest,83,0.48,0.21,30.0%,1.0%,0.2,7.22


In [4]:

test_df.loc[test_df['player_name']=="Erling Haaland"][['player_name', 'team', 'position', 'opponent', 'pred_minutes',
        'pred_exp_goals', 'pred_exp_assists', 'pred_cs_prob', 
        'pred_defcon_prob', 'pred_bonus', 'exp_total_pts']]

Unnamed: 0,player_name,team,position,opponent,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
124,Erling Haaland,manchester_city,FW,West Ham,88.0,1.146193,0.269792,0.382814,0.003748,2.885884,10.287527


In [5]:

test_df.loc[test_df['player_name'].str.contains("nzo")][['player_name', 'team', 'position', 'opponent', 'pred_minutes',
        'pred_exp_goals', 'pred_exp_assists', 'pred_cs_prob', 
        'pred_defcon_prob', 'pred_bonus', 'exp_total_pts']]

Unnamed: 0,player_name,team,position,opponent,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
122,Enzo Fern√°ndez,chelsea,"DM,AM",Newcastle,88.0,0.380463,0.185628,0.234034,0.166711,0.082674,5.109328
123,Enzo Le F√©e,sunderland,AM,Brighton,88.0,0.200662,0.088197,0.272104,0.225092,0.04092,4.031111


In [6]:

test_df.loc[test_df['player_name']=="Harry Wilson"][['player_name', 'team', 'position', 'opponent', 'pred_minutes',
        'pred_exp_goals', 'pred_exp_assists', 'pred_cs_prob', 
        'pred_defcon_prob', 'pred_bonus', 'exp_total_pts']]

Unnamed: 0,player_name,team,position,opponent,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
160,Harry Wilson,fulham,RW,Nott'm Forest,81.29879,0.48404,0.353103,0.302973,0.013914,1.75015,7.560459


In [7]:
# STEP 4: DETAILED TOP 10
print(f'DETAILED PREDICTIONS - TOP 10 FOR GW{TARGET_GW}')
print('=' * 100)

for i, (_, row) in enumerate(test_df.nlargest(10, 'exp_total_pts').iterrows()):
    home_away = 'H' if row.get('is_home', 0) == 1 else 'A'
    print(f'\n{i+1}. {row["player_name"]} ({row["team"]}) {home_away} vs {row["opponent"]}')
    print(f'   Position: {row["position"]} | Predicted Minutes: {row["pred_minutes"]:.0f}')
    print(f'   Goals:   xG={row["pred_exp_goals"]:.2f} -> {row["exp_goals_pts"]:.2f} pts')
    print(f'   Assists: xA={row["pred_exp_assists"]:.2f} -> {row["exp_assists_pts"]:.2f} pts')
    print(f'   CS: {row["pred_cs_prob"]*100:.0f}% -> {row["exp_cs_pts"]:.2f} pts')
    print(f'   Defcon: {row["pred_defcon_prob"]*100:.0f}% -> {row["exp_defcon_pts"]:.2f} pts')
    print(f'   Bonus: {row.get("pred_bonus", 0):.2f} pts')
    print(f'   Appearance: {row["exp_appearance_pts"]:.0f} pts')
    print(f'   TOTAL: {row["exp_total_pts"]:.2f} pts')


DETAILED PREDICTIONS - TOP 10 FOR GW17

1. Bruno Fernandes (manchester_united) A vs Aston Villa
   Position: CM | Predicted Minutes: 89
   Goals:   xG=0.53 -> 2.66 pts
   Assists: xA=0.57 -> 1.72 pts
   CS: 13% -> 0.13 pts
   Defcon: 48% -> 0.97 pts
   Bonus: 2.89 pts
   Appearance: 2 pts
   TOTAL: 10.37 pts

2. Erling Haaland (manchester_city) H vs West Ham
   Position: FW | Predicted Minutes: 88
   Goals:   xG=1.15 -> 4.58 pts
   Assists: xA=0.27 -> 0.81 pts
   CS: 38% -> 0.00 pts
   Defcon: 0% -> 0.01 pts
   Bonus: 2.89 pts
   Appearance: 2 pts
   TOTAL: 10.29 pts

3. Bukayo Saka (arsenal) A vs Everton
   Position: RW | Predicted Minutes: 82
   Goals:   xG=0.42 -> 2.12 pts
   Assists: xA=0.53 -> 1.59 pts
   CS: 46% -> 0.46 pts
   Defcon: 22% -> 0.45 pts
   Bonus: 2.84 pts
   Appearance: 2 pts
   TOTAL: 9.45 pts

4. Hugo Ekitike (liverpool) A vs Spurs
   Position: FW | Predicted Minutes: 76
   Goals:   xG=0.90 -> 3.62 pts
   Assists: xA=0.17 -> 0.52 pts
   CS: 36% -> 0.00 pts
   Defc

In [8]:
# STEP 5: BREAKDOWN BY POSITION
print(f'\nTOP PLAYERS BY POSITION - GW{TARGET_GW}')
print('=' * 80)

def get_fpl_pos(pos):
    if pd.isna(pos): return 'MID'
    pos = str(pos).upper()
    if 'GK' in pos: return 'GK'
    elif any(p in pos for p in ['CB', 'LB', 'RB', 'WB', 'DF']): return 'DEF'
    elif any(p in pos for p in ['FW', 'CF', 'ST', 'LW', 'RW']): return 'FWD'
    return 'MID'

test_df['fpl_pos'] = test_df['position'].apply(get_fpl_pos)

for pos in ['GK', 'DEF', 'MID', 'FWD']:
    print(f'\n--- {pos} ---')
    pos_df = test_df[test_df['fpl_pos'] == pos].nlargest(5, 'exp_total_pts')
    for _, row in pos_df.iterrows():
        print(f"  {row['player_name']:20s} ({row['team']:15s}) vs {row['opponent']:15s} -> {row['exp_total_pts']:.2f} pts")



TOP PLAYERS BY POSITION - GW17

--- GK ---
  Robin Roefs          (sunderland     ) vs Brighton        -> 5.09 pts
  David Raya           (arsenal        ) vs Everton         -> 3.87 pts
  Nick Pope            (newcastle_united) vs Chelsea         -> 3.82 pts
  James Trafford       (manchester_city) vs West Ham        -> 3.78 pts
  Marco Bizot          (aston_villa    ) vs Man Utd         -> 3.77 pts

--- DEF ---
  Matheus Nunes        (manchester_city) vs West Ham        -> 6.73 pts
  Reinildo Mandava     (sunderland     ) vs Brighton        -> 6.47 pts
  Gabriel Magalh√£es    (arsenal        ) vs Everton         -> 6.42 pts
  Lewis Hall           (newcastle_united) vs Chelsea         -> 5.99 pts
  Daniel Mu√±oz         (crystal_palace ) vs Leeds           -> 5.89 pts

--- MID ---
  Bruno Fernandes      (manchester_united) vs Aston Villa     -> 10.37 pts
  Phil Foden           (manchester_city) vs West Ham        -> 8.46 pts
  Mateus Fernandes     (west_ham_united) vs Man City       

In [9]:
# STEP 6: SAVE PREDICTIONS
from pathlib import Path

output_dir = Path('../data/predictions')
output_dir.mkdir(exist_ok=True)

output_path = output_dir / f'gw{TARGET_GW}_{TARGET_SEASON}_predictions.csv'
test_df.to_csv(output_path, index=False)

print(f'Saved predictions to: {output_path}')
print(f'Total players: {len(test_df)}')


Saved predictions to: ..\data\predictions\gw17_2025-26_predictions.csv
Total players: 468


In [10]:
# Show Arsenal's CS prediction input features for GW16
from models.clean_sheet_model import CleanSheetModel

features = CleanSheetModel.FEATURES

# Find Arsenal in test_df
arsenal = test_df[test_df['team'].str.contains('manchester_united', case=False, na=False)].iloc[0]

print(f"fulham vs {arsenal['opponent']} - CS Prob: {arsenal['pred_cs_prob']:.1%}")
print(f"\n{'='*60}")
print("CS MODEL INPUT FEATURES:")
print(f"{'='*60}")

for feat in features:
    if feat in arsenal.index:
        val = arsenal[feat]
        print(f"  {feat}: {val}")
    else:
        print(f"  {feat}: NOT IN DATA")

fulham vs Aston Villa - CS Prob: 13.0%

CS MODEL INPUT FEATURES:
  team_goals_conceded_last1: 4.0
  team_goals_conceded_roll3: 2.0
  team_goals_conceded_roll5: 1.6
  team_goals_conceded_roll10: 1.4
  team_goals_conceded_roll30: 1.4333333333333333
  team_xga_roll5: 0.8400000000000001
  team_clean_sheets_roll5: 0.0
  team_clean_sheets_roll10: 1.0
  team_clean_sheets_roll30: 4.0
  team_xga_roll5_home: 0.5599999999999999
  team_xga_roll5_away: 0.8400000000000001
  opp_goals_scored_last1: 2.0
  opp_goals_scored_roll3: 2.6666666666666665
  opp_goals_scored_roll5: 2.2
  opp_goals_scored_roll10: 2.2
  opp_goals_scored_roll30: 2.2
  opp_xg_roll5: NOT IN DATA
  opp_xg_roll5_home: 1.56
  opp_xg_roll5_away: 1.44
  xga_xg_ratio: 0.5060240963855421
  defensive_advantage: 0.72
  is_home: 0
  team_encoded: 16
  opponent_encoded: 1


In [11]:
# Show Arsenal's CS prediction input features for GW16
from models.clean_sheet_model import CleanSheetModel

features = CleanSheetModel.FEATURES

# Find Arsenal in test_df
arsenal = test_df[test_df['team'].str.contains('arsenal', case=False, na=False)].iloc[0]

print(f"Arsenal vs {arsenal['opponent']} - CS Prob: {arsenal['pred_cs_prob']:.1%}")
print(f"\n{'='*60}")
print("CS MODEL INPUT FEATURES:")
print(f"{'='*60}")

for feat in features:
    if feat in arsenal.index:
        val = arsenal[feat]
        print(f"  {feat}: {val}")
    else:
        print(f"  {feat}: NOT IN DATA")

Arsenal vs Everton - CS Prob: 45.5%

CS MODEL INPUT FEATURES:
  team_goals_conceded_last1: 1.0
  team_goals_conceded_roll3: 1.0
  team_goals_conceded_roll5: 1.0
  team_goals_conceded_roll10: 0.7
  team_goals_conceded_roll30: 0.7333333333333333
  team_xga_roll5: 0.72
  team_clean_sheets_roll5: 1.0
  team_clean_sheets_roll10: 5.0
  team_clean_sheets_roll30: 13.0
  team_xga_roll5_home: 0.42000000000000004
  team_xga_roll5_away: 0.72
  opp_goals_scored_last1: 0.0
  opp_goals_scored_roll3: 1.0
  opp_goals_scored_roll5: 1.0
  opp_goals_scored_roll10: 1.0
  opp_goals_scored_roll30: 1.0
  opp_xg_roll5: NOT IN DATA
  opp_xg_roll5_home: 0.9600000000000002
  opp_xg_roll5_away: 1.3
  xga_xg_ratio: 0.6792452830188678
  defensive_advantage: 0.2400000000000002
  is_home: 0
  team_encoded: 0
  opponent_encoded: 8


# FPL Points Predictor

Uses predicted probabilities to calculate expected FPL points:
- P(Goal) √ó Goal Points (by position)
- P(Assist) √ó 3
- P(Clean Sheet) √ó CS Points (by position)
- P(Defcon) √ó 2 (hitting defensive action threshold)
- Predicted Bonus Points (from BonusModel)
- Expected Minutes ‚Üí Appearance Points

## FPL Scoring Rules
| Action | GK | DEF | MID | FWD |
|--------|----|----|-----|-----|
| Goal | 6 | 6 | 5 | 4 |
| Assist | 3 | 3 | 3 | 3 |
| Clean Sheet | 4 | 4 | 1 | 0 |
| Defcon (10+ DEF / 12+ MID,FWD) | 0 | 2 | 2 | 2 |
| 60+ mins | 2 | 2 | 2 | 2 |
| 1-59 mins | 1 | 1 | 1 | 1 |
| Bonus | 1-3 | 1-3 | 1-3 | 1-3 |
