In [1]:
# =============================================================================
# STEP 0: Force reload modules (run this first after code changes!)
# =============================================================================
# A plain `import` is a no-op for modules the kernel has already loaded, so
# edits to the project source are only picked up by an explicit reload.
import importlib
import src.data_loader
import src.features
import src.pipeline
import src.models.goals
import src.models.base

# Reload order matters: a module that did `from x import Y` keeps the old `Y`
# until it is itself reloaded, so dependencies go first and the top-level
# pipeline goes last.
# NOTE(review): order assumes goals depends on base, and pipeline depends on
# everything else -- confirm against the package's actual imports.
for _module in (
    src.models.base,
    src.models.goals,
    src.data_loader,
    src.features,
    src.pipeline,
):
    importlib.reload(_module)

# Objects created before the reload (e.g. an existing `pipeline` instance) still
# use the old class definitions; re-run the cells that create them.


In [2]:
# =============================================================================
# STEP 1: Update Data (optional - only if you need fresh gameweek data)
# =============================================================================
# Runs the incremental scraper for a single gameweek. Judging by the recorded
# output, matches whose IDs are already present in the player-stats CSV are
# skipped, so re-running for an already-scraped gameweek scrapes nothing.
!python scrape_update_data.py --gameweek 24
# Alternative invocation (disabled); presumably lets the script choose the
# gameweek(s) itself -- TODO confirm against scrape_update_data.py:
# !python scrape_update_data.py --auto

FotMob Incremental Data Updater

Season: 2025/2026
Fetching fixture list...
Found 240 completed matches in 2025/2026
Checking existing data in: player_stats_8seasons_20260204_215756.csv
Found 2897 existing match IDs in data
Filtering to gameweeks [24]: 10 matches
  Skipping 10 matches already in data
  0 new matches to scrape
No matches to scrape!


In [3]:
# =============================================================================
# STEP 2: Run the Pipeline
# =============================================================================
from src.pipeline import FPLPipeline

# Run configuration -- everything a reader is likely to change lives here
# rather than being buried in the calls below.
DATA_DIR = 'data'            # directory handed to FPLPipeline
N_TUNING_TRIALS = 100        # hyperparameter-search trials passed to tune()
TARGET_GAMEWEEK = 25         # gameweek to generate predictions for
TARGET_SEASON = '2025/2026'  # season the target gameweek belongs to

# Load the raw data and build the model features.
pipeline = FPLPipeline(DATA_DIR)
pipeline.load_data()
pipeline.compute_features()

# Tune hyperparameters, then fit the models. Per the recorded output, tune()
# also reports metrics on a held-out test set (the most recent 20% of samples).
pipeline.tune(n_iter=N_TUNING_TRIALS, use_subprocess=True)
pipeline.train()

# Per-player predictions for the upcoming gameweek; later cells read this frame.
predictions = pipeline.predict(gameweek=TARGET_GAMEWEEK, season=TARGET_SEASON)

LOADING DATA
Loading player stats from: player_stats_8seasons_20260204_215756.csv
  Loaded 81,935 player-match records
  Removed 302 duplicate rows
  Seasons: ['2018/2019', '2019/2020', '2020/2021', '2021/2022', '2022/2023', '2023/2024', '2024/2025', '2025/2026']
Loaded 2,901 fixtures
  Removed 240 duplicate fixtures
Filtered to seasons: ['2020/2021', '2021/2022', '2022/2023', '2023/2024', '2024/2025', '2025/2026']
Current season (2025/2026): 501 active players
Final dataset: 61,662 records

COMPUTING FEATURES
Computing rolling features...
  Computed 84 rolling/lifetime features

TUNING HYPERPARAMETERS WITH HOLDOUT TEST SET

Data split (temporal):
  Train: 49,320 samples
  Test:  12,329 samples (most recent 20%)
  Test set spans: ['2024/2025', '2025/2026']

------------------------------------------------------------
PHASE 1: Hyperparameter Tuning (5-fold CV on train set)
------------------------------------------------------------

Tuning GOALS (100 trials, 5-fold CV, Poisson Deviance

  from .autonotebook import tqdm as notebook_tqdm
[I 2026-02-04 22:17:44,704] A new study created in memory with name: no-name-36c85113-648a-4302-addc-dc9880aeead8


  Best CV Poisson Deviance: 8.3100
  Params: {'n_estimators': 183, 'max_depth': 3, 'learning_rate': 0.028875092188713345, 'min_child_weight': 1}

Tuning CLEAN_SHEET (100 trials, 5-fold CV, Brier Score)...
  Team-matches: 3427, CS rate: 25.4%


Best trial: 0. Best value: 0.208133:   1%|          | 1/100 [00:01<02:35,  1.57s/it]

[I 2026-02-04 22:17:46,280] Trial 0 finished with value: 0.20813265687366972 and parameters: {'n_estimators': 144, 'max_depth': 8, 'learning_rate': 0.1205712628744377, 'min_child_weight': 6}. Best is trial 0 with value: 0.20813265687366972.


Best trial: 1. Best value: 0.179699:   2%|▏         | 2/100 [00:02<01:44,  1.06s/it]

[I 2026-02-04 22:17:46,989] Trial 1 finished with value: 0.17969927735311889 and parameters: {'n_estimators': 89, 'max_depth': 3, 'learning_rate': 0.012184186502221764, 'min_child_weight': 9}. Best is trial 1 with value: 0.17969927735311889.


Best trial: 1. Best value: 0.179699:   3%|▎         | 3/100 [00:03<01:31,  1.06it/s]

[I 2026-02-04 22:17:47,795] Trial 2 finished with value: 0.18288652372739705 and parameters: {'n_estimators': 200, 'max_depth': 7, 'learning_rate': 0.010725209743171996, 'min_child_weight': 10}. Best is trial 1 with value: 0.17969927735311889.


Best trial: 1. Best value: 0.179699:   4%|▍         | 4/100 [00:03<01:25,  1.12it/s]

[I 2026-02-04 22:17:48,603] Trial 3 finished with value: 0.18359504743814997 and parameters: {'n_estimators': 258, 'max_depth': 4, 'learning_rate': 0.01855998084649059, 'min_child_weight': 2}. Best is trial 1 with value: 0.17969927735311889.


Best trial: 1. Best value: 0.179699:   5%|▌         | 5/100 [00:04<01:22,  1.15it/s]

[I 2026-02-04 22:17:49,422] Trial 4 finished with value: 0.1900284443297048 and parameters: {'n_estimators': 126, 'max_depth': 6, 'learning_rate': 0.04345454109729477, 'min_child_weight': 3}. Best is trial 1 with value: 0.17969927735311889.


Best trial: 1. Best value: 0.179699:   7%|▋         | 7/100 [00:05<00:53,  1.74it/s]

[I 2026-02-04 22:17:50,102] Trial 5 finished with value: 0.1818549448738925 and parameters: {'n_estimators': 203, 'max_depth': 3, 'learning_rate': 0.027010527749605478, 'min_child_weight': 4}. Best is trial 1 with value: 0.17969927735311889.
[I 2026-02-04 22:17:50,208] Trial 6 finished with value: 0.18645218416153003 and parameters: {'n_estimators': 164, 'max_depth': 7, 'learning_rate': 0.019721610970574007, 'min_child_weight': 6}. Best is trial 1 with value: 0.17969927735311889.
[I 2026-02-04 22:17:50,263] Trial 7 finished with value: 0.18826825317105314 and parameters: {'n_estimators': 198, 'max_depth': 3, 'learning_rate': 0.07896186801026692, 'min_child_weight': 2}. Best is trial 1 with value: 0.17969927735311889.


Best trial: 1. Best value: 0.179699:  12%|█▏        | 12/100 [00:05<00:16,  5.20it/s]

[I 2026-02-04 22:17:50,301] Trial 8 finished with value: 0.20876956492225024 and parameters: {'n_estimators': 66, 'max_depth': 8, 'learning_rate': 0.26690431824362526, 'min_child_weight': 9}. Best is trial 1 with value: 0.17969927735311889.
[I 2026-02-04 22:17:50,339] Trial 9 finished with value: 0.18628533956965318 and parameters: {'n_estimators': 126, 'max_depth': 3, 'learning_rate': 0.1024932221692416, 'min_child_weight': 5}. Best is trial 1 with value: 0.17969927735311889.
[I 2026-02-04 22:17:50,380] Trial 10 finished with value: 0.1819918656609305 and parameters: {'n_estimators': 55, 'max_depth': 5, 'learning_rate': 0.010260022325256595, 'min_child_weight': 8}. Best is trial 1 with value: 0.17969927735311889.
[I 2026-02-04 22:17:50,461] Trial 11 finished with value: 0.18677287660870318 and parameters: {'n_estimators': 257, 'max_depth': 4, 'learning_rate': 0.030141745644539943, 'min_child_weight': 4}. Best is trial 1 with value: 0.17969927735311889.


Best trial: 16. Best value: 0.178491:  16%|█▌        | 16/100 [00:05<00:10,  7.88it/s]

[I 2026-02-04 22:17:50,500] Trial 12 finished with value: 0.17996330383502554 and parameters: {'n_estimators': 97, 'max_depth': 4, 'learning_rate': 0.022910397190415495, 'min_child_weight': 7}. Best is trial 1 with value: 0.17969927735311889.
[I 2026-02-04 22:17:50,540] Trial 13 finished with value: 0.17956736298487425 and parameters: {'n_estimators': 91, 'max_depth': 4, 'learning_rate': 0.016184756546235152, 'min_child_weight': 8}. Best is trial 13 with value: 0.17956736298487425.
[I 2026-02-04 22:17:50,585] Trial 14 finished with value: 0.18011966991227296 and parameters: {'n_estimators': 90, 'max_depth': 5, 'learning_rate': 0.01405713166590307, 'min_child_weight': 10}. Best is trial 13 with value: 0.17956736298487425.
[I 2026-02-04 22:17:50,625] Trial 15 finished with value: 0.1822943906223511 and parameters: {'n_estimators': 90, 'max_depth': 4, 'learning_rate': 0.043047222355872466, 'min_child_weight': 8}. Best is trial 13 with value: 0.17956736298487425.
[I 2026-02-04 22:17:50,667

Best trial: 16. Best value: 0.178491:  21%|██        | 21/100 [00:06<00:06, 11.65it/s]

[I 2026-02-04 22:17:50,767] Trial 17 finished with value: 0.19098757463637983 and parameters: {'n_estimators': 292, 'max_depth': 5, 'learning_rate': 0.03486522047050463, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:50,817] Trial 18 finished with value: 0.18616581889406902 and parameters: {'n_estimators': 125, 'max_depth': 4, 'learning_rate': 0.06245498476075221, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:50,864] Trial 19 finished with value: 0.17897548970764893 and parameters: {'n_estimators': 166, 'max_depth': 3, 'learning_rate': 0.016034055878559182, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:50,940] Trial 20 finished with value: 0.21596962701747882 and parameters: {'n_estimators': 171, 'max_depth': 6, 'learning_rate': 0.18188215906458088, 'min_child_weight': 5}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  24%|██▍       | 24/100 [00:06<00:05, 13.77it/s]

[I 2026-02-04 22:17:50,983] Trial 21 finished with value: 0.17920713840603436 and parameters: {'n_estimators': 147, 'max_depth': 3, 'learning_rate': 0.019293369292093035, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,025] Trial 22 finished with value: 0.17866264107954 and parameters: {'n_estimators': 149, 'max_depth': 3, 'learning_rate': 0.015651027835738567, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,078] Trial 23 finished with value: 0.17903185000837488 and parameters: {'n_estimators': 184, 'max_depth': 3, 'learning_rate': 0.014530404746275356, 'min_child_weight': 6}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,122] Trial 24 finished with value: 0.1799970314847577 and parameters: {'n_estimators': 150, 'max_depth': 3, 'learning_rate': 0.024175074355191933, 'min_child_weight': 9}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  28%|██▊       | 28/100 [00:06<00:04, 15.58it/s]

[I 2026-02-04 22:17:51,182] Trial 25 finished with value: 0.18295136325134917 and parameters: {'n_estimators': 224, 'max_depth': 3, 'learning_rate': 0.033836536873135634, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,226] Trial 26 finished with value: 0.17959563985590446 and parameters: {'n_estimators': 112, 'max_depth': 4, 'learning_rate': 0.013876476775159208, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,292] Trial 27 finished with value: 0.1891967388543604 and parameters: {'n_estimators': 159, 'max_depth': 5, 'learning_rate': 0.051776354010346234, 'min_child_weight': 9}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  32%|███▏      | 32/100 [00:06<00:04, 16.43it/s]

[I 2026-02-04 22:17:51,356] Trial 28 finished with value: 0.18079659941963783 and parameters: {'n_estimators': 220, 'max_depth': 3, 'learning_rate': 0.02244436240994719, 'min_child_weight': 1}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,408] Trial 29 finished with value: 0.1801624673674241 and parameters: {'n_estimators': 138, 'max_depth': 4, 'learning_rate': 0.01685265116057537, 'min_child_weight': 6}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,467] Trial 30 finished with value: 0.1787059507434213 and parameters: {'n_estimators': 181, 'max_depth': 3, 'learning_rate': 0.010425658055299665, 'min_child_weight': 5}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,520] Trial 31 finished with value: 0.1787297176679487 and parameters: {'n_estimators': 178, 'max_depth': 3, 'learning_rate': 0.010276734043698813, 'min_child_weight': 5}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  36%|███▌      | 36/100 [00:07<00:03, 17.63it/s]

[I 2026-02-04 22:17:51,570] Trial 32 finished with value: 0.17866337605260738 and parameters: {'n_estimators': 190, 'max_depth': 3, 'learning_rate': 0.011873534911199633, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,629] Trial 33 finished with value: 0.17926577902618354 and parameters: {'n_estimators': 225, 'max_depth': 3, 'learning_rate': 0.01305293342027598, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,692] Trial 34 finished with value: 0.18009429690672185 and parameters: {'n_estimators': 187, 'max_depth': 4, 'learning_rate': 0.011349272067579389, 'min_child_weight': 3}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,731] Trial 35 finished with value: 0.1791876468486653 and parameters: {'n_estimators': 112, 'max_depth': 3, 'learning_rate': 0.012155952260366044, 'min_child_weight': 3}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  38%|███▊      | 38/100 [00:07<00:04, 15.36it/s]

[I 2026-02-04 22:17:51,798] Trial 36 finished with value: 0.1804722907371888 and parameters: {'n_estimators': 209, 'max_depth': 4, 'learning_rate': 0.012395158107006875, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:51,904] Trial 37 finished with value: 0.1869117090815528 and parameters: {'n_estimators': 239, 'max_depth': 6, 'learning_rate': 0.01963881511809124, 'min_child_weight': 5}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  41%|████      | 41/100 [00:07<00:04, 14.48it/s]

[I 2026-02-04 22:17:52,004] Trial 38 finished with value: 0.1896117377387369 and parameters: {'n_estimators': 140, 'max_depth': 8, 'learning_rate': 0.027204646266317148, 'min_child_weight': 6}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,060] Trial 39 finished with value: 0.17972689728572716 and parameters: {'n_estimators': 198, 'max_depth': 3, 'learning_rate': 0.017356134516042312, 'min_child_weight': 3}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,104] Trial 40 finished with value: 0.17854516673136686 and parameters: {'n_estimators': 156, 'max_depth': 3, 'learning_rate': 0.011563323494606426, 'min_child_weight': 10}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,149] Trial 41 finished with value: 0.17874227854280053 and parameters: {'n_estimators': 156, 'max_depth': 3, 'learning_rate': 0.01016689761287984, 'min_child_weight': 10}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  45%|████▌     | 45/100 [00:07<00:03, 16.08it/s]

[I 2026-02-04 22:17:52,237] Trial 42 finished with value: 0.182618379292299 and parameters: {'n_estimators': 133, 'max_depth': 7, 'learning_rate': 0.012592313400844731, 'min_child_weight': 9}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,292] Trial 43 finished with value: 0.17889681821284437 and parameters: {'n_estimators': 184, 'max_depth': 3, 'learning_rate': 0.015234144566943782, 'min_child_weight': 10}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,345] Trial 44 finished with value: 0.17969657139050002 and parameters: {'n_estimators': 113, 'max_depth': 4, 'learning_rate': 0.011459738747401116, 'min_child_weight': 5}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,381] Trial 45 finished with value: 0.17889506415821116 and parameters: {'n_estimators': 75, 'max_depth': 3, 'learning_rate': 0.019711835893998803, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  50%|█████     | 50/100 [00:07<00:02, 17.31it/s]

[I 2026-02-04 22:17:52,434] Trial 46 finished with value: 0.18984192660816826 and parameters: {'n_estimators': 175, 'max_depth': 3, 'learning_rate': 0.1165890300736711, 'min_child_weight': 6}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,498] Trial 47 finished with value: 0.17985864978696017 and parameters: {'n_estimators': 192, 'max_depth': 4, 'learning_rate': 0.010197874043504583, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,557] Trial 48 finished with value: 0.18079166488131262 and parameters: {'n_estimators': 209, 'max_depth': 3, 'learning_rate': 0.022649243478567175, 'min_child_weight': 9}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,610] Trial 49 finished with value: 0.1800731256847866 and parameters: {'n_estimators': 155, 'max_depth': 4, 'learning_rate': 0.01490946283364872, 'min_child_weight': 10}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  53%|█████▎    | 53/100 [00:08<00:02, 17.57it/s]

[I 2026-02-04 22:17:52,673] Trial 50 finished with value: 0.21881879315604796 and parameters: {'n_estimators': 124, 'max_depth': 5, 'learning_rate': 0.2852799858240089, 'min_child_weight': 2}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,722] Trial 51 finished with value: 0.17874805716110176 and parameters: {'n_estimators': 174, 'max_depth': 3, 'learning_rate': 0.010285295058752371, 'min_child_weight': 5}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,771] Trial 52 finished with value: 0.17866219029018277 and parameters: {'n_estimators': 164, 'max_depth': 3, 'learning_rate': 0.01183252693586247, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,819] Trial 53 finished with value: 0.17861991339971822 and parameters: {'n_estimators': 161, 'max_depth': 3, 'learning_rate': 0.012859987800827288, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  57%|█████▋    | 57/100 [00:08<00:02, 17.26it/s]

[I 2026-02-04 22:17:52,867] Trial 54 finished with value: 0.17861891460382423 and parameters: {'n_estimators': 162, 'max_depth': 3, 'learning_rate': 0.013438059127194462, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:52,947] Trial 55 finished with value: 0.1810024176003882 and parameters: {'n_estimators': 163, 'max_depth': 4, 'learning_rate': 0.017434586098628316, 'min_child_weight': 3}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,004] Trial 56 finished with value: 0.17849380618122354 and parameters: {'n_estimators': 147, 'max_depth': 3, 'learning_rate': 0.01364993838406383, 'min_child_weight': 2}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  60%|██████    | 60/100 [00:08<00:02, 18.55it/s]

[I 2026-02-04 22:17:53,050] Trial 57 finished with value: 0.1790622167124693 and parameters: {'n_estimators': 102, 'max_depth': 3, 'learning_rate': 0.013613394027830749, 'min_child_weight': 2}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,098] Trial 58 finished with value: 0.2029249519449225 and parameters: {'n_estimators': 131, 'max_depth': 4, 'learning_rate': 0.17946427191489572, 'min_child_weight': 2}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,147] Trial 59 finished with value: 0.1799037457369566 and parameters: {'n_estimators': 144, 'max_depth': 3, 'learning_rate': 0.026034740880938823, 'min_child_weight': 1}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,197] Trial 60 finished with value: 0.186172886477528 and parameters: {'n_estimators': 164, 'max_depth': 3, 'learning_rate': 0.07711383150924575, 'min_child_weight': 4}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  64%|██████▍   | 64/100 [00:08<00:01, 19.21it/s]

[I 2026-02-04 22:17:53,243] Trial 61 finished with value: 0.17856436385834282 and parameters: {'n_estimators': 150, 'max_depth': 3, 'learning_rate': 0.015311366841751535, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,292] Trial 62 finished with value: 0.17857975625368494 and parameters: {'n_estimators': 167, 'max_depth': 3, 'learning_rate': 0.013655543701485015, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,334] Trial 63 finished with value: 0.17879603922963322 and parameters: {'n_estimators': 122, 'max_depth': 3, 'learning_rate': 0.020916092131430707, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,378] Trial 64 finished with value: 0.17890994215653389 and parameters: {'n_estimators': 150, 'max_depth': 3, 'learning_rate': 0.018216941321126078, 'min_child_weight': 9}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 16. Best value: 0.178491:  67%|██████▋   | 67/100 [00:08<00:01, 20.18it/s]

[I 2026-02-04 22:17:53,425] Trial 65 finished with value: 0.1787134715683008 and parameters: {'n_estimators': 169, 'max_depth': 3, 'learning_rate': 0.014242815948795918, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,486] Trial 66 finished with value: 0.1799745115749366 and parameters: {'n_estimators': 136, 'max_depth': 4, 'learning_rate': 0.016327271242792544, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,539] Trial 67 finished with value: 0.17857224149766285 and parameters: {'n_estimators': 155, 'max_depth': 3, 'learning_rate': 0.013329869598767119, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.


Best trial: 71. Best value: 0.178479:  72%|███████▏  | 72/100 [00:09<00:01, 19.19it/s]

[I 2026-02-04 22:17:53,631] Trial 68 finished with value: 0.18470615221349254 and parameters: {'n_estimators': 143, 'max_depth': 7, 'learning_rate': 0.015649350188512656, 'min_child_weight': 7}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,684] Trial 69 finished with value: 0.18524200495595572 and parameters: {'n_estimators': 151, 'max_depth': 4, 'learning_rate': 0.042684940820683616, 'min_child_weight': 9}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,719] Trial 70 finished with value: 0.1790732062749515 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.0135434185792889, 'min_child_weight': 8}. Best is trial 16 with value: 0.17849130612298497.
[I 2026-02-04 22:17:53,765] Trial 71 finished with value: 0.1784787518906916 and parameters: {'n_estimators': 158, 'max_depth': 3, 'learning_rate': 0.012780208497107332, 'min_child_weight': 8}. Best is trial 71 with value: 0.1784787518906916.
[I 2026-02-04 22:17:53,80

Best trial: 71. Best value: 0.178479:  76%|███████▌  | 76/100 [00:09<00:01, 19.25it/s]

[I 2026-02-04 22:17:53,849] Trial 73 finished with value: 0.1789519729695736 and parameters: {'n_estimators': 132, 'max_depth': 3, 'learning_rate': 0.01109780533140559, 'min_child_weight': 8}. Best is trial 71 with value: 0.1784787518906916.
[I 2026-02-04 22:17:53,919] Trial 74 finished with value: 0.18129563138813587 and parameters: {'n_estimators': 297, 'max_depth': 3, 'learning_rate': 0.017857886375366446, 'min_child_weight': 7}. Best is trial 71 with value: 0.1784787518906916.
[I 2026-02-04 22:17:53,968] Trial 75 finished with value: 0.17982710400172913 and parameters: {'n_estimators': 169, 'max_depth': 3, 'learning_rate': 0.021149371519342007, 'min_child_weight': 9}. Best is trial 71 with value: 0.1784787518906916.
[I 2026-02-04 22:17:54,008] Trial 76 finished with value: 0.17850258205640668 and parameters: {'n_estimators': 142, 'max_depth': 3, 'learning_rate': 0.01291832451972807, 'min_child_weight': 8}. Best is trial 71 with value: 0.1784787518906916.


Best trial: 71. Best value: 0.178479:  81%|████████  | 81/100 [00:09<00:00, 20.63it/s]

[I 2026-02-04 22:17:54,057] Trial 77 finished with value: 0.17861312936841817 and parameters: {'n_estimators': 155, 'max_depth': 3, 'learning_rate': 0.011088115912978709, 'min_child_weight': 8}. Best is trial 71 with value: 0.1784787518906916.
[I 2026-02-04 22:17:54,098] Trial 78 finished with value: 0.17857502816395313 and parameters: {'n_estimators': 142, 'max_depth': 3, 'learning_rate': 0.012350103090991023, 'min_child_weight': 9}. Best is trial 71 with value: 0.1784787518906916.
[I 2026-02-04 22:17:54,150] Trial 79 finished with value: 0.18007796319284078 and parameters: {'n_estimators': 144, 'max_depth': 4, 'learning_rate': 0.016222047417803358, 'min_child_weight': 9}. Best is trial 71 with value: 0.1784787518906916.
[I 2026-02-04 22:17:54,192] Trial 80 finished with value: 0.1787612450970418 and parameters: {'n_estimators': 129, 'max_depth': 3, 'learning_rate': 0.012328663334895629, 'min_child_weight': 10}. Best is trial 71 with value: 0.1784787518906916.


Best trial: 81. Best value: 0.178452:  85%|████████▌ | 85/100 [00:09<00:00, 21.30it/s]

[I 2026-02-04 22:17:54,235] Trial 81 finished with value: 0.17845193039698923 and parameters: {'n_estimators': 139, 'max_depth': 3, 'learning_rate': 0.014714535186228915, 'min_child_weight': 8}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,281] Trial 82 finished with value: 0.17877059863327957 and parameters: {'n_estimators': 137, 'max_depth': 3, 'learning_rate': 0.011401050534475326, 'min_child_weight': 9}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,319] Trial 83 finished with value: 0.17849019026410345 and parameters: {'n_estimators': 118, 'max_depth': 3, 'learning_rate': 0.01866898266417978, 'min_child_weight': 8}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,361] Trial 84 finished with value: 0.17859919980999805 and parameters: {'n_estimators': 115, 'max_depth': 3, 'learning_rate': 0.014886734715997997, 'min_child_weight': 8}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:5

Best trial: 81. Best value: 0.178452:  90%|█████████ | 90/100 [00:09<00:00, 21.09it/s]

[I 2026-02-04 22:17:54,457] Trial 86 finished with value: 0.17985532161414142 and parameters: {'n_estimators': 103, 'max_depth': 4, 'learning_rate': 0.01791504072910562, 'min_child_weight': 8}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,504] Trial 87 finished with value: 0.17972960721384934 and parameters: {'n_estimators': 149, 'max_depth': 3, 'learning_rate': 0.02461230623340211, 'min_child_weight': 7}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,543] Trial 88 finished with value: 0.17879830627200413 and parameters: {'n_estimators': 107, 'max_depth': 3, 'learning_rate': 0.016372046511473358, 'min_child_weight': 6}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,607] Trial 89 finished with value: 0.18515351160470434 and parameters: {'n_estimators': 128, 'max_depth': 6, 'learning_rate': 0.028491219594101825, 'min_child_weight': 8}. Best is trial 81 with value: 0.17845193039698923.


Best trial: 92. Best value: 0.178409:  94%|█████████▍| 94/100 [00:10<00:00, 21.59it/s]

[I 2026-02-04 22:17:54,646] Trial 90 finished with value: 0.17959747645954657 and parameters: {'n_estimators': 94, 'max_depth': 3, 'learning_rate': 0.03426875648365224, 'min_child_weight': 7}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,692] Trial 91 finished with value: 0.1785468223004112 and parameters: {'n_estimators': 142, 'max_depth': 3, 'learning_rate': 0.012826834489056546, 'min_child_weight': 9}. Best is trial 81 with value: 0.17845193039698923.
[I 2026-02-04 22:17:54,737] Trial 92 finished with value: 0.17840934165885142 and parameters: {'n_estimators': 157, 'max_depth': 3, 'learning_rate': 0.012924553719482078, 'min_child_weight': 9}. Best is trial 92 with value: 0.17840934165885142.
[I 2026-02-04 22:17:54,779] Trial 93 finished with value: 0.1787936711265105 and parameters: {'n_estimators': 137, 'max_depth': 3, 'learning_rate': 0.010995611714426657, 'min_child_weight': 10}. Best is trial 92 with value: 0.17840934165885142.
[I 2026-02-04 22:17:54,

Best trial: 92. Best value: 0.178409:  99%|█████████▉| 99/100 [00:10<00:00, 21.91it/s]

[I 2026-02-04 22:17:54,859] Trial 95 finished with value: 0.18010358525208317 and parameters: {'n_estimators': 82, 'max_depth': 3, 'learning_rate': 0.01182183701484051, 'min_child_weight': 10}. Best is trial 92 with value: 0.17840934165885142.
[I 2026-02-04 22:17:54,904] Trial 96 finished with value: 0.17844392081084512 and parameters: {'n_estimators': 158, 'max_depth': 3, 'learning_rate': 0.014233669591774696, 'min_child_weight': 9}. Best is trial 92 with value: 0.17840934165885142.
[I 2026-02-04 22:17:54,956] Trial 97 finished with value: 0.17977928355476874 and parameters: {'n_estimators': 175, 'max_depth': 3, 'learning_rate': 0.020738055958189876, 'min_child_weight': 10}. Best is trial 92 with value: 0.17840934165885142.
[I 2026-02-04 22:17:55,003] Trial 98 finished with value: 0.17848476720750375 and parameters: {'n_estimators': 157, 'max_depth': 3, 'learning_rate': 0.013930162777619907, 'min_child_weight': 9}. Best is trial 92 with value: 0.17840934165885142.


Best trial: 92. Best value: 0.178409: 100%|██████████| 100/100 [00:10<00:00,  9.62it/s]


[I 2026-02-04 22:17:55,098] Trial 99 finished with value: 0.18424577394280234 and parameters: {'n_estimators': 274, 'max_depth': 5, 'learning_rate': 0.014560742640534724, 'min_child_weight': 9}. Best is trial 92 with value: 0.17840934165885142.
  Best CV Brier Score: 0.1784
  Params: {'n_estimators': 157, 'max_depth': 3, 'learning_rate': 0.012924553719482078, 'min_child_weight': 9}

------------------------------------------------------------
PHASE 2: Evaluation on Held-Out Test Set
------------------------------------------------------------

Model        Metric       Test Value   Secondary   
------------------------------------------------
GOALS        Poisson Dev  0.4862       0.1853      
ASSISTS      Poisson Dev  0.4032       0.1386      
MINUTES      Huber Loss   143.2087     18.6464     
DEFCON       Poisson Dev  9.1267       6.1408      
CLEAN_SHEET  Brier Score  0.1765       0.6567      

(Lower is better for all metrics except AUC which should be higher)
Poisson Dev: count/r

In [4]:
# =============================================================================
# STEP 3: View Top Players
# =============================================================================
# The 20 players with the highest expected points, shown together with the
# individual prediction columns.
display_cols = [
    'player_name',
    'team',
    'fpl_position',
    'opponent',
    'is_home',
    'pred_minutes',
    'pred_exp_goals',
    'pred_exp_assists',
    'pred_cs_prob',
    'pred_defcon_prob',
    'pred_bonus',
    'exp_total_pts',
]
# Keep only the requested columns that the predictions frame actually has, so
# the selection below cannot fail on a missing column.
available_cols = [col for col in display_cols if col in predictions.columns]

(
    predictions
    .nlargest(20, 'exp_total_pts')
    .loc[:, available_cols]
    .round(2)
)

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
50,Bruno Fernandes,Manchester United,MID,Spurs,1,87.360001,0.31,0.33,0.38,0.16,0.75,5.98
38,Mohamed Salah,Liverpool,MID,Man City,1,86.349998,0.41,0.22,0.28,0.0,0.7,5.68
146,Marcos Senesi,Bournemouth,DEF,Aston Villa,1,88.0,0.05,0.1,0.28,0.75,0.24,5.44
120,Erling Haaland,Manchester City,FWD,Liverpool,0,84.639999,0.54,0.09,0.3,0.0,0.93,5.36
80,Nordi Mukiele,Sunderland,DEF,Arsenal,0,88.25,0.02,0.09,0.33,0.71,0.19,5.34
148,Cristian Romero,Tottenham Hotspur,DEF,Man Utd,0,88.0,0.07,0.06,0.28,0.69,0.27,5.34
212,Bryan Mbeumo,Manchester United,MID,Spurs,1,88.0,0.39,0.16,0.38,0.01,0.5,5.33
235,Bukayo Saka,Arsenal,MID,Sunderland,1,80.419998,0.34,0.26,0.28,0.01,0.58,5.33
324,Lewis Hall,Newcastle United,DEF,Brentford,1,84.150002,0.06,0.13,0.31,0.45,0.4,5.29
326,Yerson Mosquera,Wolverhampton Wanderers,DEF,Chelsea,1,86.489998,0.08,0.04,0.27,0.65,0.27,5.27


In [5]:
# Look up every player whose name contains "Ander" (case-insensitive; rows with
# a missing name are treated as non-matches) and show the display columns.
predictions.loc[
    predictions['player_name'].str.contains('Ander', case=False, na=False),
    available_cols,
].round(2)


Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
60,Joachim Andersen,Fulham,DEF,Everton,1,89.300003,0.04,0.04,0.18,0.59,0.07,4.31
69,Sander Berge,Fulham,MID,Everton,1,86.190002,0.03,0.05,0.18,0.21,0.06,2.94
275,Elliot Anderson,Nottingham Forest,MID,Leeds,0,88.0,0.06,0.1,0.29,0.59,0.11,4.19


In [6]:
# Debug: inspect the goal-related inputs and outputs behind Bassey's prediction.
bassey = predictions.loc[
    predictions['player_name'].str.contains('Bassey', case=False, na=False)
]

# Columns of interest, grouped by role.
goal_features = [
    'player_name',
    'team',
    'opponent',
    'is_home',
    # Rolling per90 rates (model inputs)
    'xg_per90_roll3',
    'xg_per90_roll5',
    'xg_per90_roll10',
    'goals_per90_roll3',
    'goals_per90_roll5',
    'goals_per90_roll10',
    'shots_per90_roll3',
    'shots_per90_roll5',
    # Recent form
    'goals_last1',
    'goals_roll3',
    'goals_roll5',
    # Lifetime
    'lifetime_goals_per90',
    'lifetime_xg_per90',
    # Predictions
    'pred_minutes',
    'pred_goals_per90',
    'pred_exp_goals',
]
# Drop any requested column the frame does not carry.
available = [name for name in goal_features if name in bassey.columns]

print("Bassey's goal-related features:")
# Transposed so each feature is a row, which reads better for a single player.
bassey.loc[:, available].transpose()

Bassey's goal-related features:


Unnamed: 0,197
player_name,Calvin Bassey
team,Fulham
opponent,Everton
is_home,1
xg_per90_roll3,0.25
xg_per90_roll5,0.15
xg_per90_roll10,0.083333
goals_per90_roll3,0.333333
goals_per90_roll5,0.2
goals_per90_roll10,0.111111


In [7]:
# Debug: go back to the pipeline's raw frame for Bassey and check for duplicated
# matches or wrong goal counts.
bassey_raw = pipeline.df.loc[
    pipeline.df['player_name'].str.contains('Bassey', case=False, na=False)
]
# Current-season rows only, most recent gameweek first.
bassey_recent = (
    bassey_raw
    .loc[bassey_raw['season'] == '2025/2026']
    .sort_values(by='gameweek', ascending=False)
)

# More rows than distinct match IDs would mean duplicates survived loading.
print(f"Bassey's games this season: {len(bassey_recent)}")
print(f"Unique matches: {bassey_recent['match_id'].nunique()}")
print(f"\nIf games > unique matches, there are still duplicates!")
print("\nLast 10 rows:")
bassey_recent.loc[
    :, ['gameweek', 'team', 'opponent', 'minutes', 'goals', 'xg', 'match_id']
].iloc[:10]

Bassey's games this season: 17
Unique matches: 17

If games > unique matches, there are still duplicates!

Last 10 rows:


Unnamed: 0,gameweek,team,opponent,minutes,goals,xg,match_id
9075,24.0,Fulham,Manchester United,4.0,0.0,0.0,4813610
9074,16.0,Fulham,Burnley,90.0,1.0,0.74,4813527
9073,15.0,Fulham,Crystal Palace,90.0,0.0,0.01,4813519
9072,14.0,Fulham,Manchester City,90.0,0.0,0.0,4813509
9071,13.0,Fulham,Tottenham Hotspur,90.0,0.0,0.0,4813503
9070,12.0,Fulham,Sunderland,90.0,0.0,0.0,4813489
9069,11.0,Fulham,Everton,90.0,0.0,0.0,4813479
9068,10.0,Fulham,Wolverhampton Wanderers,90.0,0.0,0.0,4813468
9067,9.0,Fulham,Newcastle United,90.0,0.0,0.0,4813463
9066,8.0,Fulham,Arsenal,8.0,0.0,0.0,4813448


In [8]:
# Liverpool vs Newcastle clean-sheet features, read from the saved GW24
# prediction file rather than from the in-memory `predictions` frame.
import pandas as pd

pred = pd.read_csv('data/predictions/gw24_2025-2026.csv')
cols = [
    'team_conceded_roll5', 'team_conceded_roll10',
    'team_xga_roll5', 'team_xga_roll10',
    'team_cs_rate_roll5', 'team_cs_rate_roll10',
    'opp_goals_roll5', 'opp_goals_roll10',
    'opp_xg_roll5', 'opp_xg_roll10',
    'is_home', 'pred_cs_prob',
]

# na=False makes rows with a missing team/opponent count as non-matches; without
# it a single NaN ends up in the mask and boolean indexing fails. This matches
# how the other name filters in this notebook call str.contains.
is_fixture = (
    pred['team'].str.contains('Liverpool', na=False)
    & pred['opponent'].str.contains('Newcastle', na=False)
)

# Selecting only team-level columns and dropping duplicates collapses the
# matching player rows down to the distinct team-level row(s).
pred.loc[is_fixture, ['team', 'opponent'] + cols].drop_duplicates()

Unnamed: 0,team,opponent,team_conceded_roll5,team_conceded_roll10,team_xga_roll5,team_xga_roll10,team_cs_rate_roll5,team_cs_rate_roll10,opp_goals_roll5,opp_goals_roll10,opp_xg_roll5,opp_xg_roll10,is_home,pred_cs_prob
19,Liverpool,Newcastle,1.2,1.2,0.95,1.082,0.4,0.3,1.8,1.5,2.102,1.822,1,0.258891
