In [1]:
# ============================================================================
# FPL Model Tuning Notebook
# ============================================================================
# This notebook demonstrates the tune mode for evaluating and optimizing models.
# Use this to:
#   - Evaluate model performance with proper train/test splits
#   - Compare different hyperparameter configurations
#   - Run feature selection (importance-based or RFE)
#   - Run Optuna hyperparameter optimization
#   - View feature importance and model metrics
# ============================================================================

import pandas as pd
import numpy as np
import warnings
from pathlib import Path
import os

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# When the kernel was started inside the notebooks/ folder, step up one level;
# any other starting directory is left untouched.
_launch_dir = os.getcwd()
_started_in_notebooks = os.path.split(_launch_dir)[1] == 'notebooks'
if _started_in_notebooks:
    os.chdir("..")

_working_dir = os.getcwd()
print(f"Working directory: {_working_dir}")

# Import the pipeline
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline, TuneConfig, ModelMetrics


Working directory: c:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj


In [2]:
#from update_data import get_current_and_next_gw, get_next_gw_fixtures, get_players_for_teams
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline, TuneConfig, load_raw_data, compute_rolling_features

In [3]:
# ============================================================================
# GAMEWEEK PREDICTIONS - Using Tuned Models
# ============================================================================
# This cell:
# 1. Checks the FPL API for the current and next gameweek
# 2. Scrapes the completed gameweek (COMPLETED_GW) from FBRef (if not already present)
# 3. Gets the target gameweek (TARGET_GW) fixtures from the FPL API and saves them
# Later cells run predictions using the best tuned model configurations and
# print the model accuracy summaries and player projections table.
# ============================================================================

import sys
import requests
import json
from pathlib import Path
from datetime import datetime

# Add scripts to path
sys.path.insert(0, str(Path('scripts').resolve()))

from update_data import get_current_and_next_gw, get_next_gw_fixtures, get_players_for_teams
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline, TuneConfig

# ----------------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------------
# Resolve the data folder relative to the working directory rather than a
# user-specific absolute path, the same way the scripts folder is located via
# Path('scripts').resolve(). Assumes the working directory is the project root.
DATA_DIR = Path('data').resolve()
SEASON = "2025-26"

# Set target gameweeks by hand; they are cross-checked against the FPL API below.
COMPLETED_GW = 20  # The gameweek that just finished (need to scrape this)
TARGET_GW = 21     # The gameweek we want to predict

print("=" * 80)
print(f"GW{TARGET_GW} {SEASON} PREDICTIONS USING TUNED MODELS")
print("=" * 80)
print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print()

# ============================================================================
# STEP 1: Check current gameweek status from FPL API
# ============================================================================
print("STEP 1: Checking FPL API for gameweek status...")
current_gw, next_gw = get_current_and_next_gw()
print(f"  Current GW: {current_gw} (just completed/in progress)")
print(f"  Next GW: {next_gw} (to predict)")
print()

# The configured gameweeks are what the rest of the notebook uses; flag any
# disagreement with the API so stale constants are noticed before scraping.
if (COMPLETED_GW, TARGET_GW) != (current_gw, next_gw):
    print(
        f"  ⚠️ Configured gameweeks (completed={COMPLETED_GW}, target={TARGET_GW}) "
        f"differ from the FPL API (current={current_gw}, next={next_gw})"
    )
    print()

print(f"Scraping completed GW: {COMPLETED_GW}")
print(f"Target for prediction: GW{TARGET_GW}")
print()

# ============================================================================
# STEP 2: SCRAPE THE COMPLETED GAMEWEEK FROM FBREF
# ============================================================================
print(f"STEP 2: Scraping GW{COMPLETED_GW} data from FBRef...")
print("=" * 80)

# FPL has 10 matches per gameweek typically; at least this many match folders
# on disk is treated as a complete scrape.
EXPECTED_MATCHES_PER_GW = 10

completed_gw_dir = DATA_DIR / SEASON / f"gw{COMPLETED_GW}"

# Decide whether to scrape by counting the match sub-folders already on disk.
if completed_gw_dir.exists():
    existing_matches = [m for m in completed_gw_dir.iterdir() if m.is_dir()]
    print(f"  Found existing GW{COMPLETED_GW} data: {len(existing_matches)} matches")

    if len(existing_matches) >= EXPECTED_MATCHES_PER_GW:
        print(f"  ✓ GW{COMPLETED_GW} data appears complete, skipping scrape")
        SKIP_SCRAPE = True
    else:
        print(f"  ⚠️ GW{COMPLETED_GW} data incomplete ({len(existing_matches)}/{EXPECTED_MATCHES_PER_GW} matches), will re-scrape")
        SKIP_SCRAPE = False
else:
    print(f"  GW{COMPLETED_GW} data not found, will scrape from FBRef")
    SKIP_SCRAPE = False

if not SKIP_SCRAPE:
    # Imported here so the scraper module is only loaded when a scrape is needed.
    from scrape_gameweek import scrape_gameweek

    print()
    scrape_result = scrape_gameweek(
        gameweek=COMPLETED_GW,
        season=SEASON,
        delete_existing=True,
        verbose=True
    )

    if scrape_result['matches_scraped'] == 0:
        # f-string so the configured gameweek number is actually interpolated.
        print(f"\n⚠️ WARNING: No matches were scraped for GW{COMPLETED_GW}!")
        print("    This could mean:")
        print(f"    - GW{COMPLETED_GW} matches haven't been played yet")
        print("    - FBRef hasn't updated match reports yet")
        print("    - There was a connection issue")
        print("\n    Predictions will use data up to the latest available GW.")
    else:
        print(f"\n✓ Successfully scraped {scrape_result['matches_scraped']} GW{COMPLETED_GW} matches")

print()

# ============================================================================
# STEP 3: Get the target gameweek fixtures from FPL API
# ============================================================================
print(f"STEP 3: Fetching GW{TARGET_GW} fixtures from FPL API...")
fixtures_df = get_next_gw_fixtures(TARGET_GW)
print()

if len(fixtures_df) == 0:
    # Nothing to predict against; make that visible instead of failing later.
    print(f"⚠️ WARNING: No fixtures returned for GW{TARGET_GW}")

# Unique teams in first-seen order (home sides first, then away sides), so the
# list is the same on every run instead of depending on set ordering.
teams_playing = list(dict.fromkeys(
    fixtures_df['home_team'].tolist() + fixtures_df['away_team'].tolist()
))
print(f"Teams playing in GW{TARGET_GW}: {len(teams_playing)}")

# Save fixtures
fixtures_path = DATA_DIR / "upcoming" / f"gw{TARGET_GW}_fixtures.csv"
fixtures_path.parent.mkdir(parents=True, exist_ok=True)
fixtures_df.to_csv(fixtures_path, index=False)
print(f"Saved fixtures to: {fixtures_path}")
print()


GW17 2025-26 PREDICTIONS USING TUNED MODELS
Timestamp: 2026-01-05 20:21:22

STEP 1: Checking FPL API for gameweek status...
Checking FPL API for current gameweek...
  Current GW: 20, Next GW: 21
  Current GW: 20 (just completed/in progress)
  Next GW: 21 (to predict)

Scraping completed GW: 20
Target for prediction: GW21

STEP 2: Scraping GW16 data from FBRef...
  Found existing GW20 data: 10 matches
  ✓ GW20 data appears complete, skipping scrape

STEP 3: Fetching GW17 fixtures from FPL API...

[1] Fetching GW21 fixtures from FPL API...
  Found 10 fixtures for GW21:
    West Ham vs Nott'm Forest
    Bournemouth vs Spurs
    Brentford vs Sunderland
    Crystal Palace vs Aston Villa
    Everton vs Wolves
    Fulham vs Chelsea
    Man City vs Brighton
    Burnley vs Man Utd
    Newcastle vs Leeds
    Arsenal vs Liverpool

Teams playing in GW21: 20
Saved fixtures to: c:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj\data\upcoming\gw21_fixtures.csv



In [4]:
# ============================================================================
# SUBPROCESS-BASED OPTUNA TUNING (Memory-Safe!)
# ============================================================================
# Each model runs in a separate Python process - memory fully released between models!
# This is like restarting the kernel for each model automatically.
# 
# Run this cell rather than any older in-process tuning cell.
# ============================================================================
import subprocess
import json
from pathlib import Path

import sys  # needed in this cell for sys.executable

print("SUBPROCESS-BASED OPTUNA TUNING")
print("=" * 80)
print("Each model runs in isolated subprocess - memory released between models!")
print()

MODELS = ['goals', 'assists', 'minutes', 'defcon', 'clean_sheet', 'bonus']
N_TRIALS = 100  # <-- CHANGE THIS! 100 Optuna trials per model
RESULTS_DIR = DATA_DIR / 'tuning_results'
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

NOT_AVAILABLE = float('nan')

# Per-model metrics loaded from each subprocess's JSON output file.
tuning_results = {}

for model_name in MODELS:
    print(f"\n{'='*70}")
    print(f"Starting {model_name.upper()} in subprocess ({N_TRIALS} Optuna trials)...")
    print(f"{'='*70}")

    output_file = RESULTS_DIR / f'{model_name}_tuned.json'

    # Run in subprocess - ALL memory is released when subprocess exits!
    # sys.executable is the interpreter running this kernel, so the script sees
    # the same environment; a bare 'python' is whatever comes first on PATH.
    result = subprocess.run(
        [sys.executable, 'scripts/tune_single_model.py', model_name, str(N_TRIALS), str(output_file)],
        cwd=str(Path.cwd()),
    )

    if result.returncode == 0 and output_file.exists():
        with open(output_file) as f:
            tuning_results[model_name] = json.load(f)
        print(f"\n✓ {model_name.upper()} complete!")
    else:
        print(f"\n✗ {model_name.upper()} failed with code {result.returncode}")
        # NaN rather than 0: an MAE/RMSE of 0 would read as a perfect model.
        tuning_results[model_name] = {
            'mae': NOT_AVAILABLE,
            'rmse': NOT_AVAILABLE,
            'r2': NOT_AVAILABLE,
            'samples': 0,
            'best_params': {},
        }

# Print summary
print()
print("=" * 80)
print("MODEL ACCURACY SUMMARY")
print("=" * 80)
print()
print(f"{'Model':<15} {'MAE':>10} {'RMSE':>10} {'R²':>10} {'Samples':>10}")
print("-" * 55)
for name, res in tuning_results.items():
    # .get() keeps the table printing even if a result file lacks a metric key.
    print(
        f"{name.upper():<15} "
        f"{res.get('mae', NOT_AVAILABLE):>10.4f} "
        f"{res.get('rmse', NOT_AVAILABLE):>10.4f} "
        f"{res.get('r2', NOT_AVAILABLE):>10.4f} "
        f"{res.get('samples', 0):>10,}"
    )
print("-" * 55)

# The per-model JSON files were written by the subprocesses; nothing further is
# saved here.
print(f"\nResults saved in: {RESULTS_DIR}")
print("You can now run predictions using these tuned parameters!")


SUBPROCESS-BASED OPTUNA TUNING
Each model runs in isolated subprocess - memory released between models!


Starting GOALS in subprocess (100 Optuna trials)...

✓ GOALS complete!

Starting ASSISTS in subprocess (100 Optuna trials)...

✓ ASSISTS complete!

Starting MINUTES in subprocess (100 Optuna trials)...

✓ MINUTES complete!

Starting DEFCON in subprocess (100 Optuna trials)...

✓ DEFCON complete!

Starting CLEAN_SHEET in subprocess (100 Optuna trials)...

✓ CLEAN_SHEET complete!

Starting BONUS in subprocess (100 Optuna trials)...

✓ BONUS complete!

MODEL ACCURACY SUMMARY

Model                  MAE       RMSE         R²    Samples
-------------------------------------------------------
GOALS               0.2730     1.8110     0.0057     18,535
ASSISTS             0.1989     1.4532     0.0024     18,535
MINUTES            17.8268    26.9790     0.2586     18,535
DEFCON              5.0795    14.2502     0.0486     18,535
CLEAN_SHEET         0.1846     0.4296     0.6778      1,295


In [5]:
# ============================================================================
# RUN PREDICTIONS WITH TUNED PARAMETERS
# ============================================================================
# Run this after the subprocess tuning completes!
# ============================================================================

print("=" * 80)
print("RUNNING PREDICTIONS WITH TUNED PARAMETERS")
print("=" * 80)

# Reporting thresholds used by the tables below.
MIN_PRED_MINUTES = 30    # players predicted to play fewer minutes are hidden
TOP_N_OVERALL = 50       # rows in the overall projections table
TOP_N_PER_POSITION = 5   # rows listed for each position

# Initialize fresh pipeline
# NOTE(review): nothing in this cell passes tuning results to the pipeline;
# presumably it picks the tuned parameters up by itself - confirm.
pipeline = FPLPredictionPipeline(data_dir=str(DATA_DIR))

# Run predictions
predictions_df = pipeline.run(target_gw=TARGET_GW, target_season=SEASON, verbose=True)

# Rows that qualify for both report tables.
eligible_players = predictions_df[predictions_df['pred_minutes'] >= MIN_PRED_MINUTES]

# ============================================================================
# PLAYER PROJECTIONS TABLE - TOP 50
# ============================================================================
print()
print("=" * 80)
print(f"GW{TARGET_GW} PLAYER PROJECTIONS - TOP {TOP_N_OVERALL} BY EXPECTED POINTS")
print("=" * 80)

display_cols = [
    'player_name', 'team', 'fpl_position', 'opponent', 'is_home',
    'pred_minutes', 'pred_exp_goals', 'pred_exp_assists',
    'pred_cs_prob', 'pred_defcon_prob', 'pred_bonus', 'exp_total_pts'
]
# Short headers for the printed table, in the same order as display_cols.
display_labels = [
    'Player', 'Team', 'Pos', 'Opponent', 'Home',
    'Mins', 'xG', 'xA', 'CS%', 'Def%', 'Bonus', 'xPts'
]

top_players = eligible_players.nlargest(TOP_N_OVERALL, 'exp_total_pts')

# Relabel by name rather than by position so a column can never be mislabelled.
top_players_display = top_players[display_cols].rename(
    columns=dict(zip(display_cols, display_labels))
)

top_players_display['Mins'] = top_players_display['Mins'].round(0).astype(int)
top_players_display['xG'] = top_players_display['xG'].round(3)
top_players_display['xA'] = top_players_display['xA'].round(3)
# Probabilities are shown as percentage strings.
top_players_display['CS%'] = (top_players_display['CS%'] * 100).round(1).astype(str) + '%'
top_players_display['Def%'] = (top_players_display['Def%'] * 100).round(1).astype(str) + '%'
top_players_display['Bonus'] = top_players_display['Bonus'].round(2)
top_players_display['xPts'] = top_players_display['xPts'].round(2)
top_players_display['Home'] = top_players_display['Home'].map({1: 'H', 0: 'A'})

# Number the rows 1..N so the index reads as a rank.
top_players_display = top_players_display.reset_index(drop=True)
top_players_display.index = top_players_display.index + 1

print()
print(top_players_display.to_string())

# ============================================================================
# TOP PLAYERS BY POSITION
# ============================================================================
print()
print("=" * 80)
print("TOP PLAYERS BY POSITION")
print("=" * 80)

for pos in ['GK', 'DEF', 'MID', 'FWD']:
    pos_players = eligible_players[
        eligible_players['fpl_position'] == pos
    ].nlargest(TOP_N_PER_POSITION, 'exp_total_pts')

    print(f"\n{pos} - Top {TOP_N_PER_POSITION}:")
    print("-" * 60)
    for _, row in pos_players.iterrows():
        home_away = "(H)" if row['is_home'] == 1 else "(A)"
        print(f"  {row['player_name']:25s} vs {row['opponent']:15s} {home_away} | xPts: {row['exp_total_pts']:.2f}")

# ============================================================================
# SAVE PREDICTIONS
# ============================================================================
print()
print("=" * 80)
output_path = DATA_DIR / 'predictions' / f'gw{TARGET_GW}_{SEASON}_all_predictions.csv'
# Create the folder first so the save works on a fresh checkout.
output_path.parent.mkdir(parents=True, exist_ok=True)
predictions_df.to_csv(output_path, index=False)
print(f"✓ Full predictions saved to: {output_path}")
print(f"✓ Total players predicted: {len(predictions_df)}")
print("=" * 80)


RUNNING PREDICTIONS WITH TUNED PARAMETERS
FPL PREDICTION PIPELINE - GW21 2025-26
[1] Loading data from raw CSV files...
  Loaded 92674 player-match records from 6472 team-matches
  Seasons: ['2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26']
[2] Computing rolling features...
  Computing lifetime player profile features...
    Added 36 lifetime player profile features
  Computed 158 rolling + lifetime features
[3] Splitting train/test data...
  Mode: Prediction (GW21 is upcoming)
  Building prediction set for GW21...
    Using historical data up to GW20
    Found 10 fixtures, 20 teams
    Found 487 players in prior GW data
    Teams matched: 20/20
    Final test set: 487 players with features
  Train: 92674 records, Test: 487 records
[3.5] Fetching FPL API availability status...
  [OK] Matched 424/487 players with FPL status
    Unmatched: 63 players (showing first 10: ['Alisson (liverpool)', 'Anton Stach (leeds_united)', 'Ao Tanaka (leeds

In [6]:
def print_model_features(label, model):
    """Print a numbered list of ``model.FEATURES`` under a header line.

    Args:
        label: Text shown in front of "Model Features" in the header.
        model: Any object exposing a ``FEATURES`` sequence.
    """
    print(f"{label} Model Features ({len(model.FEATURES)}):")
    print("-" * 60)
    for i, feat in enumerate(model.FEATURES, 1):
        print(f"  {i:2}. {feat}")


# (header label, name passed to pipeline.get_model) in the order they are printed.
FEATURE_REPORTS = [
    ('Goals', 'goals'),
    ('assists', 'assists'),
    ('defcon', 'defcon'),
    ('minutes', 'minutes'),
    ('bonus', 'bonus'),
    ('cs', 'clean_sheet'),
]

for report_label, model_key in FEATURE_REPORTS:
    print_model_features(report_label, pipeline.get_model(model_key))

Goals Model Features (14):
------------------------------------------------------------
   1. lifetime_xg_per90
   2. is_forward
   3. lifetime_shots_per90
   4. lifetime_goals_per90
   5. is_midfielder
   6. is_defender
   7. is_home
   8. xg_per90_roll10
   9. team_shots_roll5
  10. opp_conceded_roll20
  11. team_goals_roll20
  12. opp_xg_against_roll10
  13. team_xg_roll20
  14. team_shots_roll20
assists Model Features (10):
------------------------------------------------------------
   1. lifetime_xag_per90
   2. lifetime_sca_per90
   3. lifetime_gca_per90
   4. lifetime_key_passes_per90
   5. team_goals_roll20
   6. team_shots_roll20
   7. sca_per90_roll5
   8. key_passes_per90_roll5
   9. team_xg_roll20
  10. team_xg_roll10
defcon Model Features (20):
------------------------------------------------------------
   1. lifetime_defcon_per90
   2. defcon_per90_roll10
   3. is_midfielder
   4. lifetime_minutes
   5. is_defender
   6. is_home
   7. defcon_per90_roll5
   8. hit_thresh

In [7]:
import pandas as pd
from pathlib import Path
from models.clean_sheet_model import CleanSheetModel

# NOTE: these assignments overwrite the notebook-level TARGET_GW / SEASON /
# DATA_DIR, so cells run after this one see the values below.
TARGET_GW = 20  # Update to current gameweek
SEASON = "2025-26"
DATA_DIR = Path("data")

# Load the saved predictions for the selected gameweek and season.
pred_file = DATA_DIR / "predictions" / f"gw{TARGET_GW}_{SEASON}_predictions.csv"
df = pd.read_csv(pred_file)

# Filter for Arsenal and get first row (all Arsenal players have same team features)
arsenal_rows = df[df['team'].str.contains('arsenal', case=False, na=False)]
if arsenal_rows.empty:
    raise ValueError(f"No Arsenal rows found in {pred_file}")
arsenal = arsenal_rows.iloc[0]

# Get all CS features
cs_features = CleanSheetModel.FEATURES

# One row per field, with the prediction first and the model features after it.
# A feature that is not a column of the predictions file shows up as a missing
# value because .get() falls back to None.
report_fields = ['pred_cs_prob', *cs_features]
feature_df = pd.DataFrame({
    'Feature': report_fields,
    'Value': [arsenal.get(field, None) for field in report_fields]
})

# Display
print(f"\nArsenal vs {arsenal.get('opponent', 'Unknown')} - GW{TARGET_GW}")
print(f"Home/Away: {'Home' if arsenal.get('is_home', 0) == 1 else 'Away'}")
print(f"\nClean Sheet Features:")
print(feature_df.to_string(index=False))


Arsenal vs bournemouth - GW20
Home/Away: Away

Clean Sheet Features:
                   Feature    Value
              pred_cs_prob 0.241303
 team_goals_conceded_last1 1.000000
 team_goals_conceded_roll3 0.666667
 team_goals_conceded_roll5      NaN
team_goals_conceded_roll10      NaN
team_goals_conceded_roll30 0.766667
            team_xga_roll5 1.380000
   team_clean_sheets_roll5 1.000000
  team_clean_sheets_roll10      NaN
  team_clean_sheets_roll30      NaN
       team_xga_roll5_home 1.380000
       team_xga_roll5_away 0.780000
    opp_goals_scored_last1      NaN
    opp_goals_scored_roll3      NaN
    opp_goals_scored_roll5      NaN
   opp_goals_scored_roll10      NaN
   opp_goals_scored_roll30 1.333333
              opp_xg_roll5 2.040000
         opp_xg_roll5_home 1.620000
         opp_xg_roll5_away 2.040000
              xga_xg_ratio      NaN
       defensive_advantage      NaN
                   is_home 0.000000
              team_encoded      NaN
          opponent_encoded    

In [8]:
import pandas as pd
from pathlib import Path
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline
from models.clean_sheet_model import CleanSheetModel

# NOTE: these assignments, and `pipeline` below, overwrite the notebook-level
# names of the same spelling, so cells run after this one see these values.
TARGET_GW = 20  # Update to current gameweek
SEASON = "2025-26"
DATA_DIR = Path("data")

pred_file = DATA_DIR / "predictions" / f"gw{TARGET_GW}_{SEASON}_predictions.csv"

# Read any saved predictions BEFORE running the pipeline, so the decision does
# not depend on whether the run itself writes that file.
saved_predictions = pd.read_csv(pred_file) if pred_file.exists() else None

# The pipeline is run exactly once: a trained model is needed for the feature
# importances whether or not saved predictions exist.
pipeline = FPLPredictionPipeline(str(DATA_DIR))
fresh_predictions = pipeline.run(target_gw=TARGET_GW, target_season=SEASON, verbose=False)

# Prefer the saved file when there is one; otherwise use this run's output.
df = saved_predictions if saved_predictions is not None else fresh_predictions

# Get Arsenal row
arsenal_rows = df[df['team'].str.contains('arsenal', case=False, na=False)]
if arsenal_rows.empty:
    raise ValueError(f"No Arsenal rows found for GW{TARGET_GW} {SEASON}")
arsenal = arsenal_rows.iloc[0]

# Get feature importances
fi_df = pipeline.models['clean_sheet'].feature_importance()
# feature name -> importance, keeping the first row when a name repeats.
importance_by_feature = fi_df.drop_duplicates('feature').set_index('feature')['importance']

# Create combined DataFrame
cs_features = CleanSheetModel.FEATURES
combined_data = []
for feat in cs_features:
    val = arsenal.get(feat, None)
    combined_data.append({
        'Feature': feat,
        # Absent or NaN values are shown as 'N/A'.
        'Value': val if val is not None and not pd.isna(val) else 'N/A',
        # Features without an importance row are reported as 0.0.
        'Importance': importance_by_feature.get(feat, 0.0)
    })

combined_df = pd.DataFrame(combined_data).sort_values('Importance', ascending=False)

# Display
print(f"\nArsenal vs {arsenal.get('opponent', 'Unknown')} - GW{TARGET_GW}")
print(f"Home/Away: {'Home' if arsenal.get('is_home', 0) == 1 else 'Away'}")
print(f"Predicted CS Probability: {arsenal.get('pred_cs_prob', 0):.1%}")
print("\n" + "=" * 100)
print("CLEAN SHEET FEATURES (sorted by importance):")
print("=" * 100)
print(combined_df.to_string(index=False))


Arsenal vs bournemouth - GW20
Home/Away: Away
Predicted CS Probability: 24.1%

CLEAN SHEET FEATURES (sorted by importance):
                   Feature     Value  Importance
   opp_goals_scored_roll30  1.333333    0.185023
         opp_xg_roll5_home      1.62    0.120496
team_goals_conceded_roll30  0.766667    0.113478
       team_xga_roll5_home      1.38    0.107059
       team_xga_roll5_away      0.78    0.094489
         opp_xg_roll5_away      2.04    0.078598
                   is_home         0    0.069216
            team_xga_roll5      1.38    0.068688
 team_goals_conceded_last1       1.0    0.057744
   team_clean_sheets_roll5       1.0    0.055423
 team_goals_conceded_roll3  0.666667    0.049788
              team_encoded       N/A    0.000000
       defensive_advantage       N/A    0.000000
              xga_xg_ratio       N/A    0.000000
              opp_xg_roll5      2.04    0.000000
    opp_goals_scored_roll3       N/A    0.000000
   opp_goals_scored_roll10       N/A    0.

In [9]:
# Projection columns for every player whose name contains "Szobo".
# na=False treats a missing name as "no match" instead of breaking the mask;
# regex=False matches the text literally.
predictions_df.loc[
    predictions_df['player_name'].str.contains("Szobo", regex=False, na=False),
    display_cols,
]

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
110,Dominik Szoboszlai,liverpool,MID,Arsenal,0,88.63031,0.216073,0.206507,0.21647,0.090181,0.277,4.373715


In [10]:
# Exact-name lookup: projection columns for the row(s) named 'Cole Palmer'.
predictions_df.loc[predictions_df['player_name'].eq('Cole Palmer'), display_cols]

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
79,Cole Palmer,chelsea,MID,Fulham,0,83.681023,0.248127,0.224496,0.25714,0.012376,0.197,4.393014


In [11]:
# Fifteen highest predicted expected-goals rows, limited to the projection columns.
(
    predictions_df
    .sort_values(by="pred_exp_goals", ascending=False)
    .iloc[:15]
    .loc[:, display_cols]
)

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
439,Thiago,brentford,FWD,Sunderland,1,88.0,0.653154,0.106732,0.314135,0.007624,2.222,7.170061
129,Erling Haaland,manchester_city,FWD,Brighton,1,88.0,0.597685,0.137139,0.276714,0.001791,0.481,5.286739
356,Nick Woltemade,newcastle_united,FWD,Leeds,1,70.0,0.48093,0.041684,0.30325,0.002348,0.245,4.298467
262,Kevin Schade,brentford,MID,Sunderland,1,88.0,0.436084,0.118055,0.314135,0.015294,0.627,5.506307
65,Callum Wilson,west_ham_united,FWD,Nott'm Forest,1,83.695465,0.432505,0.089892,0.269529,0.001645,0.506,4.508985
481,Yoane Wissa,newcastle_united,FWD,Leeds,1,73.803085,0.384895,0.083443,0.30325,0.003069,0.134,3.930048
78,Cody Gakpo,liverpool,MID,Arsenal,0,82.493652,0.375283,0.140632,0.21647,0.00587,0.207,4.73352
209,Jarrod Bowen,west_ham_united,FWD,Nott'm Forest,1,90.0,0.368401,0.128275,0.269529,0.016592,0.622,4.513614
214,Jean-Philippe Mateta,crystal_palace,FWD,Aston Villa,1,85.36805,0.367674,0.076414,0.263171,0.005244,0.28,3.990427
387,Phil Foden,manchester_city,MID,Brighton,1,88.0,0.345264,0.250912,0.276714,0.088543,0.294,5.22686


In [12]:
# Fifteen highest predicted expected-assists rows, limited to the projection columns.
(
    predictions_df
    .sort_values(by="pred_exp_assists", ascending=False)
    .iloc[:15]
    .loc[:, display_cols]
)

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
393,Rayan Cherki,manchester_city,MID,Brighton,1,90.0,0.123053,0.363304,0.276714,0.128024,0.105,4.34294
387,Phil Foden,manchester_city,MID,Brighton,1,88.0,0.345264,0.250912,0.276714,0.088543,0.294,5.22686
62,Bukayo Saka,arsenal,MID,Liverpool,1,88.0,0.343376,0.250618,0.321195,0.019917,1.149,5.978763
219,Jeremy Doku,manchester_city,MID,Brighton,1,80.0,0.134668,0.244686,0.276714,0.026148,0.124,3.86041
309,Martin Ødegaard,arsenal,MID,Liverpool,1,81.963051,0.122804,0.23373,0.321195,0.010757,0.26,3.917921
79,Cole Palmer,chelsea,MID,Fulham,0,83.681023,0.248127,0.224496,0.25714,0.012376,0.197,4.393014
380,Pascal Groß,brighton__hove_albion,MID,Man City,0,88.0,0.140238,0.220962,0.205228,0.041558,0.207,3.859419
185,Jack Grealish,everton,MID,Wolves,1,87.42157,0.190741,0.217275,0.320798,0.012692,0.803,4.754713
385,Pedro Neto,chelsea,MID,Fulham,0,88.0,0.150622,0.212699,0.25714,0.011364,0.193,3.864075
110,Dominik Szoboszlai,liverpool,MID,Arsenal,0,88.63031,0.216073,0.206507,0.21647,0.090181,0.277,4.373715


In [13]:

# Players to compare, listed once and used directly as the filter.
# NOTE(review): matching is exact, including accents. 'Ibrahima Konate',
# 'Yeremy Pino' and 'Raul Jimenez' are written without accents here - confirm
# they match the spelling used in predictions_df.
team = [
    "Cole Palmer", "Maxence Lacroix", "Danny Welbeck", "Ibrahima Konate",
    "Yeremy Pino", "Raul Jimenez", "Marcus Tavernier", "Justin Kluivert",
    "Phil Foden", "Rayan Cherki", "Yankuba Minteh", "João Pedro",
    "Enzo Fernández", "Sávio",
]
predictions_df.loc[predictions_df['player_name'].isin(team), display_cols].sort_values("exp_total_pts")

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
435,Sávio,manchester_city,MID,Brighton,1,11.298695,0.026192,0.032843,0.276714,0.000368,0.0,1.50694
94,Danny Welbeck,brighton__hove_albion,FWD,Man City,0,59.975109,0.280815,0.076603,0.205228,0.002291,0.075,2.432651
245,João Pedro,chelsea,FWD,Fulham,0,73.375229,0.233132,0.128733,0.25714,0.007204,0.404,3.737134
476,Yankuba Minteh,brighton__hove_albion,MID,Man City,0,80.0,0.223984,0.156883,0.205228,0.025542,0.15,3.996884
253,Justin Kluivert,bournemouth,MID,Spurs,1,80.0,0.21764,0.140524,0.26387,0.011109,0.391,4.186859
393,Rayan Cherki,manchester_city,MID,Brighton,1,90.0,0.123053,0.363304,0.276714,0.128024,0.105,4.34294
79,Cole Palmer,chelsea,MID,Fulham,0,83.681023,0.248127,0.224496,0.25714,0.012376,0.197,4.393014
127,Enzo Fernández,chelsea,MID,Fulham,0,87.648895,0.17264,0.163706,0.25714,0.131197,0.593,4.466854
306,Marcus Tavernier,bournemouth,MID,Spurs,1,88.0,0.162661,0.151293,0.26387,0.266059,0.43,4.493172
327,Maxence Lacroix,crystal_palace,DEF,Aston Villa,1,89.354401,0.050102,0.026212,0.263171,0.558827,0.055,4.604586


In [14]:
# All columns for every player whose name contains "vio".
# na=False treats a missing name as "no match" instead of breaking the mask;
# regex=False matches the text literally.
predictions_df.loc[predictions_df['player_name'].str.contains("vio", regex=False, na=False)]

Unnamed: 0,player_name,team,team_display,opponent,position,is_home,gameweek,season,player_id,player_roll5_goals,...,pred_cs_prob,pred_bonus,fpl_position,exp_goals_pts,exp_assists_pts,exp_cs_pts,exp_defcon_pts,exp_bonus_pts,exp_appearance_pts,exp_total_pts
435,Sávio,manchester_city,Man City,Brighton,AM,1,21,2025-26,Sávio_manchester_city,0.0,...,0.276714,0.0,MID,0.13096,0.098528,0.276714,0.000737,0.0,1.0,1.50694
