In [1]:
# ============================================================================
# FPL Model Tuning Notebook
# ============================================================================
# This notebook demonstrates the tune mode for evaluating and optimizing models.
# Use this to:
#   - Evaluate model performance with proper train/test splits
#   - Compare different hyperparameter configurations
#   - Run feature selection (importance-based or RFE)
#   - Run Optuna hyperparameter optimization
#   - View feature importance and model metrics
# ============================================================================

import pandas as pd
import numpy as np
import warnings
from pathlib import Path
import os

warnings.filterwarnings('ignore')

# When the kernel starts inside the 'notebooks' folder, step up one level so
# that relative paths resolve from the parent directory; otherwise stay put.
launch_dir_name = Path.cwd().name
if launch_dir_name == 'notebooks':
    os.chdir(os.pardir)
print(f"Working directory: {Path.cwd()}")

# Import the pipeline
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline, TuneConfig, ModelMetrics


Working directory: c:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj


In [2]:
#from update_data import get_current_and_next_gw, get_next_gw_fixtures, get_players_for_teams
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline, TuneConfig, load_raw_data, compute_rolling_features

In [3]:
# ============================================================================
# NEXT-GAMEWEEK PREDICTIONS - Using Tuned Models
# ============================================================================
# This cell:
# 1. Scrapes the just-completed gameweek (COMPLETED_GW) from FBRef (if not already present)
# 2. Gets the fixtures for the gameweek being predicted (TARGET_GW) from the FPL API
# 3. Saves those fixtures under data/upcoming/
# Later cells run the predictions with the tuned model configurations and
# print the model accuracy summaries and player projections table.
# ============================================================================

import sys
import requests
import json
from pathlib import Path
from datetime import datetime

# Add scripts to path
sys.path.insert(0, str(Path('scripts').resolve()))

from update_data import get_current_and_next_gw, get_next_gw_fixtures, get_players_for_teams
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline, TuneConfig

# Project data folder, resolved from the working directory instead of a
# user-specific absolute path. The relative 'scripts' path added to sys.path
# above already relies on the working directory being the project root.
DATA_DIR = Path('data').resolve()
SEASON = "2025-26"

# Gameweeks are pinned by hand. STEP 1 compares them with the FPL API so that a
# stale value shows up in the output instead of silently targeting the wrong week.
COMPLETED_GW = 21  # The gameweek that just finished (need to scrape this)
TARGET_GW = 22     # The gameweek we want to predict

# A gameweek folder holding fewer match sub-folders than this is re-scraped
EXPECTED_MATCHES = 10

print("=" * 80)
print(f"GW{TARGET_GW} {SEASON} PREDICTIONS USING TUNED MODELS")
print("=" * 80)
print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print()

# ============================================================================
# STEP 1: Check current gameweek status from FPL API
# ============================================================================
print("STEP 1: Checking FPL API for gameweek status...")
current_gw, next_gw = get_current_and_next_gw()
print(f"  Current GW: {current_gw} (just completed/in progress)")
print(f"  Next GW: {next_gw} (to predict)")
if (COMPLETED_GW, TARGET_GW) != (current_gw, next_gw):
    # Informational only: the pinned values still drive the rest of the notebook
    print(f"  ⚠️ Pinned gameweeks (GW{COMPLETED_GW} -> GW{TARGET_GW}) differ from the FPL API "
          f"(GW{current_gw} -> GW{next_gw}); update COMPLETED_GW / TARGET_GW if they are stale")
print()

print(f"Scraping completed GW: {COMPLETED_GW}")
print(f"Target for prediction: GW{TARGET_GW}")
print()

# ============================================================================
# STEP 2: Scrape the completed gameweek from FBRef
# ============================================================================
print(f"STEP 2: Scraping GW{COMPLETED_GW} data from FBRef...")
print("=" * 80)

completed_gw_dir = DATA_DIR / SEASON / f"gw{COMPLETED_GW}"
gw16_dir = completed_gw_dir  # legacy name, kept in case other cells still refer to it

# Skip the scrape only when the folder already holds a full set of match sub-folders
SKIP_SCRAPE = False
if completed_gw_dir.exists():
    existing_matches = [m for m in completed_gw_dir.iterdir() if m.is_dir()]
    print(f"  Found existing GW{COMPLETED_GW} data: {len(existing_matches)} matches")

    if len(existing_matches) >= EXPECTED_MATCHES:
        print(f"  ✓ GW{COMPLETED_GW} data appears complete, skipping scrape")
        SKIP_SCRAPE = True
    else:
        print(f"  ⚠️ GW{COMPLETED_GW} data incomplete ({len(existing_matches)}/{EXPECTED_MATCHES} matches), will re-scrape")
else:
    print(f"  GW{COMPLETED_GW} data not found, will scrape from FBRef")

if not SKIP_SCRAPE:
    # Imported lazily: the scraper is only needed when data is missing or incomplete
    from scrape_gameweek import scrape_gameweek

    print()
    scrape_result = scrape_gameweek(
        gameweek=COMPLETED_GW,
        season=SEASON,
        delete_existing=True,
        verbose=True
    )

    if scrape_result['matches_scraped'] == 0:
        print(f"\n⚠️ WARNING: No matches were scraped for GW{COMPLETED_GW}!")
        print("    This could mean:")
        print(f"    - GW{COMPLETED_GW} matches haven't been played yet")
        print("    - FBRef hasn't updated match reports yet")
        print("    - There was a connection issue")
        print("\n    Predictions will use data up to the latest available GW.")
    else:
        print(f"\n✓ Successfully scraped {scrape_result['matches_scraped']} GW{COMPLETED_GW} matches")

print()

# ============================================================================
# STEP 3: Get the target gameweek's fixtures from FPL API
# ============================================================================
print(f"STEP 3: Fetching GW{TARGET_GW} fixtures from FPL API...")
fixtures_df = get_next_gw_fixtures(TARGET_GW)
print()

# Every team that appears as home or away side in the target gameweek
teams_playing = list(set(fixtures_df['home_team'].tolist() + fixtures_df['away_team'].tolist()))
print(f"Teams playing in GW{TARGET_GW}: {len(teams_playing)}")

# Save fixtures
fixtures_path = DATA_DIR / "upcoming" / f"gw{TARGET_GW}_fixtures.csv"
fixtures_path.parent.mkdir(parents=True, exist_ok=True)
fixtures_df.to_csv(fixtures_path, index=False)
print(f"Saved fixtures to: {fixtures_path}")
print()


GW17 2025-26 PREDICTIONS USING TUNED MODELS
Timestamp: 2026-01-10 16:26:28

STEP 1: Checking FPL API for gameweek status...
Checking FPL API for current gameweek...
  Current GW: 21, Next GW: 22
  Current GW: 21 (just completed/in progress)
  Next GW: 22 (to predict)

Scraping completed GW: 21
Target for prediction: GW22

STEP 2: Scraping GW16 data from FBRef...
  Found existing GW21 data: 10 matches
  ✓ GW21 data appears complete, skipping scrape

STEP 3: Fetching GW17 fixtures from FPL API...

[1] Fetching GW22 fixtures from FPL API...
  Found 10 fixtures for GW22:
    Man Utd vs Man City
    Chelsea vs Brentford
    Leeds vs Fulham
    Liverpool vs Burnley
    Spurs vs West Ham
    Sunderland vs Crystal Palace
    Nott'm Forest vs Arsenal
    Wolves vs Newcastle
    Aston Villa vs Everton
    Brighton vs Bournemouth

Teams playing in GW22: 20
Saved fixtures to: c:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj\data\upcoming\gw22_fixtures.csv



In [4]:
# ============================================================================
# SUBPROCESS-BASED OPTUNA TUNING (Memory-Safe!)
# ============================================================================
# Each model runs in a separate Python process - memory fully released between models!
# This is like restarting the kernel for each model automatically.
# 
# RUN THIS CELL INSTEAD OF THE OLD TUNING CELL ABOVE!
# ============================================================================
import json
import subprocess
import sys
from pathlib import Path

print("SUBPROCESS-BASED OPTUNA TUNING")
print("=" * 80)
print("Each model runs in isolated subprocess - memory released between models!")
print()

MODELS = ['goals', 'assists', 'minutes', 'defcon', 'clean_sheet', 'bonus']
N_TRIALS = 100  # <-- CHANGE THIS! 100 Optuna trials per model
RESULTS_DIR = DATA_DIR / 'tuning_results'
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Metric keys shown in the summary table
METRIC_KEYS = ('mae', 'rmse', 'r2', 'samples')


def _metric(result, key):
    """Return result[key], or NaN when the key is absent or null."""
    value = result.get(key)
    return float('nan') if value is None else value


tuning_results = {}

for model_name in MODELS:
    print(f"\n{'='*70}")
    print(f"Starting {model_name.upper()} in subprocess ({N_TRIALS} Optuna trials)...")
    print(f"{'='*70}")

    output_file = RESULTS_DIR / f'{model_name}_tuned.json'

    # Run in subprocess - ALL memory is released when subprocess exits!
    # sys.executable is the interpreter running this kernel, so the child uses
    # the same environment rather than whichever 'python' is first on PATH.
    result = subprocess.run(
        [sys.executable, 'scripts/tune_single_model.py', model_name, str(N_TRIALS), str(output_file)],
        cwd=str(Path.cwd()),
    )

    if result.returncode == 0 and output_file.exists():
        with open(output_file) as f:
            tuning_results[model_name] = json.load(f)
        print(f"\n✓ {model_name.upper()} complete!")
    else:
        print(f"\n✗ {model_name.upper()} failed with code {result.returncode}")
        # NaN rather than 0: a zero MAE/RMSE would read as a perfect model
        failed = {key: float('nan') for key in METRIC_KEYS}
        failed['best_params'] = {}
        tuning_results[model_name] = failed

# Print summary
print()
print("=" * 80)
print("MODEL ACCURACY SUMMARY")
print("=" * 80)
print()
print(f"{'Model':<15} {'MAE':>10} {'RMSE':>10} {'R²':>10} {'Samples':>10}")
print("-" * 55)
for name, res in tuning_results.items():
    # _metric tolerates result files that omit a key, so one incomplete file
    # cannot abort the table part-way through
    print(
        f"{name.upper():<15} {_metric(res, 'mae'):>10.4f} {_metric(res, 'rmse'):>10.4f} "
        f"{_metric(res, 'r2'):>10.4f} {_metric(res, 'samples'):>10,}"
    )
print("-" * 55)

# Save tuning results summary
print(f"\nResults saved in: {RESULTS_DIR}")
print("You can now run predictions using these tuned parameters!")


SUBPROCESS-BASED OPTUNA TUNING
Each model runs in isolated subprocess - memory released between models!


Starting GOALS in subprocess (100 Optuna trials)...

✓ GOALS complete!

Starting ASSISTS in subprocess (100 Optuna trials)...

✓ ASSISTS complete!

Starting MINUTES in subprocess (100 Optuna trials)...

✓ MINUTES complete!

Starting DEFCON in subprocess (100 Optuna trials)...

✓ DEFCON complete!

Starting CLEAN_SHEET in subprocess (100 Optuna trials)...

✓ CLEAN_SHEET complete!

Starting BONUS in subprocess (100 Optuna trials)...

✓ BONUS complete!

MODEL ACCURACY SUMMARY

Model                  MAE       RMSE         R²    Samples
-------------------------------------------------------
GOALS               0.2698     1.8077     0.0044     18,594
ASSISTS             0.1960     1.4450     0.0024     18,594
MINUTES            17.9735    27.2325     0.2445     18,594
DEFCON              5.1257    14.3048     0.0471     18,594
CLEAN_SHEET         0.1924     0.4387     0.6319      1,299


In [5]:
# ============================================================================
# RUN PREDICTIONS WITH TUNED PARAMETERS
# ============================================================================
# Run this after the subprocess tuning completes!
# ============================================================================

print("=" * 80)
print("RUNNING PREDICTIONS WITH TUNED PARAMETERS")
print("=" * 80)

# Thresholds shared by the ranking tables below
MIN_PRED_MINUTES = 30     # players predicted to play fewer minutes are left out
TOP_N_OVERALL = 50
TOP_N_PER_POSITION = 5

# Initialize fresh pipeline
pipeline = FPLPredictionPipeline(data_dir=str(DATA_DIR))

# Run predictions
predictions_df = pipeline.run(target_gw=TARGET_GW, target_season=SEASON, verbose=True)

# ============================================================================
# PLAYER PROJECTIONS TABLE - TOP N
# ============================================================================
print()
print("=" * 80)
print(f"GW{TARGET_GW} PLAYER PROJECTIONS - TOP {TOP_N_OVERALL} BY EXPECTED POINTS")
print("=" * 80)

# Columns shown in the projection tables (also reused by the lookup cells below)
display_cols = [
    'player_name', 'team', 'fpl_position', 'opponent', 'is_home',
    'pred_minutes', 'pred_exp_goals', 'pred_exp_assists',
    'pred_cs_prob', 'pred_defcon_prob', 'pred_bonus', 'exp_total_pts'
]

# Short headings for the printed table, keyed by source column
column_labels = {
    'player_name': 'Player', 'team': 'Team', 'fpl_position': 'Pos',
    'opponent': 'Opponent', 'is_home': 'Home',
    'pred_minutes': 'Mins', 'pred_exp_goals': 'xG', 'pred_exp_assists': 'xA',
    'pred_cs_prob': 'CS%', 'pred_defcon_prob': 'Def%',
    'pred_bonus': 'Bonus', 'exp_total_pts': 'xPts',
}

likely_to_play = predictions_df[predictions_df['pred_minutes'] >= MIN_PRED_MINUTES]
top_players = likely_to_play.nlargest(TOP_N_OVERALL, 'exp_total_pts')

top_players_display = top_players[display_cols].rename(columns=column_labels)

top_players_display['Mins'] = top_players_display['Mins'].round(0).astype(int)
top_players_display['xG'] = top_players_display['xG'].round(3)
top_players_display['xA'] = top_players_display['xA'].round(3)
# Probabilities are rendered as percentage strings, e.g. 0.2804 -> '28.0%'
top_players_display['CS%'] = (top_players_display['CS%'] * 100).round(1).astype(str) + '%'
top_players_display['Def%'] = (top_players_display['Def%'] * 100).round(1).astype(str) + '%'
top_players_display['Bonus'] = top_players_display['Bonus'].round(2)
top_players_display['xPts'] = top_players_display['xPts'].round(2)
top_players_display['Home'] = top_players_display['Home'].map({1: 'H', 0: 'A'})

# Number the rows 1..N so the index doubles as the rank
top_players_display = top_players_display.reset_index(drop=True)
top_players_display.index = top_players_display.index + 1

print()
print(top_players_display.to_string())

# ============================================================================
# TOP PLAYERS BY POSITION
# ============================================================================
print()
print("=" * 80)
print("TOP PLAYERS BY POSITION")
print("=" * 80)

for pos in ['GK', 'DEF', 'MID', 'FWD']:
    pos_players = likely_to_play[likely_to_play['fpl_position'] == pos].nlargest(
        TOP_N_PER_POSITION, 'exp_total_pts'
    )

    print(f"\n{pos} - Top {TOP_N_PER_POSITION}:")
    print("-" * 60)
    for _, row in pos_players.iterrows():
        home_away = "(H)" if row['is_home'] == 1 else "(A)"
        print(f"  {row['player_name']:25s} vs {row['opponent']:15s} {home_away} | xPts: {row['exp_total_pts']:.2f}")

# ============================================================================
# SAVE PREDICTIONS
# ============================================================================
print()
print("=" * 80)
output_path = DATA_DIR / 'predictions' / f'gw{TARGET_GW}_{SEASON}_all_predictions.csv'
# to_csv does not create missing folders; without this a first run on a clean
# checkout fails only after the whole pipeline has already finished
output_path.parent.mkdir(parents=True, exist_ok=True)
predictions_df.to_csv(output_path, index=False)
print(f"✓ Full predictions saved to: {output_path}")
print(f"✓ Total players predicted: {len(predictions_df)}")
print("=" * 80)


RUNNING PREDICTIONS WITH TUNED PARAMETERS
FPL PREDICTION PIPELINE - GW22 2025-26
[1] Loading data from raw CSV files...
  Loaded 92969 player-match records from 6492 team-matches
  Seasons: ['2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26']
[2] Computing rolling features...
  Computing lifetime player profile features...
    Added 36 lifetime player profile features
  Computed 158 rolling + lifetime features
[3] Splitting train/test data...
  Mode: Prediction (GW22 is upcoming)
  Building prediction set for GW22...
    Using historical data up to GW21
    Found 10 fixtures, 20 teams
    Found 491 players in prior GW data
    Teams matched: 20/20
    Final test set: 491 players with features
  Train: 92969 records, Test: 491 records
[3.5] Fetching FPL API availability status...
  [*] Loaded 595 name matches from c:\Users\dpfin\OneDrive\Desktop\ultimate_p_proj\data\name_matching\name_matches.json
  [OK] Matched 440/491 players with FPL sta

In [6]:
# (pipeline model key, label used in the printed heading) for each model whose
# feature list is shown. The labels reproduce the existing headings exactly.
MODEL_HEADINGS = [
    ('goals', 'Goals'),
    ('assists', 'assists'),
    ('defcon', 'defcon'),
    ('minutes', 'minutes'),
    ('bonus', 'bonus'),
    ('clean_sheet', 'cs'),
]

for model_key, heading in MODEL_HEADINGS:
    model = pipeline.get_model(model_key)
    print(f"{heading} Model Features ({len(model.FEATURES)}):")
    print("-" * 60)
    # Numbered from 1 so the position matches the order in the FEATURES list
    for i, feat in enumerate(model.FEATURES, 1):
        print(f"  {i:2}. {feat}")

Goals Model Features (16):
------------------------------------------------------------
   1. lifetime_xg_per90
   2. lifetime_goals_per90
   3. lifetime_shots_per90
   4. xg_per90_roll10
   5. xg_per90_roll5
   6. shots_per90_roll5
   7. shots_per90_roll10
   8. team_shots_roll20
   9. team_xg_roll20
  10. team_goals_roll20
  11. team_shots_roll5
  12. team_goals_roll10
  13. team_shots_roll10
  14. opp_conceded_roll20
  15. is_home
  16. opp_xg_against_roll10
assists Model Features (19):
------------------------------------------------------------
   1. lifetime_xag_per90
   2. lifetime_gca_per90
   3. lifetime_sca_per90
   4. lifetime_key_passes_per90
   5. team_goals_roll20
   6. team_xg_roll20
   7. sca_per90_roll5
   8. team_shots_roll20
   9. team_xg_roll10
  10. key_passes_per90_roll5
  11. opp_shots_roll20
  12. xag_per90_roll5
  13. opp_conceded_roll5
  14. opp_conceded_roll10
  15. opp_xg_against_roll20
  16. lifetime_minutes
  17. opp_conceded_roll20
  18. opp_shots_roll10


In [7]:
import pandas as pd
from pathlib import Path
from models.clean_sheet_model import CleanSheetModel

# Cell-local settings. They use their own names so that inspecting an older
# gameweek here does not overwrite the notebook-wide TARGET_GW / SEASON /
# DATA_DIR that the prediction cells depend on.
CS_GW = 20  # Gameweek whose saved predictions are inspected
CS_SEASON = "2025-26"
CS_DATA_DIR = Path("data")

# Load predictions
pred_file = CS_DATA_DIR / "predictions" / f"gw{CS_GW}_{CS_SEASON}_predictions.csv"
gw_predictions = pd.read_csv(pred_file)

# Filter for Arsenal and get first row (all Arsenal players have same team features)
arsenal_rows = gw_predictions[gw_predictions['team'].str.contains('arsenal', case=False, na=False)]
if arsenal_rows.empty:
    # Fail with a readable message instead of the bare IndexError from .iloc[0]
    raise ValueError(f"No Arsenal rows found in {pred_file}")
arsenal = arsenal_rows.iloc[0]

# Get all CS features
cs_features = CleanSheetModel.FEATURES

# One row per feature, with the prediction itself listed first.
# Columns absent from the file come back as None and display as NaN.
report_features = ['pred_cs_prob', *cs_features]
feature_df = pd.DataFrame({
    'Feature': report_features,
    'Value': [arsenal.get(f, None) for f in report_features]
})

# Display
print(f"\nArsenal vs {arsenal.get('opponent', 'Unknown')} - GW{CS_GW}")
print(f"Home/Away: {'Home' if arsenal.get('is_home', 0) == 1 else 'Away'}")
print("\nClean Sheet Features:")
print(feature_df.to_string(index=False))


Arsenal vs bournemouth - GW20
Home/Away: Away

Clean Sheet Features:
                   Feature    Value
              pred_cs_prob 0.238082
 team_goals_conceded_last1      NaN
 team_goals_conceded_roll3      NaN
 team_goals_conceded_roll5      NaN
team_goals_conceded_roll10      NaN
team_goals_conceded_roll30 0.766667
            team_xga_roll5 1.380000
   team_clean_sheets_roll5 1.000000
  team_clean_sheets_roll10      NaN
  team_clean_sheets_roll30      NaN
       team_xga_roll5_home 1.380000
       team_xga_roll5_away 0.780000
    opp_goals_scored_last1      NaN
    opp_goals_scored_roll3      NaN
    opp_goals_scored_roll5      NaN
   opp_goals_scored_roll10 1.300000
   opp_goals_scored_roll30 1.333333
              opp_xg_roll5 2.040000
         opp_xg_roll5_home 1.620000
         opp_xg_roll5_away 2.040000
              xga_xg_ratio      NaN
       defensive_advantage 0.240000
                   is_home 0.000000
              team_encoded 0.000000
          opponent_encoded    

In [8]:
import pandas as pd
from pathlib import Path
from pipelines.fpl_prediction_pipeline import FPLPredictionPipeline
from models.clean_sheet_model import CleanSheetModel

# Cell-local settings and pipeline. They use their own names so that this
# inspection does not overwrite the notebook-wide TARGET_GW / SEASON / DATA_DIR
# or the `pipeline` object created by the prediction cell.
CS_GW = 20  # Gameweek whose predictions are inspected
CS_SEASON = "2025-26"
CS_DATA_DIR = Path("data")

# Load predictions from disk when present; otherwise produce them with the pipeline
pred_file = CS_DATA_DIR / "predictions" / f"gw{CS_GW}_{CS_SEASON}_predictions.csv"
cs_pipeline = None
if pred_file.exists():
    gw_predictions = pd.read_csv(pred_file)
else:
    cs_pipeline = FPLPredictionPipeline(str(CS_DATA_DIR))
    gw_predictions = cs_pipeline.run(target_gw=CS_GW, target_season=CS_SEASON, verbose=False)

# Get Arsenal row
arsenal_rows = gw_predictions[gw_predictions['team'].str.contains('arsenal', case=False, na=False)]
if arsenal_rows.empty:
    # Fail with a readable message instead of the bare IndexError from .iloc[0]
    raise ValueError(f"No Arsenal rows found in GW{CS_GW} predictions")
arsenal = arsenal_rows.iloc[0]

# Feature importances need a trained model, so the pipeline must have run.
# Tracking that with `cs_pipeline` (rather than re-checking the file on disk)
# keeps the two branches consistent and avoids a second pipeline run.
if cs_pipeline is None:
    cs_pipeline = FPLPredictionPipeline(str(CS_DATA_DIR))
    cs_pipeline.run(target_gw=CS_GW, target_season=CS_SEASON, verbose=False)

# Get feature importances
fi_df = cs_pipeline.models['clean_sheet'].feature_importance()

# feature -> importance lookup; the first row wins if a feature is listed twice
importance_by_feature = fi_df.drop_duplicates('feature').set_index('feature')['importance']

# Create combined DataFrame
cs_features = CleanSheetModel.FEATURES
combined_data = []
for feat in cs_features:
    val = arsenal.get(feat, None)
    combined_data.append({
        'Feature': feat,
        'Value': val if val is not None and not pd.isna(val) else 'N/A',
        # Features without an importance entry are shown with 0.0
        'Importance': importance_by_feature.get(feat, 0.0)
    })

combined_df = pd.DataFrame(combined_data).sort_values('Importance', ascending=False)

# Display
print(f"\nArsenal vs {arsenal.get('opponent', 'Unknown')} - GW{CS_GW}")
print(f"Home/Away: {'Home' if arsenal.get('is_home', 0) == 1 else 'Away'}")
print(f"Predicted CS Probability: {arsenal.get('pred_cs_prob', 0):.1%}")
print("\n" + "=" * 100)
print("CLEAN SHEET FEATURES (sorted by importance):")
print("=" * 100)
print(combined_df.to_string(index=False))


Arsenal vs bournemouth - GW20
Home/Away: Away
Predicted CS Probability: 23.8%

CLEAN SHEET FEATURES (sorted by importance):
                   Feature     Value  Importance
   opp_goals_scored_roll30  1.333333    0.170504
         opp_xg_roll5_home      1.62    0.098771
team_goals_conceded_roll30  0.766667    0.097416
       team_xga_roll5_home      1.38    0.083885
       team_xga_roll5_away      0.78    0.077458
                   is_home         0    0.075213
   opp_goals_scored_roll10       1.3    0.070530
              team_encoded         0    0.069327
            team_xga_roll5      1.38    0.068042
         opp_xg_roll5_away      2.04    0.067686
       defensive_advantage      0.24    0.064136
   team_clean_sheets_roll5       1.0    0.057030
 team_goals_conceded_last1       N/A    0.000000
              xga_xg_ratio       N/A    0.000000
              opp_xg_roll5      2.04    0.000000
    opp_goals_scored_roll3       N/A    0.000000
    opp_goals_scored_roll5       N/A    0.

In [9]:
# Players whose name contains "Bruno". na=False treats a missing name as
# "no match" so the mask stays boolean.
predictions_df.loc[predictions_df['player_name'].str.contains("Bruno", na=False), display_cols]

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
59,Bruno Fernandes,manchester_united,MID,Man City,1,88.0,0.42907,0.371543,0.266867,0.403533,1.233,7.566914
60,Bruno Guimarães,newcastle_united,MID,Wolves,0,88.102562,0.129968,0.149145,0.374797,0.242423,0.085,4.041918


In [10]:
# Projection row(s) for the player named exactly 'Cole Palmer'
is_cole_palmer = predictions_df['player_name'].eq('Cole Palmer')
predictions_df.loc[is_cole_palmer, display_cols]

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
79,Cole Palmer,chelsea,MID,Brentford,1,80.0,0.414162,0.231185,0.280385,0.010842,0.299,5.365437


In [11]:
# The 15 highest predicted expected-goals values, display columns only
predictions_df[display_cols].sort_values("pred_exp_goals", ascending=False).iloc[:15]

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
129,Erling Haaland,manchester_city,FWD,Man Utd,0,88.0,0.625438,0.122267,0.310735,0.000277,0.444,5.31311
442,Thiago,brentford,FWD,Chelsea,0,88.0,0.48947,0.129458,0.282932,0.026808,1.736,6.135871
59,Bruno Fernandes,manchester_united,MID,Man City,1,88.0,0.42907,0.371543,0.266867,0.403533,1.233,7.566914
61,Bryan Mbeumo,manchester_united,MID,Man City,1,89.266045,0.428902,0.196319,0.266867,0.028426,0.483,5.540185
65,Callum Wilson,west_ham_united,FWD,Spurs,0,86.808495,0.415445,0.125637,0.236408,0.000179,0.498,4.537052
79,Cole Palmer,chelsea,MID,Brentford,1,80.0,0.414162,0.231185,0.280385,0.010842,0.299,5.365437
172,Hugo Ekitike,liverpool,FWD,Burnley,1,90.0,0.399577,0.096057,0.379063,0.002243,0.339,4.229963
375,Ollie Watkins,aston_villa,FWD,Everton,1,85.971527,0.387784,0.121338,0.294557,0.00025,0.427,4.342649
315,Matheus Cunha,manchester_united,MID,Man City,1,88.0,0.38326,0.188725,0.266867,0.059897,0.386,5.255138
467,Viktor Gyökeres,arsenal,FWD,Nott'm Forest,0,80.0,0.382618,0.060944,0.357666,0.000314,0.287,4.00093


In [12]:
# The 15 highest predicted expected-assists values, display columns only
predictions_df[display_cols].sort_values("pred_exp_assists", ascending=False).iloc[:15]

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
59,Bruno Fernandes,manchester_united,MID,Man City,1,88.0,0.42907,0.371543,0.266867,0.403533,1.233,7.566914
396,Rayan Cherki,manchester_city,MID,Man Utd,0,88.0,0.129759,0.29131,0.310735,0.166817,0.124,4.291092
185,Jack Grealish,everton,MID,Aston Villa,0,81.047012,0.169198,0.276287,0.193138,0.027314,0.33,4.252616
390,Phil Foden,manchester_city,MID,Man Utd,0,88.0,0.319066,0.239643,0.310735,0.032455,0.641,5.330902
79,Cole Palmer,chelsea,MID,Brentford,1,80.0,0.414162,0.231185,0.280385,0.010842,0.299,5.365437
388,Pedro Neto,chelsea,MID,Brentford,1,88.0,0.188449,0.228052,0.280385,0.004967,0.371,4.287721
309,Martin Ødegaard,arsenal,MID,Nott'm Forest,0,83.160172,0.164455,0.21803,0.357666,0.01168,0.371,4.228391
71,Charalampos Kostoulas,brighton__hove_albion,FWD,Bournemouth,1,58.626076,0.374539,0.214166,0.260499,0.015409,0.534,3.705475
62,Bukayo Saka,arsenal,MID,Nott'm Forest,0,80.0,0.29089,0.212741,0.357666,0.020509,0.313,4.804357
127,Enzo Fernández,chelsea,MID,Brentford,1,87.678047,0.263328,0.205129,0.280385,0.311495,0.361,5.196404


In [13]:

# Players to look up. Matching is exact against `player_name`, so spellings
# must be identical to the data (including accents).
# NOTE(review): several entries are written without accents and may be the
# reason they return no row - confirm against the names in predictions_df.
team = [
    "Cole Palmer", "Maxence Lacroix", "Danny Welbeck", "Ibrahima Konate",
    "Yeremy Pino", "Raul Jimenez", "Marcus Tavernier", "Justin Kluivert",
    "Phil Foden", "Rayan Cherki", "Yankuba Minteh", "João Pedro",
    "Enzo Fernández", "Sávio",
]

# Report names that match nothing instead of dropping them silently
unmatched = sorted(set(team) - set(predictions_df['player_name']))
if unmatched:
    print(f"No predictions found for: {', '.join(unmatched)}")

predictions_df.loc[predictions_df['player_name'].isin(team), display_cols].sort_values("exp_total_pts")

Unnamed: 0,player_name,team,fpl_position,opponent,is_home,pred_minutes,pred_exp_goals,pred_exp_assists,pred_cs_prob,pred_defcon_prob,pred_bonus,exp_total_pts
253,Justin Kluivert,bournemouth,MID,Brighton,0,0.0,0.0,0.0,0.239461,0.0,0.0,0.239461
438,Sávio,manchester_city,MID,Man Utd,0,11.326872,0.029038,0.039319,0.310735,3.278081e-10,0.001,1.57488
480,Yankuba Minteh,brighton__hove_albion,MID,Bournemouth,1,20.0,0.041014,0.047538,0.260499,5.731661e-07,0.018,1.626183
94,Danny Welbeck,brighton__hove_albion,FWD,Bournemouth,1,46.083317,0.154709,0.044091,0.260499,1.058267e-05,0.095,1.846131
245,João Pedro,chelsea,FWD,Brentford,1,70.0,0.315454,0.129582,0.280385,0.001904352,0.191,3.845368
396,Rayan Cherki,manchester_city,MID,Man Utd,0,88.0,0.129759,0.29131,0.310735,0.166817,0.124,4.291092
306,Marcus Tavernier,bournemouth,MID,Brighton,0,88.0,0.224594,0.177677,0.239461,0.114532,0.437,4.561528
127,Enzo Fernández,chelsea,MID,Brentford,1,87.678047,0.263328,0.205129,0.280385,0.3114948,0.361,5.196404
390,Phil Foden,manchester_city,MID,Man Utd,0,88.0,0.319066,0.239643,0.310735,0.03245466,0.641,5.330902
328,Maxence Lacroix,crystal_palace,DEF,Sunderland,0,89.382576,0.037013,0.025926,0.335359,0.7771174,0.148,5.343529


In [14]:
# All columns for players whose name contains "vio". na=False treats a missing
# name as "no match" so the mask stays boolean.
predictions_df.loc[predictions_df['player_name'].str.contains("vio", na=False)]

Unnamed: 0,player_name,team,team_display,opponent,position,is_home,gameweek,season,player_id,player_roll5_goals,...,pred_cs_prob,pred_bonus,fpl_position,exp_goals_pts,exp_assists_pts,exp_cs_pts,exp_defcon_pts,exp_bonus_pts,exp_appearance_pts,exp_total_pts
438,Sávio,manchester_city,Man City,Man Utd,AM,0,22,2025-26,Sávio_manchester_city,0.0,...,0.310735,0.001,MID,0.145188,0.117957,0.310735,6.556162e-10,0.001,1.0,1.57488
