In [79]:
# ===============================
# Cell 1: Imports and Data Loading
# ===============================

import json
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime

# Paths
DATA_DIR = Path("data")
ELEMENT_GW_DIR = DATA_DIR / "element_gameweek_live"
GW_DATA_DIR = DATA_DIR / "gw_data"
GW_PICKS_DIR = DATA_DIR / "gw_picks"
BOOTSTRAP_PATH = DATA_DIR / "bootstrap-static.json"
ENTRY_HISTORY_PATH = DATA_DIR / "entry_history.json"
ENTRY_SUMMARY_PATH = DATA_DIR / "entry_summary.json"
ENTRY_TRANSFERS_PATH = DATA_DIR / "entry_transfers.json"

# --- Load bootstrap-static.json ---
with open(BOOTSTRAP_PATH, encoding="utf-8") as f:
    bootstrap = json.load(f)

players_meta = pd.DataFrame(bootstrap['elements'])
teams_meta = pd.DataFrame(bootstrap['teams'])
element_types = pd.DataFrame(bootstrap['element_types'])
chips_meta = pd.DataFrame(bootstrap['chips'])
events_meta = pd.DataFrame(bootstrap['events'])

# --- Load element_gameweek_live ---
def load_element_gw():
    dfs = []
    for file in sorted(ELEMENT_GW_DIR.glob("GW*.json"), key=lambda x: int(x.stem[2:])):
        with open(file, encoding="utf-8") as f:
            data = json.load(f)
        gw_num = int(file.stem[2:])
        for player in data['elements']:
            row = {'element': player['id'], 'gw': gw_num}
            row.update(player['stats'])
            dfs.append(row)
    return pd.DataFrame(dfs)

element_gw_df = load_element_gw()

# --- Load gw_data ---
def load_gw_data():
    dfs = []
    for file in sorted(GW_DATA_DIR.glob("GW*.json"), key=lambda x: int(x.stem[2:])):
        with open(file, encoding="utf-8") as f:
            data = json.load(f)
        for gw in data:
            gw_num = gw['event']
            row_base = {
                'gw': gw_num,
                'team_h': gw['team_h'],
                'team_a': gw['team_a'],
                'team_h_score': gw.get('team_h_score', None),
                'team_a_score': gw.get('team_a_score', None),
                'team_h_difficulty': gw.get('team_h_difficulty', None),
                'team_a_difficulty': gw.get('team_a_difficulty', None),
                'finished': gw.get('finished', False)
            }
            dfs.append(row_base)
    return pd.DataFrame(dfs)

gw_data_df = load_gw_data()

# --- Load gw_picks ---
def load_gw_picks():
    dfs = []
    for file in sorted(GW_PICKS_DIR.glob("GW*.json"), key=lambda x: int(x.stem[2:])):
        with open(file, encoding="utf-8") as f:
            data = json.load(f)
        gw_num = int(file.stem[2:])
        for pick in data['picks']:
            dfs.append({
                'gw': gw_num,
                'element': pick['element'],
                'position': pick['position'],
                'multiplier': pick['multiplier'],
                'is_captain': pick['is_captain'],
                'is_vice_captain': pick['is_vice_captain'],
                'element_type': pick['element_type']
            })
    return pd.DataFrame(dfs)

gw_picks_df = load_gw_picks()

# --- Load entry_transfers ---
with open(ENTRY_TRANSFERS_PATH, encoding="utf-8") as f:
    entry_transfers = pd.DataFrame(json.load(f))


In [82]:
# ===============================
# Cell 2 & 3: Feature Preparation + Model Prediction
# ===============================

# --- Merge player status from bootstrap ---
status_df = players_meta[['id', 'status']].rename(columns={'id': 'element'})
model_df = model_df.merge(status_df, on='element', how='left')

# --- Merge GW stats from element_gw_df ---
stats_cols = [
    'minutes','goals_scored','assists','clean_sheets','goals_conceded',
    'own_goals','penalties_saved','penalties_missed','yellow_cards','red_cards',
    'saves','bonus','bps','influence','creativity','threat','ict_index',
    'clearances_blocks_interceptions','recoveries','tackles','defensive_contribution',
    'starts','now_cost','form','ep_next','ep_this','team_score','opp_score',
    'team','opponent','opponent_difficulty','home','multiplier'
]

model_df = model_df.merge(
    element_gw_df[['element','gw'] + [c for c in stats_cols if c in element_gw_df.columns]],
    on=['element','gw'],
    how='left'
)

# --- Fill missing stats with 0 ---
for col in stats_cols:
    if col not in model_df.columns:
        model_df[col] = 0

# --- Ensure numeric types ---
for col in stats_cols:
    if model_df[col].dtype == 'object':
        model_df[col] = pd.to_numeric(model_df[col], errors='coerce').fillna(0)

# --- Model Prediction ---
# Make sure xgb_model and rf_model are already trained
model_df['pred_points_xgb'] = xgb_model.predict(model_df[stats_cols])
model_df['pred_points_rf'] = rf_model.predict(model_df[stats_cols])

print("Predictions added successfully. Sample:")
print(model_df[['element','gw','pred_points_xgb','pred_points_rf']].head())


Predictions added successfully. Sample:
   element  gw  pred_points_xgb  pred_points_rf
0        1   1         0.002342             0.0
1        2   1         0.000461             0.0
2        3   1         0.000461             0.0
3        4   1         0.000461             0.0
4        5   1         0.040798             0.0


In [83]:
# ===============================
# Cell 4: Team Picker & Chip Strategy
# ===============================

team_size = 15
starting_xi_size = 11
bench_size = 4
max_transfers = 2
safe_captain_threshold = 0.7
risky_captain_threshold = 0.9

def pick_starting_xi(gw_df):
    gw_df = gw_df.sort_values('pred_points_xgb', ascending=False)
    starters = gw_df[gw_df['status'].isin(['a','n'])].head(starting_xi_size)
    bench = gw_df[gw_df['status'].isin(['a','n']) & ~gw_df.index.isin(starters.index)].head(bench_size)
    return starters, bench

def pick_captains(starters):
    top_player = starters.iloc[0]
    second_player = starters.iloc[1]
    safe_candidates = starters[starters['pred_points_xgb'] >= safe_captain_threshold * starters['pred_points_xgb'].max()]
    risky_candidates = starters[starters['pred_points_xgb'] >= risky_captain_threshold * starters['pred_points_xgb'].max()]
    captain = safe_candidates.iloc[0]['element'] if not safe_candidates.empty else top_player['element']
    vice_captain = risky_candidates.iloc[0]['element'] if not risky_candidates.empty else second_player['element']
    return captain, vice_captain

def suggest_transfers(current_team, predictions_df, free_transfers=1, points_threshold=4):
    current_elements = set(current_team['element'])
    best_candidates = predictions_df[~predictions_df['element'].isin(current_elements)]
    best_candidates = best_candidates.sort_values('pred_points_xgb', ascending=False)
    transfers = []
    for i in range(free_transfers):
        if best_candidates.empty:
            break
        candidate = best_candidates.iloc[i]
        if candidate['pred_points_xgb'] > points_threshold:
            worst_player = current_team.sort_values('pred_points_xgb').iloc[0]
            transfers.append((worst_player['element'], candidate['element']))
            current_team.loc[current_team['element'] == worst_player['element'], 'element'] = candidate['element']
    return transfers

def suggest_chip_strategy(gw, gw_predictions, chips_used, total_gws=38):
    suggestions = []
    if '3xc' not in chips_used:
        top_player_points = gw_predictions['pred_points_xgb'].max()
        if top_player_points >= 10:
            suggestions.append(f"Consider Triple Captain on {gw_predictions.iloc[0]['web_name']} in GW{gw}")
    if 'bboost' not in chips_used:
        bench_points = gw_predictions['pred_points_xgb'].sort_values(ascending=False).head(4).sum()
        if bench_points >= 15:
            suggestions.append(f"Consider Bench Boost in GW{gw}")
    if 'freehit' not in chips_used:
        if 'team_difficulty' in gw_predictions.columns:
            difficult_team = gw_predictions['team_difficulty'].mean()
            if difficult_team >= 3.0:
                suggestions.append(f"Consider Free Hit in GW{gw}")
    if 'wildcard' not in chips_used and gw >= total_gws//2:
        suggestions.append(f"Consider Wildcard for GW{gw}")
    return suggestions


In [85]:
# ===============================
# Ensure status exists in model_df
# ===============================

# Merge player status once from players_meta
if 'status' not in model_df.columns:
    model_df = model_df.merge(
        players_meta[['id','status']].rename(columns={'id':'element'}),
        on='element',
        how='left'
    )

# Fill missing status as 'a' (assume available if unknown)
model_df['status'] = model_df['status'].fillna('a')

# ===============================
# Example GW7 Usage (Safe)
# ===============================
gw = 7
gw_df = model_df[model_df['gw'] == gw].copy()

starters, bench = pick_starting_xi(gw_df)
captain, vice_captain = pick_captains(starters)
transfers = suggest_transfers(starters, gw_df, free_transfers=1, points_threshold=4)
chips_used = []
chip_suggestions = suggest_chip_strategy(gw, gw_df, chips_used)

# Use web_name if it exists; otherwise fall back to 'element'
print(f"GW{gw} Starters: {[p['web_name'] if 'web_name' in p else p['element'] for i,p in starters.iterrows()]}")
print(f"Bench: {[p['web_name'] if 'web_name' in p else p['element'] for i,p in bench.iterrows()]}")
print(f"Captain: {captain}, Vice-Captain: {vice_captain}")
print(f"Suggested Transfers: {transfers}")
print(f"Chip Suggestions: {chip_suggestions}")


GW7 Starters: ['Semenyo', 'Haaland', 'Gabriel', 'Alderete', 'Gyökeres', 'P.M.Sarr', 'Diouf', 'Krejčí', 'S.Bueno', 'Vicario', 'Ampadu']
Bench: ['Bogarde', 'Maguire', 'Konsa', 'Leno']
Captain: 82, Vice-Captain: 82
Suggested Transfers: []
Chip Suggestions: []
