In [153]:
# Cell 1: Setup and Data Loading/Cleaning

import json
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import warnings
warnings.filterwarnings('ignore') # Suppress warnings for cleaner output

# --- Configuration ---
DATA_PATH = 'data/'
GW_MAX = 7 # Max GW available for element_gameweek_live (as per your path)
GW_TRAIN_END = 4 # Use GWs 1-4 for training
GW_TEST = 5 # Predict for GW 5

# --- Global Data Holders ---
bootstrap_static = {}
all_player_data = pd.DataFrame()
all_fixture_data = pd.DataFrame()

# --- Functions ---

def load_json(filepath):
    """Loads a single JSON file."""
    try:
        with open(filepath, 'r', encoding='utf8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"File not found: {filepath}")
        return None

def load_bootstrap_static():
    """Loads and processes the bootstrap-static.json file."""
    global bootstrap_static
    data = load_json(os.path.join(DATA_PATH, 'bootstrap-static.json'))
    if data:
        bootstrap_static = data
        return pd.DataFrame(data['elements'])
    return pd.DataFrame()

def load_all_player_gw_data():
    """Loads and combines all element_gameweek_live data."""
    global all_player_data
    dfs = []
    for gw in range(1, GW_MAX + 1):
        filepath = os.path.join(DATA_PATH, f'element_gameweek_live/GW{gw}.json')
        data = load_json(filepath)
        if data and 'elements' in data:
            df = pd.DataFrame([e['stats'] for e in data['elements']])
            df['id'] = [e['id'] for e in data['elements']]
            df['GW'] = gw
            dfs.append(df)
    
    if dfs:
        all_player_data = pd.concat(dfs, ignore_index=True)
        # Convert numeric strings to float/int
        for col in ['influence', 'creativity', 'threat', 'ict_index', 'expected_goals', 'expected_assists', 'expected_goal_involvements', 'expected_goals_conceded']:
            all_player_data[col] = pd.to_numeric(all_player_data[col], errors='coerce').fillna(0)
        
        return all_player_data
    return pd.DataFrame()

def load_all_fixture_data():
    """Loads and combines all gw_data (fixture) files."""
    global all_fixture_data
    dfs = []
    # Assuming fixtures are available up to GW 38 based on your path structure
    for gw in range(1, 39): 
        filepath = os.path.join(DATA_PATH, f'gw_data/GW{gw}.json')
        data = load_json(filepath)
        if data:
            df = pd.DataFrame(data)
            df['GW'] = gw
            dfs.append(df)

    if dfs:
        all_fixture_data = pd.concat(dfs, ignore_index=True)
        return all_fixture_data
    return pd.DataFrame()


# --- Execution ---
player_static = load_bootstrap_static()
player_live = load_all_player_gw_data()
fixtures = load_all_fixture_data()

print(f"Loaded {len(player_static)} static players, {len(player_live)} live player-GW records, and {len(fixtures)} fixtures.")

# Merge static data to live data
if not player_live.empty and not player_static.empty:
    player_live = pd.merge(player_live, player_static[['id', 'element_type', 'team', 'now_cost', 'form', 'points_per_game', 'selected_by_percent', 'web_name']], 
                           on='id', 
                           suffixes=('_live', '_static'))
    player_live.rename(columns={'element_type': 'position_id', 'team': 'team_id', 'now_cost': 'price_100k'}, inplace=True)
    player_live['price_100k'] = player_live['price_100k'] / 10 # Convert from integer (e.g., 60 to 6.0m)

    # Simple Feature Engineering: Matchup Difficulty
    # Get fixture information for each player's GW performance
    def get_opponent_difficulty(row):
        fixture = fixtures[(fixtures['event'] == row['GW']) & ((fixtures['team_h'] == row['team_id']) | (fixtures['team_a'] == row['team_id']))]
        if fixture.empty:
            return pd.Series([np.nan, np.nan])

        f = fixture.iloc[0]
        if f['team_h'] == row['team_id']: # Player is Home team
            return pd.Series([f['team_a'], f['team_a_difficulty']]) # Opponent ID, Opponent Difficulty
        else: # Player is Away team
            return pd.Series([f['team_h'], f['team_h_difficulty']]) # Opponent ID, Opponent Difficulty

    player_live[['opp_team_id', 'opp_difficulty']] = player_live.apply(get_opponent_difficulty, axis=1)

    print("Data cleaning and basic feature engineering complete.")
else:
    print("Error loading core data.")

# Set up data for modeling
if not player_live.empty:
    # Target variable and features
    TARGET = 'total_points' 
    # Select features based on data availability and relevance (can be improved)
    FEATURES = [
        'minutes', 'goals_scored', 'assists', 'clean_sheets', 
        'goals_conceded', 'saves', 'bonus', 'bps',
        'influence', 'creativity', 'threat', 'ict_index',
        'expected_goals', 'expected_assists', 'expected_goals_conceded',
        'price_100k', 'form', 'points_per_game', 'selected_by_percent',
        'opp_difficulty', 'position_id', 'team_id'
    ]

    # Handle missing values (simple imputation with 0 or mean - this should be improved in a production model)
    for col in FEATURES:
        if player_live[col].dtype == 'object':
             player_live[col] = pd.to_numeric(player_live[col], errors='coerce').fillna(0)
        elif player_live[col].isnull().any():
             player_live[col].fillna(player_live[col].mean(), inplace=True)
             
    # Create the training and testing sets
    train_df = player_live[player_live['GW'] <= GW_TRAIN_END].copy()
    test_df = player_live[player_live['GW'] == GW_TEST].copy()

    X_train = train_df[FEATURES]
    y_train = train_df[TARGET]
    X_test = test_df[FEATURES]
    
    print(f"\nTraining data: GW 1 to GW {GW_TRAIN_END}. Test data: GW {GW_TEST}.")
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")

Loaded 743 static players, 5073 live player-GW records, and 380 fixtures.
Data cleaning and basic feature engineering complete.

Training data: GW 1 to GW 4. Test data: GW 5.
X_train shape: (2847, 22), y_train shape: (2847,)


In [154]:
# Cell 2: Random Forest Model Training and Point Prediction

# Initialize and train the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, min_samples_split=5)
print("Training Random Forest model...")
rf_model.fit(X_train, y_train)
print("Training complete.")

# Predict points for the test GW (GW_TEST)
y_pred_rf = rf_model.predict(X_test)

# Add predictions to the test DataFrame
test_df['predicted_points_rf'] = np.maximum(0, y_pred_rf.round(2)) # Points can't be negative

# Merge actual points for evaluation (if available in test_df)
y_test = test_df[TARGET]
if not y_test.isnull().all():
    mae_rf = mean_absolute_error(y_test, test_df['predicted_points_rf'])
    print(f"\nRandom Forest MAE for GW {GW_TEST} (Player Points): {mae_rf:.2f}")

# Map element_id to player name for easier reading
player_name_map = player_static.set_index('id')['web_name'].to_dict()
test_df['player_name'] = test_df['id'].map(player_name_map)

# Display top predicted players for GW_TEST
print("\n--- Random Forest Top Predicted Players for GW {} ---".format(GW_TEST))
top_players_rf = test_df.sort_values(by='predicted_points_rf', ascending=False).head(10)
print(top_players_rf[['player_name', 'position_id', 'predicted_points_rf', 'total_points']].to_string(index=False))

Training Random Forest model...
Training complete.

Random Forest MAE for GW 5 (Player Points): 0.20

--- Random Forest Top Predicted Players for GW 5 ---
  player_name  position_id  predicted_points_rf  total_points
        Stach            3                13.72            15
  Gravenberch            3                13.28            15
        Iwobi            3                12.85            13
Calvert-Lewin            4                11.20            11
      Ekitiké            4                10.81             8
      Anthony            3                10.71            10
  Richarlison            4                 9.71             9
       Minteh            3                 9.71             9
  B.Fernandes            3                 9.53            10
      Maguire            2                 9.40            11


In [155]:
# Cell 3: Match Result Prediction (Random Forest)

# This requires additional feature engineering (aggregate team performance) which is complex.
# For simplicity, we'll create a new model to predict Home Win (1), Draw (0), or Away Win (-1).
# We'll use team-level aggregated data for training.

# 1. Prepare Fixture Training Data
fixture_train_df = fixtures[fixtures['event'] <= GW_TRAIN_END].copy()

# A simple model: use home/away team strength and difficulty
def prepare_fixture_features(df):
    features = df[['team_h', 'team_a', 'team_h_difficulty', 'team_a_difficulty']].copy()
    
    # Simple aggregated team strength (using total FPL points scored by players in that team)
    team_points = player_live.groupby('team_id')['total_points'].sum().to_dict()

    features['team_h_total_points'] = features['team_h'].map(team_points).fillna(0)
    features['team_a_total_points'] = features['team_a'].map(team_points).fillna(0)

    # Target: 1 for Home Win, 0 for Draw, -1 for Away Win
    conditions = [
        df['team_h_score'] > df['team_a_score'],
        df['team_h_score'] == df['team_a_score'],
        df['team_h_score'] < df['team_a_score']
    ]
    choices = [1, 0, -1]
    features['match_result'] = np.select(conditions, choices, default=np.nan)
    
    # Filter out unfinished matches from training
    return features.dropna(subset=['match_result'])

fixture_train_features = prepare_fixture_features(fixture_train_df)

X_train_match = fixture_train_features[['team_h_difficulty', 'team_a_difficulty', 'team_h_total_points', 'team_a_total_points']]
y_train_match = fixture_train_features['match_result']

# 2. Train Random Forest Classifier
rf_match_model = RandomForestRegressor(n_estimators=50, random_state=42) # Use Regressor for easier score prediction later
rf_match_model.fit(X_train_match, y_train_match)

# 3. Predict Match Results for GW_TEST (e.g., GW 5)
fixture_test_df = fixtures[fixtures['event'] == GW_TEST].copy()
fixture_test_features = prepare_fixture_features(fixture_test_df)

X_test_match = fixture_test_features[['team_h_difficulty', 'team_a_difficulty', 'team_h_total_points', 'team_a_total_points']]

# Predict and interpret the result
y_pred_match_rf = rf_match_model.predict(X_test_match)
fixture_test_df['predicted_result_rf'] = np.sign(y_pred_match_rf).astype(int)

# Predict score (Very complex, simplified here: use a linear relationship with predicted result)
# This is a massive simplification, a separate model for score prediction is usually required.
fixture_test_df['predicted_score_h'] = np.maximum(0, (fixture_test_df['predicted_result_rf'] * 0.5 + 1.5).round(0)).astype(int)
fixture_test_df['predicted_score_a'] = np.maximum(0, (fixture_test_df['predicted_result_rf'] * -0.5 + 1.5).round(0)).astype(int)


# Get team names
team_map = {t['id']: t['short_name'] for t in bootstrap_static.get('teams', [])}

print("--- Match Result Predictions for GW {} (Random Forest) ---".format(GW_TEST))
match_results = []
for index, row in fixture_test_df.iterrows():
    home_team = team_map.get(row['team_h'], f"Team {row['team_h']}")
    away_team = team_map.get(row['team_a'], f"Team {row['team_a']}")
    
    pred_score = f"{home_team} {row['predicted_score_h']} - {away_team} {row['predicted_score_a']}"
    actual_score = f"{home_team} {row['team_h_score']} - {away_team} {row['team_a_score']}"
    
    match_results.append({
        'Fixture': f"{home_team} vs {away_team}",
        'Predicted Score': pred_score,
        'Actual Score': actual_score
    })

print(pd.DataFrame(match_results).to_string(index=False))

--- Match Result Predictions for GW 5 (Random Forest) ---
   Fixture Predicted Score  Actual Score
LIV vs EVE   LIV 2 - EVE 1 LIV 2 - EVE 1
BHA vs TOT   BHA 2 - TOT 1 BHA 2 - TOT 2
BUR vs NFO   BUR 2 - NFO 1 BUR 1 - NFO 1
WHU vs CRY   WHU 1 - CRY 2 WHU 1 - CRY 2
WOL vs LEE   WOL 1 - LEE 2 WOL 1 - LEE 3
MUN vs CHE   MUN 1 - CHE 2 MUN 2 - CHE 1
FUL vs BRE   FUL 2 - BRE 1 FUL 3 - BRE 1
BOU vs NEW   BOU 1 - NEW 2 BOU 0 - NEW 0
SUN vs AVL   SUN 2 - AVL 1 SUN 1 - AVL 1
ARS vs MCI   ARS 1 - MCI 2 ARS 1 - MCI 1


In [156]:
# Cell 4: Player Suggestion and Team Picker Functions (Random Forest based)

from scipy.optimize import linprog # For team optimization
from itertools import combinations

# --- Constants for FPL Team Structure ---
POS_IDS = {'GKP': 1, 'DEF': 2, 'MID': 3, 'FWD': 4}
POS_MAX = {1: 2, 2: 5, 3: 5, 4: 3}
TEAM_LIMIT = 3
SQUAD_SIZE = 15
MAX_COST_100K = 1000 # 100.0m

def create_optimal_team_lp(player_predictions, budget=MAX_COST_100K):
    """
    Uses Linear Programming to find the mathematically optimal 15-man squad.
    Note: This is an integer programming problem, but we use LP approximation for simplicity.
    """
    if player_predictions.empty:
        return []

    # Prepare data
    df = player_predictions.copy()
    df['id_index'] = df.index
    
    costs = df['price_100k'].values
    points = df['predicted_points_rf'].values
    positions = df['position_id'].values
    teams = df['team_id'].values

    num_players = len(df)

    # 1. Objective function (Maximize total predicted points)
    c = -points # linprog minimizes, so negate for maximization

    # 2. Constraints
    A_eq = []
    b_eq = []
    A_ub = []
    b_ub = []

    # a. Total cost constraint (<= budget)
    A_ub.append(costs)
    b_ub.append(budget)

    # b. Squad size constraint (exactly 15 players)
    A_eq.append(np.ones(num_players))
    b_eq.append(SQUAD_SIZE)

    # c. Position constraints (min/max for each position)
    for pos_name, pos_id in POS_IDS.items():
        pos_indices = (positions == pos_id)
        
        # Max constraint (e.g., max 2 GKP)
        A_ub.append(pos_indices.astype(int))
        b_ub.append(POS_MAX[pos_id])
    
    # d. Team limit constraint (max 3 players per team)
    unique_teams = df['team_id'].unique()
    for team_id in unique_teams:
        team_indices = (teams == team_id)
        A_ub.append(team_indices.astype(int))
        b_ub.append(TEAM_LIMIT)

    # Solve the linear program (bounds ensure x is between 0 and 1)
    # Using `method='highs'` is often faster.
    # The solution is for continuous variables, we round to get integer picks (a simplification)
    result = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=(0, 1), method='highs')
    
    if result.success:
        # Rounding and adjusting to get exactly 15 players
        selections = result.x.round().astype(int)
        
        # If rounding didn't yield exactly 15, adjust the top performers to hit 15
        selected_count = selections.sum()
        diff = SQUAD_SIZE - selected_count
        
        if diff != 0:
            # Sort players by the difference between continuous result and rounded result
            # and adjust accordingly to satisfy the SQUAD_SIZE constraint
            # This is a hacky way to approximate ILP, but serves for demonstration
            adjustment_priority = (result.x - selections) if diff > 0 else (selections - result.x)
            
            # Select/deselect the players with the highest/lowest priority
            sorted_indices = np.argsort(adjustment_priority)[::-1]
            
            for idx in sorted_indices:
                if diff > 0 and selections[idx] == 0:
                    selections[idx] = 1 # Add a player
                    diff -= 1
                elif diff < 0 and selections[idx] == 1:
                    selections[idx] = 0 # Remove a player
                    diff += 1
                if diff == 0:
                    break

        selected_players = df[selections == 1].sort_values(by=['position_id', 'predicted_points_rf'], ascending=[True, False])
        return selected_players.to_dict('records')
    
    print("LP optimization failed:", result.message)
    # Fallback: just return the top 15 most expensive or top predicted players (simplified)
    return df.sort_values(by='predicted_points_rf', ascending=False).head(15).to_dict('records')

def suggest_negative_transfer(current_team, bank_100k, free_transfers, player_predictions):
    """
    Suggests a -4 or -8 point transfer only if the predicted gain is worth the hit.
    This is extremely complex for a full team, so we focus on a single transfer.
    """
    
    current_ids = {p['id'] for p in current_team}
    
    # 1. Identify non-essential players to remove (low predicted score, high price to maximize bank)
    # Filter players from current_team and merge with predictions
    current_team_df = player_predictions[player_predictions['id'].isin(current_ids)].copy()
    current_team_df['in_team'] = True
    
    # Find the worst predicted player to replace
    # A player is "replaceable" if they are predicted to score < 4 points, or have a low rank
    replaceable_players = current_team_df.sort_values(by='predicted_points_rf').head(3)
    
    if replaceable_players.empty:
        return "No clear underperforming players to replace.", 0
    
    best_transfer_gain = -9999
    best_transfer = (None, None, 0) # player_out, player_in, expected_gain

    # Only consider the worst predicted player for an OUT candidate
    for _, player_out in replaceable_players.iterrows():
        # 2. Identify players to bring in (high predicted score, affordable, not in team, team limit not hit)
        eligible_in = player_predictions[
            (~player_predictions['id'].isin(current_ids)) &
            (player_predictions['price_100k'] <= bank_100k + player_out['price_100k']) &
            (player_predictions['team_id'] != player_out['team_id']) # Simple logic: don't transfer within the same team
        ].sort_values(by='predicted_points_rf', ascending=False).head(5)
        
        for _, player_in in eligible_in.iterrows():
            gain = player_in['predicted_points_rf'] - player_out['predicted_points_rf']
            transfer_cost = 4 * (1 - free_transfers) # simplified for -4 hit
            net_gain = gain - transfer_cost
            
            # **Core Logic Check**: Model sure the player points will be greater than 4 AND net gain is positive
            if player_in['predicted_points_rf'] >= 4.0 and net_gain > best_transfer_gain:
                best_transfer_gain = net_gain
                best_transfer = (player_out['web_name'], player_in['web_name'], net_gain)

    if best_transfer[0]:
        player_out_name, player_in_name, net_gain = best_transfer
        return f"**-4 Hit Recommended**: Transfer **OUT: {player_out_name}** for **IN: {player_in_name}**. Net Predicted Gain: **{net_gain:.2f} points**.", net_gain
    
    return "No transfer with a -4 hit is recommended (predicted net gain is not positive or incoming player isn't a sure 4+ points).", 0


# --- Execution ---

# For demonstration, assume a simplified 'current team' and budget for GW_TEST (GW 5)
# This would normally be read from gw_picks/GW4.json and entry_history.json
# We'll use the top 15 predicted players from GW4 as the simulated 'current team'
simulated_current_team_gw_4 = player_live[player_live['GW'] == GW_TRAIN_END].sort_values(by='total_points', ascending=False).head(15)
simulated_current_team_gw_4_list = simulated_current_team_gw_4[['id', 'web_name', 'price_100k', 'team_id']].to_dict('records')

# Assumptions for test
SIM_BANK = 50.0 # 5.0m in FPL terms, 50 in 100k units
SIM_FT = 0 # Assume the user has 0 free transfers, forcing a -4 hit

print("--- Negative Transfer Suggestion (GW {}) ---".format(GW_TEST))
suggestion, net_gain = suggest_negative_transfer(
    current_team=simulated_current_team_gw_4_list, 
    bank_100k=SIM_BANK, 
    free_transfers=SIM_FT,
    player_predictions=test_df
)
print(suggestion)

print("\n--- Wildcard Team Picker (GW {}) ---".format(GW_TEST))
# Assuming maximum budget for a Wildcard
wildcard_team = create_optimal_team_lp(test_df, budget=MAX_COST_100K)
if wildcard_team:
    wildcard_df = pd.DataFrame(wildcard_team)
    total_cost = wildcard_df['price_100k'].sum() / 10
    total_points = wildcard_df['predicted_points_rf'].sum()
    
    print(f"Total Predicted Points: {total_points:.2f} | Total Cost: £{total_cost:.1f}m")
    
    # Display the starting XI (simplification: just display the 11 most expensive)
    starting_xi = wildcard_df.sort_values(by='price_100k', ascending=False).head(11)
    
    # Map position ID to name
    pos_map = {v: k for k, v in POS_IDS.items()}
    starting_xi['Position'] = starting_xi['position_id'].map(pos_map)
    starting_xi['Team'] = starting_xi['team_id'].map(team_map)

    print("\nStarting XI (Top 11):")
    print(starting_xi[['player_name', 'Position', 'Team', 'price_100k', 'predicted_points_rf']].to_string(index=False))

--- Negative Transfer Suggestion (GW 5) ---
**-4 Hit Recommended**: Transfer **OUT: Van de Ven** for **IN: Stach**. Net Predicted Gain: **8.53 points**.

--- Wildcard Team Picker (GW 5) ---
Total Predicted Points: 147.98 | Total Cost: £8.2m

Starting XI (Top 11):
  player_name Position Team  price_100k  predicted_points_rf
      Ekitiké      FWD  LIV         8.7                10.81
  Richarlison      FWD  TOT         6.8                 9.71
        Iwobi      MID  FUL         6.5                12.85
       Minteh      MID  BHA         6.0                 9.71
  Gravenberch      MID  LIV         5.7                13.28
      Anthony      MID  BUR         5.7                10.71
Calvert-Lewin      FWD  LEE         5.5                11.20
         Pope      GKP  NEW         5.1                 5.91
        Stach      MID  LEE         5.0                13.72
       Senesi      DEF  BOU         4.9                 8.31
   N.Williams      DEF  NFO         4.8                 8.59


In [157]:
# Cell 5: Captain/Vice-Captain and Free Hit Suggestions (Random Forest based)

# --- Captain/Vice-Captain Suggestion ---
def suggest_captains(player_predictions):
    """Suggests safe and risky captains based on predicted points."""
    
    # Safety is often proportional to predicted points AND ownership/form (selected_by_percent, form)
    # Riskiness is inversely proportional to safety metrics
    
    df = player_predictions.copy()
    
    # Calculate a 'Safety Score': Simple weighted average
    df['safety_score'] = (
        df['predicted_points_rf'] * 0.4 +
        pd.to_numeric(df['selected_by_percent'], errors='coerce') * 0.3 +
        pd.to_numeric(df['form'], errors='coerce') * 0.3
    )
    
    # Calculate a 'Risk Score': Inverse of safety, focused on high predicted points not backed by selection/form
    df['risk_score'] = (
        df['predicted_points_rf'] * 0.7 -
        pd.to_numeric(df['selected_by_percent'], errors='coerce') * 0.15 -
        pd.to_numeric(df['form'], errors='coerce') * 0.15
    )
    
    safe_captain = df.sort_values(by='safety_score', ascending=False).iloc[0]
    vice_captain = df.sort_values(by='safety_score', ascending=False).iloc[1]
    risky_captain = df.sort_values(by='risk_score', ascending=False).iloc[0]
    
    return {
        'Safe Captain': safe_captain,
        'Vice Captain': vice_captain,
        'Risky Captain': risky_captain
    }

# --- Free Hit Team Suggestion ---
def suggest_free_hit(player_predictions):
    """
    A Free Hit team is essentially a Wildcard team but for one week.
    The optimization method is the same: maximize predicted points.
    We'll reuse the `create_optimal_team_lp` function.
    """
    return create_optimal_team_lp(player_predictions, budget=MAX_COST_100K)

# --- Execution ---

print("--- Captain Suggestions (GW {}) ---".format(GW_TEST))
captain_suggestions = suggest_captains(test_df)

print(f"Safe Captain: **{captain_suggestions['Safe Captain']['player_name']}** (Pred: {captain_suggestions['Safe Captain']['predicted_points_rf']:.2f})")
print(f"Vice Captain: **{captain_suggestions['Vice Captain']['player_name']}** (Pred: {captain_suggestions['Vice Captain']['predicted_points_rf']:.2f})")
print(f"Risky Captain: **{captain_suggestions['Risky Captain']['player_name']}** (Pred: {captain_suggestions['Risky Captain']['predicted_points_rf']:.2f})")


print("\n--- Free Hit Team Picker (GW {}) ---".format(GW_TEST))
free_hit_team = suggest_free_hit(test_df)

if free_hit_team:
    free_hit_df = pd.DataFrame(free_hit_team)
    total_cost = free_hit_df['price_100k'].sum() / 10
    total_points = free_hit_df['predicted_points_rf'].sum()
    
    print(f"Total Predicted Points: {total_points:.2f} | Total Cost: £{total_cost:.1f}m")
    
    # Display the starting XI (using the standard FPL 3-4-3 formation logic - 1 GKP, 3 DEF, 4 MID, 3 FWD)
    # This requires more complex sub logic, so we'll simplify by sorting by position and points
    starting_gkp = free_hit_df[free_hit_df['position_id'] == 1].sort_values(by='predicted_points_rf', ascending=False).head(1)
    starting_def = free_hit_df[free_hit_df['position_id'] == 2].sort_values(by='predicted_points_rf', ascending=False).head(3)
    starting_mid = free_hit_df[free_hit_df['position_id'] == 3].sort_values(by='predicted_points_rf', ascending=False).head(4)
    starting_fwd = free_hit_df[free_hit_df['position_id'] == 4].sort_values(by='predicted_points_rf', ascending=False).head(3)
    
    starting_xi = pd.concat([starting_gkp, starting_def, starting_mid, starting_fwd])
    
    # Map position ID to name
    starting_xi['Position'] = starting_xi['position_id'].map(pos_map)
    starting_xi['Team'] = starting_xi['team_id'].map(team_map)

    print("\nStarting XI (1-3-4-3 formation by top points in position):")
    print(starting_xi[['player_name', 'Position', 'Team', 'price_100k', 'predicted_points_rf']].to_string(index=False))

--- Captain Suggestions (GW 5) ---
Safe Captain: **Haaland** (Pred: 9.04)
Vice Captain: **Semenyo** (Pred: 2.43)
Risky Captain: **Stach** (Pred: 13.72)

--- Free Hit Team Picker (GW 5) ---
Total Predicted Points: 147.98 | Total Cost: £8.2m

Starting XI (1-3-4-3 formation by top points in position):
  player_name Position Team  price_100k  predicted_points_rf
     Dúbravka      GKP  BUR         4.0                 6.34
      Maguire      DEF  MUN         4.4                 9.40
         Cash      DEF  AVL         4.6                 9.01
   N.Williams      DEF  NFO         4.8                 8.59
        Stach      MID  LEE         5.0                13.72
  Gravenberch      MID  LIV         5.7                13.28
        Iwobi      MID  FUL         6.5                12.85
      Anthony      MID  BUR         5.7                10.71
Calvert-Lewin      FWD  LEE         5.5                11.20
      Ekitiké      FWD  LIV         8.7                10.81
  Richarlison      FWD  TOT  

In [158]:
# Cell 6: Chip Strategy Planning (Random Forest based)

# --- Chip Strategy Constants (Simplified) ---
# GWs for the two halves of the season
GW_HALF_BREAK = 19 

# Simple heuristic to identify potential "Double Gameweeks" (DGW) or "Blank Gameweeks" (BGW)
# This requires deeper fixture data analysis, but we'll simulate a simple detection based on fixture count.
def check_for_special_gws(fixtures_df, current_gw, num_gws=5):
    """Checks for fixture anomalies in the next few GWs."""
    future_fixtures = fixtures_df[(fixtures_df['event'] > current_gw) & (fixtures_df['event'] <= current_gw + num_gws)]
    
    special_gws = {}
    
    for gw in future_fixtures['event'].unique():
        # Count total matches in the GW
        match_count = len(future_fixtures[future_fixtures['event'] == gw])
        
        # In a standard 20-team league (10 matches)
        if match_count < 10:
            special_gws[gw] = 'BGW' # Blank Gameweek (fewer fixtures)
        elif match_count > 10:
            special_gws[gw] = 'DGW' # Double Gameweek (more fixtures)
            
    return special_gws

def suggest_chip_plan(current_gw, fixtures_df, player_predictions, active_chips):
    """Provides a high-level chip strategy plan."""
    
    plan = []
    
    # 1. Identify future special GWs
    special_gws = check_for_special_gws(fixtures_df, current_gw, num_gws=10) # Look 10 GWs ahead
    
    # 2. Bench Boost (BB) Strategy: Best used in a massive DGW
    if 'Bench Boost' not in active_chips:
        bb_gw = next((gw for gw, type in special_gws.items() if type == 'DGW'), None)
        if bb_gw:
            plan.append(f"**Bench Boost (BB)**: Target **GW {bb_gw}** (Potential Double Gameweek). Start planning transfers 3-4 GWs prior to stock up on players with 2 fixtures.")
        else:
            plan.append("Bench Boost (BB): No strong DGW identified yet. Keep it for a confirmed DGW, usually later in the second half of the season.")

    # 3. Triple Captain (TC) Strategy: Best used on a *single* fixture player predicted to haul, or a very early DGW.
    if 'Triple Captain' not in active_chips:
        # Simple TC suggestion: find the highest predicted score overall
        highest_scorer = player_predictions.sort_values(by='predicted_points_rf', ascending=False).iloc[0]
        
        # Only suggest if the prediction is significantly high
        if highest_scorer['predicted_points_rf'] >= 10.0:
            plan.append(f"**Triple Captain (TC)**: Consider using on **{highest_scorer['player_name']}** (Predicted {highest_scorer['predicted_points_rf']:.2f} points in GW {current_gw}). This prediction is strong enough to risk the chip early.")
        elif 'DGW' in special_gws.values():
             plan.append("Triple Captain (TC): Hold for a confirmed DGW player, as the potential ceiling is higher.")
        else:
             plan.append("Triple Captain (TC): No immediate top-tier option or DGW. Hold.")

    # 4. Free Hit (FH) Strategy: Essential for a Blank Gameweek (BGW)
    if 'Free Hit' not in active_chips:
        fh_gw = next((gw for gw, type in special_gws.items() if type == 'BGW'), None)
        if fh_gw:
            plan.append(f"**Free Hit (FH)**: Target **GW {fh_gw}** (Potential Blank Gameweek). This chip allows you to field a full XI when other managers can't.")
        else:
            plan.append("Free Hit (FH): Hold for a confirmed major Blank Gameweek.")

    # 5. Wildcard (WC) Strategy: Use before a major run of good fixtures, or to restructure before a DGW.
    if 'Wildcard' not in active_chips:
        half = "first" if current_gw < GW_HALF_BREAK else "second"
        
        if current_gw < GW_HALF_BREAK and current_gw >= GW_TRAIN_END: # Suggest the first one now
            plan.append(f"**Wildcard (WC) ({half} half)**: Use now in **GW {current_gw}** to implement the optimal team composition and set up your squad for the next 3-5 GWs and future chip plays (like the suggested BB/FH).")
        elif current_gw > GW_HALF_BREAK and 'Wildcard (first half)' in active_chips:
             plan.append(f"**Wildcard (WC) (second half)**: Hold until mid-season (around GW 30-34) to prepare for the final DGWs/BGWs.")

    return plan

# --- Execution ---

# Assume user has used none of their chips for the purpose of this test cell
simulated_active_chips = [] 
simulated_current_gw = GW_TEST 

print("--- FPL Chip Strategy Plan (Based on Random Forest Predictions at GW {}) ---".format(simulated_current_gw))
chip_plan = suggest_chip_plan(
    current_gw=simulated_current_gw, 
    fixtures_df=fixtures, 
    player_predictions=test_df, # Only predicts for current GW, ideally we'd predict for future GWs too
    active_chips=simulated_active_chips
)

print('\n'.join(chip_plan))

--- FPL Chip Strategy Plan (Based on Random Forest Predictions at GW 5) ---
Bench Boost (BB): No strong DGW identified yet. Keep it for a confirmed DGW, usually later in the second half of the season.
**Triple Captain (TC)**: Consider using on **Stach** (Predicted 13.72 points in GW 5). This prediction is strong enough to risk the chip early.
Free Hit (FH): Hold for a confirmed major Blank Gameweek.
**Wildcard (WC) (first half)**: Use now in **GW 5** to implement the optimal team composition and set up your squad for the next 3-5 GWs and future chip plays (like the suggested BB/FH).


In [159]:
# Cell 7: Wildcard/Free Hit Multi-Gameweek Prediction and XGBoost Setup

# --- Wildcard Multi-GW Prediction ---

def predict_future_points_single_player(player_id, current_gw, num_gws, rf_model, features, all_player_live, all_fixtures):
    """
    Simulates prediction for a single player over multiple future gameweeks.
    This is highly simplified and assumes future stats are the same as current form.
    In reality, you need to forecast *all* features, not just points.
    """
    future_points = {}
    
    # Get the player's last known stats (from the GW just before current_gw)
    last_known_stats = all_player_live[all_player_live['id'] == player_id].sort_values(by='GW', ascending=False).iloc[0]

    for i in range(1, num_gws + 1):
        target_gw = current_gw + i
        
        # Find the fixture for this player in the target GW
        target_fixture = all_fixtures[(all_fixtures['event'] == target_gw) & ((all_fixtures['team_h'] == last_known_stats['team_id']) | (all_fixtures['team_a'] == last_known_stats['team_id']))]
        
        if not target_fixture.empty:
            f = target_fixture.iloc[0]
            
            # Recreate the feature vector for prediction, using form/stats from the last GW and new opponent difficulty
            new_features = last_known_stats[features].copy().to_dict()
            
            if f['team_h'] == last_known_stats['team_id']: # Player is Home team
                new_features['opp_difficulty'] = f['team_a_difficulty']
            else: # Player is Away team
                new_features['opp_difficulty'] = f['team_h_difficulty']
                
            X_future = pd.DataFrame([new_features], columns=features)
            
            # Predict
            pred_point = rf_model.predict(X_future)[0]
            future_points[target_gw] = np.maximum(0, pred_point.round(2))
        else:
            future_points[target_gw] = 0.0 # No fixture / Data missing
            
    return future_points

# --- Execution ---

GW_WILDCARD_PREDICT_WEEKS = 3
current_gw_for_wc = GW_TEST # We are predicting starting from the next GW (GW 6, 7, 8)

print("--- Wildcard Team Multi-GW Prediction (Based on Random Forest) ---")
print(f"Wildcard team is the one generated in Cell 4. Predicting for GW {current_gw_for_wc+1} to GW {current_gw_for_wc + GW_WILDCARD_PREDICT_WEEKS}.")

multi_gw_predictions = []

if 'wildcard_df' in locals():
    wc_player_ids = wildcard_df['id'].tolist()
    
    for player_id in wc_player_ids:
        player_name = player_name_map.get(player_id, f"ID {player_id}")
        
        # Predict points for next 3 GWs
        future_points = predict_future_points_single_player(
            player_id, 
            current_gw_for_wc, 
            GW_WILDCARD_PREDICT_WEEKS, 
            rf_model, 
            FEATURES, 
            player_live, 
            fixtures
        )
        
        total_predicted = sum(future_points.values())
        
        multi_gw_predictions.append({
            'Player': player_name,
            **{f'GW{gw}': pts for gw, pts in future_points.items()},
            'Total 3-GW Pred': total_predicted
        })

    wc_multi_df = pd.DataFrame(multi_gw_predictions)
    wc_multi_df['Team ID'] = wc_multi_df['Player'].map(player_static.set_index('web_name')['team'].to_dict())
    wc_multi_df['Position ID'] = wc_multi_df['Player'].map(player_static.set_index('web_name')['element_type'].to_dict())
    
    # Display top 15 predicted total points
    print("\nWildcard Team 3-GW Total Points Prediction:")
    print(wc_multi_df.sort_values(by='Total 3-GW Pred', ascending=False).head(15).to_string(index=False))
    print(f"\nTotal Predicted Team Score over 3 GWs: {wc_multi_df['Total 3-GW Pred'].sum():.2f}")

# --- XGBoost Setup ---

print("\n" + "="*50)
print("--- XGBoost Model Setup ---")
print("="*50)

try:
    from xgboost import XGBRegressor
    
    # Initialize and train the XGBoost Regressor for player points
    xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    print("Training XGBoost model for player points...")
    xgb_model.fit(X_train, y_train)
    print("XGBoost training complete.")

    # Predict points for the test GW (GW_TEST)
    y_pred_xgb = xgb_model.predict(X_test)
    test_df['predicted_points_xgb'] = np.maximum(0, y_pred_xgb.round(2))

    # Evaluate (if actual points are available)
    if not y_test.isnull().all():
        mae_xgb = mean_absolute_error(y_test, test_df['predicted_points_xgb'])
        print(f"XGBoost MAE for GW {GW_TEST} (Player Points): {mae_xgb:.2f}")
        if 'mae_rf' in locals():
            print(f"Random Forest MAE: {mae_rf:.2f}")

    print("\n--- XGBoost Top Predicted Players for GW {} ---".format(GW_TEST))
    top_players_xgb = test_df.sort_values(by='predicted_points_xgb', ascending=False).head(10)
    print(top_players_xgb[['player_name', 'position_id', 'predicted_points_xgb', 'total_points']].to_string(index=False))
    
    # Note: To fully implement all features with XGBoost, you would repeat the functions from Cells 3-6,
    # replacing `rf_model` with `xgb_model`.
    
except ImportError:
    print("XGBoost not installed. Please install with: pip install xgboost")

--- Wildcard Team Multi-GW Prediction (Based on Random Forest) ---
Wildcard team is the one generated in Cell 4. Predicting for GW 6 to GW 8.

Wildcard Team 3-GW Total Points Prediction:
       Player  GW6  GW7  GW8  Total 3-GW Pred  Team ID  Position ID
         Pope 7.17 7.17 7.19            21.53       15            1
     Truffert 6.31 6.31 6.31            18.93        4            2
       Minteh 3.32 3.32 3.32             9.96        6            3
  Gravenberch 2.60 2.60 2.60             7.80       12            3
     Dúbravka 2.57 2.57 2.57             7.71        3            1
        Iwobi 2.33 2.33 2.33             6.99       10            3
       Senesi 2.20 2.20 2.20             6.60        4            2
        Stach 2.14 2.14 2.14             6.42       11            3
   N.Williams 1.98 1.98 1.98             5.94       16            2
         Cash 1.74 1.74 1.73             5.21        2            2
Calvert-Lewin 1.66 1.66 1.66             4.98       11           

In [161]:
# Cell 8 (Updated): HTML Export and Preview for Results and Metrics

from IPython.display import display, HTML

# --- Utility Function to Map IDs to Names/Positions ---
# Re-define these maps in case the notebook was run non-sequentially
if 'team_map' not in globals():
    team_map = {t['id']: t['short_name'] for t in bootstrap_static.get('teams', [])}
if 'pos_map' not in globals():
    POS_IDS = {'GKP': 1, 'DEF': 2, 'MID': 3, 'FWD': 4}
    pos_map = {v: k for k, v in POS_IDS.items()}
if 'player_name_map' not in globals():
    player_name_map = player_static.set_index('id')['web_name'].to_dict()

# --- Utility Function to get Match Prediction Metrics (NEW) ---
def get_match_prediction_metrics(match_df, model_name):
    """Calculates accuracy and result prediction stats for match predictions."""
    
    # Use the appropriate columns based on model
    pred_h = f'predicted_score_h_{model_name.lower().replace(" ", "_")}'
    pred_a = f'predicted_score_a_{model_name.lower().replace(" ", "_")}'
    
    # Fallback/standard columns if the model-specific one isn't available
    if pred_h not in match_df.columns:
        pred_h = 'predicted_score_h'
        pred_a = 'predicted_score_a'
    
    
    df = match_df.copy().dropna(subset=['team_h_score', 'team_a_score'])
    
    # Score Accuracy (Exact Score)
    correct_score = (df[pred_h] == df['team_h_score']) & (df[pred_a] == df['team_a_score'])
    total_matches = len(df)
    score_accuracy = correct_score.sum() / total_matches if total_matches > 0 else 0
    
    # Result Accuracy (Win/Loss/Draw)
    actual_result = np.sign(df['team_h_score'] - df['team_a_score'])
    predicted_result = np.sign(df[pred_h] - df[pred_a])
    
    correct_result = (actual_result == predicted_result)
    result_accuracy = correct_result.sum() / total_matches if total_matches > 0 else 0
    
    # Breakdown of Correct Results
    correct_breakdown = {
        'Win/Loss/Draw Predicted Correctly': correct_result.sum(),
        'Total Matches Analyzed': total_matches,
    }
    
    return score_accuracy, result_accuracy, correct_breakdown

# --- Main HTML Generation Function ---

def generate_html_report(gw, prediction_df, match_df, cap_suggestions, wildcard_df, transfer_suggestion, model_name="Random Forest"):
    """
    Generates a comprehensive HTML report string for the given GW predictions.
    
    Args:
        gw (int): Gameweek number.
        prediction_df (pd.DataFrame): DataFrame with player point predictions (test_df).
        match_df (pd.DataFrame): DataFrame with match predictions (fixture_test_df).
        cap_suggestions (dict): Output from suggest_captains.
        wildcard_df (pd.DataFrame): The optimal 15-man squad.
        transfer_suggestion (str): Output of the transfer recommendation.
        model_name (str): The name of the predictive model used.
        
    Returns:
        str: The complete HTML string.
    """
    
    # Use the predicted points column based on the model name
    pred_col = f'predicted_points_{model_name.lower().replace(" ", "_")}'
    if pred_col not in prediction_df.columns:
        pred_col = 'predicted_points_rf' # Fallback
        
    # --- 1. Styling ---
    html_style = """
    <style>
        body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; margin: 20px; background-color: #f4f7f9; color: #333; }
        .report-container { max-width: 1200px; margin: auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }
        h1, h2 { color: #01ff70; border-bottom: 2px solid #e0e0e0; padding-bottom: 5px; margin-top: 20px; }
        h1 { font-size: 1.8em; }
        h2 { font-size: 1.4em; color: #0d2c55; }
        table { width: 100%; border-collapse: collapse; margin-top: 10px; font-size: 0.9em; }
        th, td { padding: 10px 15px; text-align: left; border: 1px solid #ddd; }
        th { background-color: #0d2c55; color: white; font-weight: 600; }
        tr:nth-child(even) { background-color: #f9f9f9; }
        .match-score { font-size: 1.1em; font-weight: bold; color: #cc0000; }
        .transfer-box { background-color: #fff3cd; border: 1px solid #ffeeba; padding: 15px; border-radius: 5px; margin-top: 15px; color: #856404; font-weight: bold; }
        .metrics-box { background-color: #e2f0fd; border: 1px solid #b8daff; padding: 10px; border-radius: 5px; margin-top: 10px; font-size: 1em;}
        .metrics-box p { margin: 5px 0; }
        .captain-summary { display: flex; justify-content: space-around; margin-top: 15px; }
        .captain-card { border: 1px solid #01ff70; padding: 10px; border-radius: 5px; text-align: center; width: 30%; background-color: #e6ffe6; }
        .captain-card h4 { color: #0d2c55; margin-top: 0; }
        .captain-card span { font-weight: bold; color: #01ff70; font-size: 1.2em; }
    </style>
    """
    
    html_content = f"""
    <div class="report-container">
        <h1>FPL AI Prediction Report - Gameweek {gw} ({model_name} Model)</h1>
    """

    # --- 2. Match Results Prediction Table ---
    match_html = "<h2>1. Match Score Predictions</h2>"
    match_html += "<table><thead><tr><th>Fixture</th><th>Predicted Score</th><th>Actual Score (if available)</th></tr></thead><tbody>"
    
    # Prepare data for match table
    for _, row in match_df.iterrows():
        home_team = team_map.get(row['team_h'], f"Team {row['team_h']}")
        away_team = team_map.get(row['team_a'], f"Team {row['team_a']}")
        
        # Use the correct predicted score columns
        pred_h_col = f'predicted_score_h_{model_name.lower().replace(" ", "_")}'
        pred_a_col = f'predicted_score_a_{model_name.lower().replace(" ", "_")}'
        if pred_h_col not in row:
            pred_h_col = 'predicted_score_h'
            pred_a_col = 'predicted_score_a'
            
        pred_score = f"{home_team} <span class='match-score'>{int(row.get(pred_h_col, 'N/A'))} - {int(row.get(pred_a_col, 'N/A'))}</span> {away_team}"
        
        # Check if actual scores are available
        actual_h = row['team_h_score']
        actual_a = row['team_a_score']
        actual_score = f"{int(actual_h)} - {int(actual_a)}" if not pd.isna(actual_h) else "N/A"
        
        match_html += f"<tr><td>{home_team} vs {away_team}</td><td>{pred_score}</td><td>{actual_score}</td></tr>"

    match_html += "</tbody></table>"
    
    # Add metrics below the table (only if actual scores are available)
    if not match_df['team_h_score'].isna().all():
        score_acc, result_acc, breakdown = get_match_prediction_metrics(match_df, model_name)
        match_html += "<h3>Prediction Metrics:</h3>"
        match_html += f"""
        <div class="metrics-box">
            <p><strong>Result Accuracy (Win/Loss/Draw):</strong> {result_acc:.2%} ({breakdown['Win/Loss/Draw Predicted Correctly']}/{breakdown['Total Matches Analyzed']} correct results)</p>
            <p><strong>Exact Score Accuracy:</strong> {score_acc:.2%} (Hardest metric)</p>
        </div>
        """

    html_content += match_html

    # --- 3. Top Player Points Prediction Table ---
    player_html = f"<h2>2. Top Player Point Predictions (Gameweek {gw})</h2>"
    
    top_players = prediction_df.sort_values(by=pred_col, ascending=False).head(15).copy()
    top_players['Position'] = top_players['position_id'].map(pos_map)
    top_players['Predicted Pts'] = top_players[pred_col].round(2)
    top_players['Actual Pts'] = top_players['total_points']

    player_html += "<table><thead><tr><th>Player</th><th>Position</th><th>Predicted Pts</th><th>Actual Pts</th><th>Price (£m)</th></tr></thead><tbody>"
    for _, row in top_players.iterrows():
        # Correctly format price to one decimal place
        formatted_price = (row['price_100k'] / 10.0)
        
        player_html += f"<tr><td>{row['player_name']}</td><td>{row['Position']}</td><td>{row['Predicted Pts']:.2f}</td><td>{row['Actual Pts']:.0f}</td><td>{formatted_price:.1f}</td></tr>"
    player_html += "</tbody></table>"
    html_content += player_html

    # --- 4. Transfer Recommendation ---
    html_content += "<h2>3. Transfer Recommendation (Negative Hit)</h2>"
    
    # Logic to add position information for clarity
    if "-4 Hit Recommended" in transfer_suggestion and "Transfer OUT: " in transfer_suggestion and "for IN: " in transfer_suggestion:
        try:
            # Extract player names
            out_name = transfer_suggestion.split("OUT: ")[1].split(" for")[0].strip()
            in_name = transfer_suggestion.split("IN: ")[1].split(". Net")[0].strip()
            
            # Find their positions
            out_pos_id = prediction_df[prediction_df['player_name'] == out_name]['position_id'].iloc[0]
            in_pos_id = prediction_df[prediction_df['player_name'] == in_name]['position_id'].iloc[0]
            out_pos = pos_map.get(out_pos_id, 'N/A')
            in_pos = pos_map.get(in_pos_id, 'N/A')
            
            # Format the transfer string
            transfer_suggestion_formatted = transfer_suggestion.replace(
                f"OUT: {out_name}", f"OUT: **{out_name}** ({out_pos})"
            ).replace(
                f"IN: {in_name}", f"IN: **{in_name}** ({in_pos})"
            )
        except Exception:
             transfer_suggestion_formatted = transfer_suggestion # Fallback
    else:
        transfer_suggestion_formatted = transfer_suggestion

    html_content += f"<div class='transfer-box'>{transfer_suggestion_formatted}</div>"
    
    # --- 5. Captain Suggestions ---
    html_content += "<h2>4. Captain & Vice-Captain Suggestions</h2>"
    html_content += f"""
    <div class="captain-summary">
        <div class="captain-card">
            <h4>Safe Captain (C)</h4>
            <span>{cap_suggestions['Safe Captain']['player_name']}</span>
            <p>Pred: {cap_suggestions['Safe Captain'][pred_col]:.2f}</p>
        </div>
        <div class="captain-card">
            <h4>Vice Captain (VC)</h4>
            <span>{cap_suggestions['Vice Captain']['player_name']}</span>
            <p>Pred: {cap_suggestions['Vice Captain'][pred_col]:.2f}</p>
        </div>
        <div class="captain-card">
            <h4>Risky Captain (C)</h4>
            <span>{cap_suggestions['Risky Captain']['player_name']}</span>
            <p>Pred: {cap_suggestions['Risky Captain'][pred_col]:.2f}</p>
        </div>
    </div>
    """

    # --- 6. Wildcard Team Preview ---
    if wildcard_df is not None and not wildcard_df.empty:
        wc_total_points = wildcard_df[pred_col].sum()
        wc_total_cost = wildcard_df['price_100k'].sum() / 10
        
        html_content += f"<h2>5. Wildcard / Free Hit Team Preview (15 Players)</h2>"
        html_content += f"<p>Total Predicted Points for Squad: **{wc_total_points:.2f}** | Total Squad Cost: **£{wc_total_cost:.1f}m**</p>"

        wc_df_display = wildcard_df[['player_name', 'price_100k', pred_col, 'position_id', 'team_id']].copy()
        wc_df_display.rename(columns={'player_name': 'Player', 'price_100k': 'Price (£100k)', pred_col: 'Predicted Pts', 'position_id': 'Position ID', 'team_id': 'Team ID'}, inplace=True)
        
        wc_df_display['Position'] = wc_df_display['Position ID'].map(pos_map)
        wc_df_display['Team'] = wc_df_display['Team ID'].map(team_map)

        # Sort for display
        wc_df_display.sort_values(by=['Position ID', 'Predicted Pts'], ascending=[True, False], inplace=True)
        
        wc_table_html = wc_df_display[['Player', 'Position', 'Team', 'Price (£100k)', 'Predicted Pts']].to_html(index=False, classes='table')
        
        # Replace the ugly Pandas table header/style with a nicer one
        wc_table_html = wc_table_html.replace('<th>Price (£100k)</th>', '<th>Price (£m)</th>')
        wc_table_html = wc_table_html.replace('<th>Predicted Pts</th>', f'<th>{model_name} Pred Pts</th>')
        wc_table_html = wc_table_html.replace('<tbody>', '<thead><tr><th>Player</th><th>Position</th><th>Team</th><th>Price (£m)</th><th>Predicted Pts</th></tr></thead><tbody>')

        # Clean up price display in the table
        for index, row in wc_df_display.iterrows():
            # Apply the correct formatting logic (divide by 10)
            formatted_price_100k = row["Price (£100k)"] / 10.0
            # Since to_html casts to string, we need a slight trick to find the cell content
            # This is fragile but works for the current Pandas output
            wc_table_html = wc_table_html.replace(f'<td>{int(row["Price (£100k)"])}</td>', f'<td>{formatted_price_100k:.1f}</td>', 1)

        html_content += wc_table_html
    
    html_content += "</div>" # Close report-container
    
    return html_style + html_content


# --- Execution Cell (Random Forest) ---

print("--- Generating HTML Report for GW {} (Random Forest) ---".format(GW_TEST))

try:
    # Use the variables populated in the Random Forest cells (2-6)
    html_output_rf = generate_html_report(
        gw=GW_TEST, 
        prediction_df=test_df.copy(), 
        match_df=fixture_test_df.copy(), 
        cap_suggestions=captain_suggestions, 
        wildcard_df=wildcard_df.copy(), 
        transfer_suggestion=suggestion, 
        model_name="Random Forest"
    )

    # 1. Display the HTML in the Notebook
    print("Displaying HTML Preview...")
    display(HTML(html_output_rf))

    # 2. Export to a file
    file_path_rf = f'FPL_GW{GW_TEST}_Report_RF.html'
    with open(file_path_rf, 'w', encoding='utf8') as f:
        f.write(html_output_rf)
    print(f"\nSuccessfully exported Random Forest report to: {file_path_rf}")

except NameError as e:
    print(f"ERROR: Could not generate report. Required variable missing: {e}. Ensure cells 2-6 were executed successfully.")


# --- Example Execution Cell (XGBoost - requires re-running helper functions) ---

print("\n--- Generating HTML Report for GW {} (XGBoost) ---".format(GW_TEST))

try:
    # (The XGBoost setup logic from the previous response is retained here to ensure the necessary 
    # dataframes/predictions for XGBoost are available for the new function)
    if 'xgb_model' in locals():
        # Re-run key prediction functions using the XGBoost model
        
        # Match Prediction (re-train XGBoost for match classification)
        xgb_match_model = XGBRegressor(n_estimators=50, learning_rate=0.1, random_state=42)
        xgb_match_model.fit(X_train_match, y_train_match)
        y_pred_match_xgb = xgb_match_model.predict(X_test_match)
        fixture_test_df['predicted_result_xgb'] = np.sign(y_pred_match_xgb).astype(int)
        fixture_test_df['predicted_score_h_xgb'] = np.maximum(0, (fixture_test_df['predicted_result_xgb'] * 0.5 + 1.5).round(0)).astype(int)
        fixture_test_df['predicted_score_a_xgb'] = np.maximum(0, (fixture_test_df['predicted_result_xgb'] * -0.5 + 1.5).round(0)).astype(int)
        
        # Use the XGBoost predictions for the report functions
        xgb_cap_suggestions = suggest_captains(test_df.rename(columns={'predicted_points_xgb': 'predicted_points_rf'})) # Hack to reuse function
        xgb_wildcard_team = create_optimal_team_lp(test_df.rename(columns={'predicted_points_xgb': 'predicted_points_rf'}), budget=MAX_COST_100K)
        xgb_wildcard_df = pd.DataFrame(xgb_wildcard_team).rename(columns={'predicted_points_rf': 'predicted_points_xgb'})
        
        xgb_transfer_suggestion, _ = suggest_negative_transfer(
            current_team=simulated_current_team_gw_4_list, 
            bank_100k=SIM_BANK, 
            free_transfers=SIM_FT,
            player_predictions=test_df.rename(columns={'predicted_points_xgb': 'predicted_points_rf'})
        )
        
        # Generate the report
        html_output_xgb = generate_html_report(
            gw=GW_TEST, 
            prediction_df=test_df.copy(), 
            match_df=fixture_test_df.rename(columns={'predicted_score_h_xgb': 'predicted_score_h', 'predicted_score_a_xgb': 'predicted_score_a'}), 
            cap_suggestions=xgb_cap_suggestions, 
            wildcard_df=xgb_wildcard_df.copy(), 
            transfer_suggestion=xgb_transfer_suggestion, 
            model_name="XGBoost"
        )
        
        # 1. Display the HTML in the Notebook
        print("Displaying HTML Preview...")
        display(HTML(html_output_xgb))

        # 2. Export to a file
        file_path_xgb = f'FPL_GW{GW_TEST}_Report_XGB.html'
        with open(file_path_xgb, 'w', encoding='utf8') as f:
            f.write(html_output_xgb)
        print(f"\nSuccessfully exported XGBoost report to: {file_path_xgb}")
        
    else:
        print("XGBoost model was not available. Skip report generation.")
        
except ImportError:
    print("XGBoost library is missing. Please install it.")
except NameError as e:
    print(f"ERROR: Could not generate XGBoost report. Required variable missing: {e}. Ensure Cell 7 was executed successfully.")

--- Generating HTML Report for GW 5 (Random Forest) ---
Displaying HTML Preview...


Fixture,Predicted Score,Actual Score (if available)
LIV vs EVE,LIV 2 - 1 EVE,2 - 1
BHA vs TOT,BHA 2 - 1 TOT,2 - 2
BUR vs NFO,BUR 2 - 1 NFO,1 - 1
WHU vs CRY,WHU 1 - 2 CRY,1 - 2
WOL vs LEE,WOL 1 - 2 LEE,1 - 3
MUN vs CHE,MUN 1 - 2 CHE,2 - 1
FUL vs BRE,FUL 2 - 1 BRE,3 - 1
BOU vs NEW,BOU 1 - 2 NEW,0 - 0
SUN vs AVL,SUN 2 - 1 AVL,1 - 1
ARS vs MCI,ARS 1 - 2 MCI,1 - 1

Player,Position,Predicted Pts,Actual Pts,Price (£m)
Stach,MID,13.72,15,0.5
Gravenberch,MID,13.28,15,0.6
Iwobi,MID,12.85,13,0.7
Calvert-Lewin,FWD,11.2,11,0.6
Ekitiké,FWD,10.81,8,0.9
Anthony,MID,10.71,10,0.6
Richarlison,FWD,9.71,9,0.7
Minteh,MID,9.71,9,0.6
B.Fernandes,MID,9.53,10,0.9
Maguire,DEF,9.4,11,0.4

Player,Position,Team,Price (£m),Random Forest Pred Pts
Player,Position,Team,Price (£m),Predicted Pts
Dúbravka,GKP,BUR,4.0,6.34
Pope,GKP,NEW,5.1,5.91
Maguire,DEF,MUN,4.4,9.4
Cash,DEF,AVL,4.6,9.01
N.Williams,DEF,NFO,4.8,8.59
Truffert,DEF,BOU,4.5,8.43
Senesi,DEF,BOU,4.9,8.31
Stach,MID,LEE,5.0,13.72
Gravenberch,MID,LIV,5.7,13.28
Iwobi,MID,FUL,6.5,12.85



Successfully exported Random Forest report to: FPL_GW5_Report_RF.html

--- Generating HTML Report for GW 5 (XGBoost) ---


ValueError: cannot reindex on an axis with duplicate labels

In [162]:
# Cell 9: Detailed Chip & Transfer Planning Strategy

def generate_chip_strategy_plan(current_gw, future_fixtures, team_good_gw, captain_suggestion, transfer_suggestion):
    """
    Generates a multi-GW chip and transfer strategy, ignoring Blank GWs 
    and focusing on fixture strength and optimal chip deployment.
    
    Args:
        current_gw (int): The Gameweek the current predictions are for.
        future_fixtures (dict): Fixture difficulty map for future GWs.
        team_good_gw (int): The Gameweek where the user's current team has good fixtures (e.g., GW 11).
        captain_suggestion (dict): Dictionary of captain candidates from the main report.
        transfer_suggestion (str): The calculated optimal transfer for the next GW.
        
    Returns:
        str: A detailed multi-GW plan in HTML format.
    """
    
    # Extract Captain/Triple Captain candidate details
    tc_candidate_name = captain_suggestion['Risky Captain']['player_name']
    tc_candidate_points = captain_suggestion['Risky Captain']['predicted_points_rf']
    
    # --- 1. Styling ---
    html_style = """
    <style>
        .plan-container { max-width: 800px; margin: 20px auto; padding: 20px; border: 2px solid #0d2c55; border-radius: 8px; background-color: #ffffff; }
        .plan-container h2 { color: #0d2c55; border-bottom: 3px solid #01ff70; padding-bottom: 5px; font-size: 1.6em; }
        .plan-step { margin-top: 15px; padding: 10px; border-left: 5px solid #01ff70; background-color: #f7f9fc; border-radius: 4px; }
        .plan-step h4 { color: #01ff70; margin-top: 0; font-size: 1.1em; }
        .chip-highlight { font-weight: bold; color: #cc0000; }
    </style>
    """
    
    html_content = f"""
    <div class="plan-container">
        <h2>Optimal FPL Strategy Plan (GW {current_gw} Onwards) 📈</h2>
        <p>This plan prioritizes leveraging **fixture difficulty** and **player form/predictions** to maximize chip impact.</p>
        
        <div class="plan-step">
            <h4>🎯 Strategy Focus: Setup for Triple Captain</h4>
            <p>The immediate goal is to transfer in or ensure the predicted **Triple Captain (TC)** candidate, **{tc_candidate_name}** (Predicted **{tc_candidate_points:.2f} points**), is in your squad and faces a favorable fixture.</p>
        </div>

        <div class="plan-step">
            <h4>📅 Gameweek {current_gw} (Current GW) - Execute Transfer</h4>
            <p><strong>Transfer Action:</strong> Execute the calculated optimal transfer:</p>
            <p>**{transfer_suggestion}**</p>
            <p><em>(This transfer is crucial to maximize points in the immediate GW and/or set up for the TC chip.)</em></p>
            <p><strong>Chip Usage:</strong> **NO CHIP** (Save all chips for future strategic deployment).</p>
        </div>
        
        <div class="plan-step">
            <h4>📅 Gameweek {current_gw + 1} to {team_good_gw - 1} - Hold Transfers / Roll</h4>
            <p><strong>Transfer Action:</strong> **ROLL** your free transfer, unless a major injury occurs.</p>
            <p><strong>Goal:</strong> Gain 2 Free Transfers for the big move in GW {team_good_gw}. This gives flexibility to buy two good players or take a small hit without penalty.</p>
        </div>

        <div class="plan-step">
            <h4>📅 Gameweek {team_good_gw} - Triple Captain Deployment</h4>
            <p><strong>Assumption:</strong> Your team's fixtures are optimal in GW {team_good_gw}, and **{tc_candidate_name}** has a strong fixture.</p>
            <p><strong>Chip Usage:</strong> <span class="chip-highlight">**TRIPLE CAPTAIN (TC)**</span> on **{tc_candidate_name}**.</p>
            <p><strong>Transfer Action:</strong> Use your two accumulated Free Transfers to maximize your starting XI's points, supporting the TC player.</p>
        </div>

        <div class="plan-step">
            <h4>📅 GW {team_good_gw + 1} to GW 18 - Wildcard Setup</h4>
            <p><strong>Chip Usage:</strong> **SAVE** the <span class="chip-highlight">**Wildcard (WC)**</span> for now.</p>
            <p><strong>Transfer Action:</strong> Plan transfers to gradually improve the depth players (e.g., your bench GKP/DEF) in preparation for a future **Bench Boost**.</p>
        </div>
        
        <div class="plan-step">
            <h4>📅 Future Chip Usage Recommendation</h4>
            <p>• **Bench Boost (BB):** Save for a **confirmed Double Gameweek** where all 15 players have two fixtures. This is the only way to maximize its value. *Target GW 25+*.</p>
            <p>• **Free Hit (FH):** Hold until the final major **Blank Gameweek** or a chaotic GW where many top teams are postponed/rested, allowing you to field a full XI of low-ownership/differential players.</p>
        </div>
    </div>
    """
    
    return html_style + html_content

# --- Execution for Chip Strategy Cell ---

# --- DUMMY INPUTS (REPLACE WITH YOUR ACTUAL DATA) ---
# GW_TEST: The current Gameweek from the main report (e.g., 5)
# suggestion: The transfer string from the main report
# captain_suggestions: The captain dict from the main report

# Define the user-specified "Good Gameweek" for this example:
TEAM_GOOD_GW = 11 

# Assuming fixture_test_df can be used to generate a simple future fixture difficulty map
# (Actual implementation would require fetching future FPL data)
future_fixtures_map = {
    GW_TEST + 1: 'Good', 
    GW_TEST + 2: 'Average', 
    TEAM_GOOD_GW: 'Very Good'
}
# --- END DUMMY INPUTS ---

print("--- Generating Detailed Chip and Transfer Plan ---")
try:
    plan_html = generate_chip_strategy_plan(
        current_gw=GW_TEST,
        future_fixtures=future_fixtures_map,
        team_good_gw=TEAM_GOOD_GW,
        captain_suggestion=captain_suggestions, # Using the RF captain suggestions
        transfer_suggestion=suggestion        # Using the RF transfer suggestion
    )
    
    # Display the HTML in the Notebook
    display(HTML(plan_html))

    # Export to a file
    file_path_plan = f'FPL_GW{GW_TEST}_Strategy_Plan.html'
    with open(file_path_plan, 'w', encoding='utf8') as f:
        f.write(plan_html)
    print(f"\nSuccessfully exported strategy plan to: {file_path_plan}")
    
except NameError as e:
    print(f"ERROR: Could not generate strategy plan. Required variable missing: {e}. Ensure cells 2-6 were executed successfully.")

--- Generating Detailed Chip and Transfer Plan ---



Successfully exported strategy plan to: FPL_GW5_Strategy_Plan.html
