Importing Libs

In [38]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score
import os
from IPython.display import display # For quick DataFrame review

# Setup for plotting
%matplotlib inline

In [39]:
import os
import json
import pandas as pd

# --- Load fixtures across all GWs ---
gw_folder = "data/gw_data"
all_fixtures = []

for file_name in os.listdir(gw_folder):
    if file_name.startswith("GW") and file_name.endswith(".json"):
        with open(os.path.join(gw_folder, file_name), "r", encoding="utf-8") as f:
            gw_data = json.load(f)
            gw_num = int(file_name.replace("GW", "").replace(".json", ""))
            for fixture in gw_data:
                fixture["gameweek"] = gw_num
            all_fixtures.extend(gw_data)

fixtures_df = pd.DataFrame(all_fixtures)

# --- Separate finished and upcoming matches ---
finished_df = fixtures_df[fixtures_df["finished"] == True].copy()
upcoming_df = fixtures_df[fixtures_df["finished"] == False].copy()

# --- Map team IDs to names (from bootstrap-static.json) ---
with open("data/bootstrap-static.json", "r", encoding="utf-8") as f:
    bootstrap_data = json.load(f)

team_id_to_name = {team["id"]: team["name"] for team in bootstrap_data["teams"]}

print(f"✅ Loaded {len(fixtures_df)} fixtures. Finished: {len(finished_df)}, Upcoming: {len(upcoming_df)}")


✅ Loaded 380 fixtures. Finished: 70, Upcoming: 310


In [40]:
from sklearn.preprocessing import LabelEncoder

# --- Encode match results ---
def match_result(row):
    if row["team_h_score"] > row["team_a_score"]:
        return "home_win"
    elif row["team_h_score"] < row["team_a_score"]:
        return "away_win"
    else:
        return "draw"

finished_df["result"] = finished_df.apply(match_result, axis=1)

# --- Features for models ---
features = ["team_h", "team_a", "team_h_difficulty", "team_a_difficulty"]

# Label encoder for result
le = LabelEncoder()
y_cls = le.fit_transform(finished_df["result"])


In [41]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# --- Classifier for match result ---
model_cls = RandomForestClassifier(n_estimators=300, random_state=42)
model_cls.fit(finished_df[features], y_cls)

# --- Regressors for home and away goals ---
home_goal_model = RandomForestRegressor(n_estimators=300, random_state=42)
away_goal_model = RandomForestRegressor(n_estimators=300, random_state=42)

home_goal_model.fit(finished_df[features], finished_df["team_h_score"])
away_goal_model.fit(finished_df[features], finished_df["team_a_score"])

print("✅ Models trained successfully.")


✅ Models trained successfully.


In [42]:
from IPython.display import display

# --- Features for prediction ---
features = ["team_h", "team_a", "team_h_difficulty", "team_a_difficulty"]

# Select the next 3 upcoming gameweeks
next_gws = sorted(upcoming_df["gameweek"].unique())[:3]
upcoming_subset = upcoming_df[upcoming_df["gameweek"].isin(next_gws)].copy()

if not upcoming_subset.empty:
    X_upcoming = upcoming_subset[features]

    # Predict goals
    upcoming_subset["pred_home_goals"] = home_goal_model.predict(X_upcoming).round().astype(int)
    upcoming_subset["pred_away_goals"] = away_goal_model.predict(X_upcoming).round().astype(int)

    # --- Derive result from predicted goals ---
    def derive_result(row):
        if row["pred_home_goals"] > row["pred_away_goals"]:
            return "home_win"
        elif row["pred_home_goals"] < row["pred_away_goals"]:
            return "away_win"
        else:
            return "draw"

    upcoming_subset["predicted_result"] = upcoming_subset.apply(derive_result, axis=1)

    # Map team names
    upcoming_subset["Home Team"] = upcoming_subset["team_h"].map(team_id_to_name)
    upcoming_subset["Away Team"] = upcoming_subset["team_a"].map(team_id_to_name)

    # Create readable scoreline
    upcoming_subset["Predicted Score"] = (
        upcoming_subset["Home Team"] + " " +
        upcoming_subset["pred_home_goals"].astype(str) + " - " +
        upcoming_subset["pred_away_goals"].astype(str) + " " +
        upcoming_subset["Away Team"]
    )

    # --- Print nicely grouped by gameweek ---
    for gw in next_gws:
        print(f"\n⚽️ Gameweek {gw} results:\n")
        gw_df = upcoming_subset[upcoming_subset["gameweek"] == gw][[
            "Home Team", "Away Team", "pred_home_goals", "pred_away_goals",
            "predicted_result", "Predicted Score"
        ]]
        display(gw_df)

else:
    print("✅ No upcoming fixtures to predict.")



⚽️ Gameweek 8 results:



Unnamed: 0,Home Team,Away Team,pred_home_goals,pred_away_goals,predicted_result,Predicted Score
360,Nott'm Forest,Chelsea,2,1,home_win,Nott'm Forest 2 - 1 Chelsea
361,Brighton,Newcastle,1,1,draw,Brighton 1 - 1 Newcastle
362,Burnley,Leeds,2,0,home_win,Burnley 2 - 0 Leeds
363,Crystal Palace,Bournemouth,2,1,home_win,Crystal Palace 2 - 1 Bournemouth
364,Man City,Everton,2,1,home_win,Man City 2 - 1 Everton
365,Sunderland,Wolves,2,1,home_win,Sunderland 2 - 1 Wolves
366,Fulham,Arsenal,0,0,draw,Fulham 0 - 0 Arsenal
367,Spurs,Aston Villa,1,1,draw,Spurs 1 - 1 Aston Villa
368,Liverpool,Man Utd,2,0,home_win,Liverpool 2 - 0 Man Utd
369,West Ham,Brentford,1,2,away_win,West Ham 1 - 2 Brentford



⚽️ Gameweek 9 results:



Unnamed: 0,Home Team,Away Team,pred_home_goals,pred_away_goals,predicted_result,Predicted Score
370,Leeds,West Ham,1,1,draw,Leeds 1 - 1 West Ham
371,Chelsea,Sunderland,1,0,home_win,Chelsea 1 - 0 Sunderland
372,Newcastle,Fulham,2,1,home_win,Newcastle 2 - 1 Fulham
373,Man Utd,Brighton,2,1,home_win,Man Utd 2 - 1 Brighton
374,Brentford,Liverpool,1,1,draw,Brentford 1 - 1 Liverpool
375,Arsenal,Crystal Palace,3,2,home_win,Arsenal 3 - 2 Crystal Palace
376,Aston Villa,Man City,1,1,draw,Aston Villa 1 - 1 Man City
377,Bournemouth,Nott'm Forest,1,1,draw,Bournemouth 1 - 1 Nott'm Forest
378,Wolves,Burnley,2,2,draw,Wolves 2 - 2 Burnley
379,Everton,Spurs,1,2,away_win,Everton 1 - 2 Spurs



⚽️ Gameweek 10 results:



Unnamed: 0,Home Team,Away Team,pred_home_goals,pred_away_goals,predicted_result,Predicted Score
10,Brighton,Leeds,1,0,home_win,Brighton 1 - 0 Leeds
11,Burnley,Arsenal,1,1,draw,Burnley 1 - 1 Arsenal
12,Crystal Palace,Brentford,3,1,home_win,Crystal Palace 3 - 1 Brentford
13,Fulham,Wolves,1,1,draw,Fulham 1 - 1 Wolves
14,Nott'm Forest,Man Utd,1,1,draw,Nott'm Forest 1 - 1 Man Utd
15,Spurs,Chelsea,1,1,draw,Spurs 1 - 1 Chelsea
16,Liverpool,Aston Villa,1,1,draw,Liverpool 1 - 1 Aston Villa
17,West Ham,Newcastle,0,3,away_win,West Ham 0 - 3 Newcastle
18,Man City,Bournemouth,3,2,home_win,Man City 3 - 2 Bournemouth
19,Sunderland,Everton,2,1,home_win,Sunderland 2 - 1 Everton


In [43]:
# === Predict top players likely to explode in next 3 gameweeks ===
players_df = pd.DataFrame(bootstrap_data["elements"])

# Keep only players available for selection
players_df = players_df[players_df["can_select"] & ~players_df["removed"]]

# Convert expected points and form to numeric
players_df["ep_next"] = pd.to_numeric(players_df["ep_next"], errors="coerce").fillna(0)
players_df["form"] = pd.to_numeric(players_df["form"], errors="coerce").fillna(0)

# Simple "explosion score" metric
players_df["explosion_score"] = players_df["ep_next"] * 0.6 + players_df["form"] * 0.4

# Sort and get top 15
top_players = players_df.sort_values("explosion_score", ascending=False).head(15)

# Map team names
top_players["team_name"] = top_players["team"].map(team_id_to_name)

print("🔥 Top 15 players likely to explode in the next gameweeks:\n")
display(top_players[[
    "web_name", "team_name", "element_type", 
    "form", "ep_next", "explosion_score"
]])


🔥 Top 15 players likely to explode in the next gameweeks:



Unnamed: 0,web_name,team_name,element_type,form,ep_next,explosion_score
134,Semenyo,Bournemouth,3,10.8,10.8,10.8
473,Haaland,Man City,4,9.5,10.0,9.8
4,Gabriel,Arsenal,2,8.2,8.7,8.5
431,Gravenberch,Liverpool,3,7.0,8.0,7.6
259,Caicedo,Chelsea,3,7.0,7.5,7.3
630,Alderete,Sunderland,2,7.5,7.0,7.2
463,Doku,Man City,3,5.8,6.3,6.1
24,Zubimendi,Arsenal,3,5.8,6.3,6.1
652,Kudus,Spurs,3,6.0,6.0,6.0
7,J.Timber,Arsenal,2,5.5,6.0,5.8


In [44]:

# --- Load entry history ---
with open("data/entry_history.json", "r", encoding="utf-8") as f:
    entry_history = json.load(f)

# --- Select the latest gameweek ---
latest_event = entry_history["current"][-1]

# --- Extract budget, transfers available, and hit points ---
budget = latest_event["bank"]  # budget in 0.1M units
transfers_available = latest_event["event_transfers"]  # free transfers available
hit_points = latest_event["event_transfers_cost"]      # points hit for extra transfers

print(f"💰 Budget: {budget/10:.1f}M")
print(f"🔄 Free transfers available: {transfers_available}")
print(f"⚠️ Points hit if extra transfers: {hit_points}")


💰 Budget: 0.5M
🔄 Free transfers available: 1
⚠️ Points hit if extra transfers: 0


In [45]:
import json
import pandas as pd
import itertools

# --- Load entry history (budget, free transfers, hit points) ---
with open("data/entry_history.json", "r", encoding="utf-8") as f:
    entry_history = json.load(f)

current_entry = entry_history["current"][-1]
budget = current_entry["bank"] / 10
free_transfers = current_entry["event_transfers"]
hit_points = current_entry["event_transfers_cost"]

# --- Load current team picks ---
with open("data/gw_picks/GW7.json", "r", encoding="utf-8") as f:
    current_team_picks = json.load(f)

current_team_ids = [p["element"] for p in current_team_picks["picks"]]

print(f"💰 Current bank: {budget}M | 🔄 Free transfers: {free_transfers} | ⚡ Hit points: {hit_points}")
print(f"🏟️ Current team player IDs: {current_team_ids}")

# --- Load all players ---
with open("data/bootstrap-static.json", "r", encoding="utf-8") as f:
    bootstrap_data = json.load(f)

players_df = pd.DataFrame(bootstrap_data["elements"])

# --- Normalize useful fields ---
players_df["price"] = players_df["now_cost"] / 10
players_df["expected_points"] = players_df["ep_next"].astype(float)
players_df["form"] = players_df["form"].astype(float)
players_df["injured"] = players_df["status"].isin(["i", "d", "s"])  # injured/suspended/doubtful
players_df["team_name"] = players_df["team"].map({t["id"]: t["name"] for t in bootstrap_data["teams"]})
players_df["position"] = players_df["element_type"].map({1: "GKP", 2: "DEF", 3: "MID", 4: "FWD"})

# Mock rotation probability (you can later replace with real data)
players_df["rotation_prob"] = 0.8  # assume most regulars start
players_df.loc[players_df["form"] < 2, "rotation_prob"] = 0.5

# Mock team form (you can replace this with real recent results)
players_df["team_form"] = 6 + (players_df["team"] % 5) * 0.5

# --- Current team data ---
current_team = players_df[players_df["id"].isin(current_team_ids)].copy()

# --- Team Weakness Index (TWI) ---
def calculate_twi(row):
    penalty = 0
    if row["injured"]:
        penalty += 2
    if row["rotation_prob"] < 0.6:
        penalty += 1.5
    twi = (
        (10 - row["form"]) * 0.4 +
        (10 - row["expected_points"]) * 0.3 +
        (10 - row["team_form"]) * 0.2 +
        penalty
    )
    return twi

current_team["TWI"] = current_team.apply(calculate_twi, axis=1)

# --- Identify weakest position ---
position_avg_twi = current_team.groupby("position")["TWI"].mean()
weakest_position = position_avg_twi.idxmax()
weakest_twi = position_avg_twi.max()

print(f"\n📊 Weakest position: {weakest_position} (Avg TWI: {weakest_twi:.2f})")

if weakest_twi < 4.0:
    print("\n✅ No urgent transfers needed. Team is balanced and performing well!")
    suggest_transfers = False
else:
    suggest_transfers = True

# --- Suggest transfers ---
transfer_list = []

if suggest_transfers:
    available_players = players_df[
        (~players_df["id"].isin(current_team_ids)) &
        (~players_df["injured"]) &
        (players_df["position"] == weakest_position)
    ].copy()

    available_players.sort_values("expected_points", ascending=False, inplace=True)
    weak_team_players = current_team[current_team["position"] == weakest_position].sort_values("expected_points")

    for buy in available_players.itertuples():
        for sell in weak_team_players.itertuples():
            if buy.price - sell.price <= budget:
                gain = buy.expected_points - sell.expected_points
                if gain > 1.5 and buy.form > 2:  # only worth if at least +1.5 EP gain
                    transfer_list.append({
                        "buy": buy.web_name,
                        "buy_ep": buy.expected_points,
                        "buy_form": buy.form,
                        "sell": sell.web_name,
                        "sell_ep": sell.expected_points,
                        "sell_form": sell.form,
                        "gain": gain,
                        "price_diff": buy.price - sell.price,
                        "buy_position": buy.position
                    })
                break  # only replace worst player first

# --- Rank and display top suggestions ---
if suggest_transfers and transfer_list:
    transfer_list = sorted(transfer_list, key=lambda x: x["gain"], reverse=True)

    print(f"\n🔥 Top important {weakest_position} transfers (prioritized by impact):\n")
    for t in transfer_list[:5]:
        print(f"⭐ {t['buy']} ({t['buy_ep']} EP, Form {t['buy_form']}) ⇄ {t['sell']} ({t['sell_ep']} EP, Form {t['sell_form']}) | Gain: +{t['gain']:.2f} | Cost: {t['price_diff']:.1f}M")

        # --- Combo (double/triple) transfer suggestions ---
    print("\n💥 Checking possible combo transfers...\n")
    combo_suggestions = []

    for n in [2, 3]:
        for combo in itertools.combinations(transfer_list[:6], n):

            # ✅ Skip combos where the same player is bought or sold twice
            buys = [t["buy"] for t in combo]
            sells = [t["sell"] for t in combo]
            if len(buys) != len(set(buys)) or len(sells) != len(set(sells)):
                continue

            total_gain = sum(t["gain"] for t in combo)
            total_cost = sum(max(0, t["price_diff"]) for t in combo)
            hit_cost = max(0, (n - free_transfers) * 4)
            net_gain = total_gain - hit_cost * 0.5  # penalize hits slightly

            if total_cost <= budget + 0.5 and net_gain > 3:  # allow small hit margin
                combo_suggestions.append((combo, total_gain, total_cost, net_gain, hit_cost))

    if combo_suggestions:
        combo_suggestions.sort(key=lambda x: x[3], reverse=True)
        best_combo = combo_suggestions[0]
        combo, total_gain, total_cost, net_gain, hit_cost = best_combo

        print(f"💎 Best combo ({len(combo)} transfers, cost -{hit_cost} pts) "
              f"| Total Gain: +{total_gain:.2f} | Net Gain (after hit): {net_gain:.2f}")
        for t in combo:
            print(f" ➕ {t['buy']} ⇄ {t['sell']} | Gain: +{t['gain']:.2f}")
    else:
        print("No combo transfers worth the hit.")


💰 Current bank: 0.5M | 🔄 Free transfers: 1 | ⚡ Hit points: 0
🏟️ Current team player IDs: [469, 5, 72, 291, 261, 200, 82, 717, 430, 666, 249, 565, 384, 224, 335]

📊 Weakest position: GKP (Avg TWI: 4.95)

🔥 Top important GKP transfers (prioritized by impact):

⭐ Roefs (5.5 EP, Form 6.0) ⇄ Vicario (3.5 EP, Form 3.5) | Gain: +2.00 | Cost: -0.5M

💥 Checking possible combo transfers...

No combo transfers worth the hit.


In [46]:
import json
import pandas as pd
import itertools

# --- CONFIGURATION ---
PAST_GW = 3
NEXT_GW = 4
MAX_TRANSFERS = 3
HIT_COST = 4
LOOKAHEAD_WEEKS = 3
N_SUGGESTIONS = 5

# --- Load bootstrap player data ---
with open("data/bootstrap-static.json", "r", encoding="utf-8") as f:
    bootstrap_data = json.load(f)

players_df = pd.DataFrame(bootstrap_data["elements"])
players_df["price"] = players_df["now_cost"] / 10
players_df["expected_points"] = players_df["ep_next"].astype(float)
players_df["form"] = players_df["form"].astype(float)
players_df["position"] = players_df["element_type"].map({1: "GKP", 2: "DEF", 3: "MID", 4: "FWD"})
players_df["injured"] = players_df["status"].isin(["i", "d", "s"])

# --- Load your current team (before PAST_GW) ---
with open(f"data/gw_picks/GW{PAST_GW}.json", "r", encoding="utf-8") as f:
    gw_team_data = json.load(f)

team_ids = [p["element"] for p in gw_team_data["picks"]]
team_df = players_df[players_df["id"].isin(team_ids)].copy()

# --- Identify weakest players ---
team_df["weak_index"] = (10 - team_df["expected_points"]) * 0.6 + (10 - team_df["form"]) * 0.4
weakest_players = team_df.sort_values("weak_index", ascending=False).head(MAX_TRANSFERS)

# --- Load actual next GWs performance (GW4 → GW6) ---
actual_points = {}
for gw in range(NEXT_GW, NEXT_GW + LOOKAHEAD_WEEKS):
    try:
        with open(f"data/element_gameweek_live/GW{gw}.json", "r", encoding="utf-8") as f:
            gw_data = json.load(f)
        df = pd.DataFrame(gw_data["elements"])
        df["id"] = df.index + 1
        for _, row in df.iterrows():
            stats = row["stats"]
            actual_points[row["id"]] = actual_points.get(row["id"], 0) + stats["total_points"]
    except FileNotFoundError:
        pass  # skip missing GWs

# --- Helper to get actual points (3 GWs) ---
def get_actual_points(player_id):
    return actual_points.get(player_id, 0)

# --- Suggest replacements for each weak player ---
suggestions = []
for _, weak in weakest_players.iterrows():
    replacements = players_df[
        (players_df["position"] == weak["position"]) &
        (~players_df["injured"]) &
        (~players_df["id"].isin(team_ids))
    ].copy()
    replacements["score"] = replacements["expected_points"] * 0.6 + replacements["form"] * 0.4
    top_repls = replacements.sort_values("score", ascending=False).head(N_SUGGESTIONS)
    for _, repl in top_repls.iterrows():
        proj_diff = (repl["expected_points"] * LOOKAHEAD_WEEKS) - (weak["expected_points"] * LOOKAHEAD_WEEKS)
        actual_diff = get_actual_points(repl["id"]) - get_actual_points(weak["id"])
        suggestions.append({
            "out": weak["web_name"],
            "in": repl["web_name"],
            "proj_diff": proj_diff,
            "actual_diff": actual_diff
        })

# --- Build valid combinations (no duplicate sold players) ---
options = []
option_num = 1

for n in range(1, MAX_TRANSFERS + 1):
    for combo in itertools.combinations(suggestions, n):
        # ✅ skip combos selling the same player more than once
        sold_players = [c["out"] for c in combo]
        if len(sold_players) != len(set(sold_players)):
            continue

        total_proj_gain = sum(c["proj_diff"] for c in combo)
        total_actual_gain = sum(c["actual_diff"] for c in combo)
        hit_cost = (n - 1) * HIT_COST if n > 1 else 0
        options.append({
            "option_num": option_num,
            "transfers": combo,
            "n_transfers": n,
            "proj_gain": total_proj_gain,
            "actual_gain": total_actual_gain,
            "after_hit_proj": total_proj_gain - hit_cost,
            "after_hit_actual": total_actual_gain - hit_cost
        })
        option_num += 1

# --- Sort and display ---
options = sorted(options, key=lambda x: x["after_hit_proj"], reverse=True)

print(f"📈 Suggested Transfer Strategies (with actual results from GW{NEXT_GW}-{NEXT_GW + LOOKAHEAD_WEEKS - 1}):\n")

for opt in options[:10]:
    print(f"⚡ Option {opt['option_num']}: {opt['n_transfers']} transfers (-{(opt['n_transfers']-1)*HIT_COST})")
    for t in opt["transfers"]:
        print(f"   🔄 {t['out']} ➜ {t['in']} | +{t['proj_diff']:.1f} pts projected | Actual diff: +{t['actual_diff']:.1f}")
    print(f"   💹 Total Projected Gain: +{opt['proj_gain']:.1f} | Actual Gain: +{opt['actual_gain']:.1f}")
    print(f"   🧾 After Hit: Projected +{opt['after_hit_proj']:.1f} | Actual +{opt['after_hit_actual']:.1f}")
    print("—" * 80)


📈 Suggested Transfer Strategies (with actual results from GW4-6):

⚡ Option 91: 3 transfers (-8)
   🔄 Reinildo ➜ Alderete | +21.0 pts projected | Actual diff: +26.0
   🔄 Wood ➜ Bowen | +14.4 pts projected | Actual diff: +15.0
   🔄 Sels ➜ Roefs | +13.5 pts projected | Actual diff: +16.0
   💹 Total Projected Gain: +48.9 | Actual Gain: +57.0
   🧾 After Hit: Projected +40.9 | Actual +49.0
————————————————————————————————————————————————————————————————————————————————
⚡ Option 96: 3 transfers (-8)
   🔄 Reinildo ➜ Alderete | +21.0 pts projected | Actual diff: +26.0
   🔄 Wood ➜ Woltemade | +12.9 pts projected | Actual diff: +13.0
   🔄 Sels ➜ Roefs | +13.5 pts projected | Actual diff: +16.0
   💹 Total Projected Gain: +47.4 | Actual Gain: +55.0
   🧾 After Hit: Projected +39.4 | Actual +47.0
————————————————————————————————————————————————————————————————————————————————
⚡ Option 92: 3 transfers (-8)
   🔄 Reinildo ➜ Alderete | +21.0 pts projected | Actual diff: +26.0
   🔄 Wood ➜ Bowen | +14.4 p

In [47]:
import json
import pandas as pd
import itertools
import os
from collections import defaultdict

# --- CONFIGURATION ---
CURRENT_GW = 7            # GW before which you make transfers
LOOKAHEAD_WEEKS = 3       # how many GWs ahead to predict (e.g. GW7..GW9)
MAX_TRANSFERS = 3
HIT_COST = 4              # -4 points per extra transfer beyond first
N_SUGGESTIONS = 6         # top replacements to consider per weak player
PAST_FORM_MATCHES = 3     # how many past matches to use to compute team form

# --- Load bootstrap-static players & teams ---
with open("data/bootstrap-static.json", "r", encoding="utf-8") as f:
    bootstrap = json.load(f)

players_df = pd.DataFrame(bootstrap["elements"])
teams_df = pd.DataFrame(bootstrap["teams"])[["id", "name"]]

# standard columns / helper columns
players_df["price"] = players_df["now_cost"] / 10
players_df["expected_points"] = players_df["ep_next"].astype(float)
players_df["form"] = players_df["form"].astype(float)
players_df["position"] = players_df["element_type"].map({1: "GKP", 2: "DEF", 3: "MID", 4: "FWD"})
players_df["injured"] = players_df["status"].isin(["i", "d", "s"])  # injured/doubt/suspended
players_df["team_name"] = players_df["team"].map(teams_df.set_index("id")["name"])

# --- Build fixtures_df by reading all GW files in data/gw_data/ ---
gw_folder = "data/gw_data"
all_matches = []
for fn in sorted(os.listdir(gw_folder)):
    if fn.startswith("GW") and fn.endswith(".json"):
        gw_num = int(fn.replace("GW", "").replace(".json", ""))
        with open(os.path.join(gw_folder, fn), "r", encoding="utf-8") as f:
            match_list = json.load(f)
        # match_list may be a list of fixtures (as your sample)
        for m in match_list:
            # attach event if present or use gw_num
            m_event = m.get("event", gw_num)
            m["event"] = m_event
            all_matches.append(m)

fixtures_df = pd.DataFrame(all_matches)

# normalize missing difficulty fields
fixtures_df["team_h_difficulty"] = fixtures_df.get("team_h_difficulty").fillna(3)
fixtures_df["team_a_difficulty"] = fixtures_df.get("team_a_difficulty").fillna(3)

# --- Compute team form based on past matches (last PAST_FORM_MATCHES matches before CURRENT_GW) ---
# We'll compute simple points (3 win,1 draw) per match for each team then normalize.
def team_points_from_match(row, team_id):
    # returns points that team_id gained in this match row
    if not row.get("finished", False):
        return 0
    th, ta = row.get("team_h"), row.get("team_a")
    sh, sa = row.get("team_h_score"), row.get("team_a_score")
    if team_id == th:
        if sh is None or sa is None:
            return 0
        if sh > sa: return 3
        if sh == sa: return 1
        return 0
    if team_id == ta:
        if sh is None or sa is None:
            return 0
        if sa > sh: return 3
        if sa == sh: return 1
        return 0
    return 0

team_form_scores = {}
team_ids = teams_df["id"].tolist()
for tid in team_ids:
    # filter matches for this team with event < CURRENT_GW
    team_matches = fixtures_df[
        ((fixtures_df["team_h"] == tid) | (fixtures_df["team_a"] == tid)) &
        (fixtures_df["event"] < CURRENT_GW) &
        (fixtures_df.get("finished", False) == True)
    ].sort_values("event", ascending=False).head(PAST_FORM_MATCHES)
    pts = sum(team_points_from_match(r, tid) for _, r in team_matches.iterrows())
    # normalize to [0,1] where max possible is 3*PAST_FORM_MATCHES
    norm = pts / (3 * max(1, len(team_matches)))
    team_form_scores[tid] = norm

teams_df["form_score"] = teams_df["id"].map(team_form_scores).fillna(0)

# merge team form into players_df
players_df = players_df.merge(teams_df[["id", "form_score"]], left_on="team", right_on="id", how="left", suffixes=("", "_team"))
players_df.drop(columns=["id_team"], inplace=True)

# --- Build future_fixtures for CURRENT_GW .. CURRENT_GW + LOOKAHEAD_WEEKS -1 ---
future_events = list(range(CURRENT_GW, CURRENT_GW + LOOKAHEAD_WEEKS))
future_fixtures = fixtures_df[fixtures_df["event"].isin(future_events)].copy()

# helper: compute average fixture difficulty (higher=harder) for a team across the next LOOKAHEAD_WEEKS
def avg_fixture_difficulty_for_team(team_id):
    team_fixtures = future_fixtures[
        (future_fixtures["team_h"] == team_id) | (future_fixtures["team_a"] == team_id)
    ].head(LOOKAHEAD_WEEKS)
    if team_fixtures.empty:
        return 3.0  # neutral
    diffs = []
    for _, r in team_fixtures.iterrows():
        if r["team_h"] == team_id:
            diffs.append(r.get("team_h_difficulty", 3))
        else:
            diffs.append(r.get("team_a_difficulty", 3))
    return float(sum(diffs) / max(1, len(diffs)))

players_df["avg_fdr"] = players_df["team"].apply(avg_fixture_difficulty_for_team)

# --- Rotation risk estimate (simple) ---
# use minutes and starts if present; fallback to no risk
players_df["minutes"] = players_df.get("minutes", 0).fillna(0).astype(float)
players_df["starts"] = players_df.get("starts", 0).fillna(0).astype(float)
# minutes per start; if starts==0, use minutes (low confidence)
players_df["minutes_per_start"] = players_df.apply(lambda r: (r["minutes"] / r["starts"]) if r["starts"] > 0 else r["minutes"], axis=1)
players_df["rotation_risk"] = (players_df["minutes_per_start"] < 60).astype(int)  # 1 = risk
players_df["rotation_penalty"] = players_df["rotation_risk"] * 1.5  # penalty applied to projected points

# --- Load current team picks for CURRENT_GW (team before transfers) ---
with open(f"data/gw_picks/GW{CURRENT_GW}.json", "r", encoding="utf-8") as f:
    gw_picks = json.load(f)
current_team_ids = [p["element"] for p in gw_picks["picks"]]

# --- bank/free transfers/hit from entry_history (if exists) ---
try:
    with open("data/entry_history.json", "r", encoding="utf-8") as f:
        entry_history = json.load(f)
    entry = entry_history["current"][-1]
    bank = entry["bank"] / 10
    free_transfers = entry["event_transfers"]
    current_hit = entry["event_transfers_cost"]
except Exception:
    bank = 0.0
    free_transfers = 0
    current_hit = 0

# --- prediction function for LOOKAHEAD_WEEKS ---
def predict_points_for_lookahead(row):
    # base = expected_points per next match (ep_next)
    base_ep = float(row["expected_points"])
    # form factor (0.8..1.3)
    form_factor = 1.0 + (row["form"] / 10.0) * 0.15
    # team form factor (0..0.2)
    team_factor = 1.0 + (row.get("form_score", 0.0) * 0.2)
    # fixture difficulty factor: easier fixtures -> multiply up, scaled so avg diff 3 -> 1.0
    # We convert avg_fdr to an ease score: ease = (6 - avg_fdr)/5 in range (0.2..1)
    ease = (6.0 - float(row.get("avg_fdr", 3.0))) / 5.0
    ease_factor = 0.8 + 0.4 * ease   # between 0.8 and 1.2
    # rotation penalty
    rotation_pen = float(row.get("rotation_penalty", 0.0))
    # projected points over LOOKAHEAD_WEEKS
    projected = base_ep * LOOKAHEAD_WEEKS * form_factor * team_factor * ease_factor
    projected = projected - rotation_pen * LOOKAHEAD_WEEKS
    return float(max(0.0, round(projected, 2)))

players_df["predicted_points_3gw"] = players_df.apply(predict_points_for_lookahead, axis=1)

# --- current team subset and weak players selection ---
team_df = players_df[players_df["id"].isin(current_team_ids)].copy()
team_df["weak_index"] = (10 - team_df["expected_points"]) * 0.5 + (10 - team_df["form"]) * 0.3 + (1 - team_df["form_score"]) * 0.2
weak_candidates = team_df.sort_values("weak_index", ascending=False).head(max(3, MAX_TRANSFERS*2))

# --- Available candidates for buying ---
available_players = players_df[
    (~players_df["injured"]) &
    (players_df["form"] > 0) &
    (~players_df["id"].isin(current_team_ids))
].copy()

# --- For each weak player, compute top replacements (by predicted_points_3gw) ---
replacement_suggestions = []  # list of dicts: {out_id,out_name,out_pos, in_id,in_name,in_pos,proj_diff}
for _, weak in weak_candidates.iterrows():
    candidates = available_players[available_players["position"] == weak["position"]].copy()
    # require that candidate price is not insane: allow candidate.price <= weak.price + bank (independent suggestions)
    candidates = candidates[candidates["price"] <= (weak["price"] + bank)]
    # rank by predicted_points_3gw
    candidates = candidates.sort_values("predicted_points_3gw", ascending=False).head(N_SUGGESTIONS)
    for _, buy in candidates.iterrows():
        proj_diff = buy["predicted_points_3gw"] - weak["predicted_points_3gw"] if "predicted_points_3gw" in weak else buy["predicted_points_3gw"] - (weak["expected_points"]*LOOKAHEAD_WEEKS)
        replacement_suggestions.append({
            "out_id": int(weak["id"]),
            "out_name": weak["web_name"],
            "out_pos": weak["position"],
            "out_price": weak["price"],
            "in_id": int(buy["id"]),
            "in_name": buy["web_name"],
            "in_pos": buy["position"],
            "in_price": buy["price"],
            "proj_gain": round(proj_diff, 2)
        })

# --- Generate transfer bundles (1..MAX_TRANSFERS) ensuring:
#    - we don't sell the same player twice in a bundle
#    - we don't buy the same player twice in a bundle
#    - respect per-bundle affordability using independent budget (bank + sale price per swap)
options = []
option_counter = 1

# pre-build suggestions grouped by out_id to avoid selling a player multiple times in combination generation
# but we still iterate combos across suggestion list and then filter duplicates
for n in range(1, MAX_TRANSFERS + 1):
    for combo in itertools.combinations(replacement_suggestions, n):
        sold_ids = [c["out_id"] for c in combo]
        bought_ids = [c["in_id"] for c in combo]
        # skip if same player sold twice or same player bought twice
        if len(set(sold_ids)) != len(sold_ids): 
            continue
        if len(set(bought_ids)) != len(bought_ids):
            continue
        # check affordability independently for each pair (we treat each pair as independent: bank + out_price must cover in_price)
        affordable = True
        for c in combo:
            available_money = bank + c["out_price"]
            if c["in_price"] > available_money + 1e-9:  # small tolerance
                affordable = False
                break
        if not affordable:
            continue
        total_proj = sum(c["proj_gain"] for c in combo)
        hit_penalty = HIT_COST * max(0, (len(combo) - free_transfers))
        net_proj = total_proj - hit_penalty
        options.append({
            "option_num": option_counter,
            "n_transfers": len(combo),
            "hit": hit_penalty,
            "proj_gain": round(total_proj, 2),
            "net_proj": round(net_proj, 2),
            "details": combo
        })
        option_counter += 1

# sort options by net_proj descending
options = sorted(options, key=lambda x: x["net_proj"], reverse=True)

# --- Print top options ---
print(f"📅 Predicting GW{CURRENT_GW} → GW{CURRENT_GW + LOOKAHEAD_WEEKS - 1}")
print(f"💰 Bank: {bank}M | 🔄 Free transfers: {free_transfers} | ⚡ Current hit: {current_hit}\n")
print("📈 Suggested transfer bundles (projected points over next {} GWs):\n".format(LOOKAHEAD_WEEKS))

if not options:
    print("No feasible transfer bundles found under current constraints.")
else:
    shown = 0
    for opt in options:
        shown += 1
        print(f"⚡ Option {opt['option_num']}: {opt['n_transfers']} transfer{'s' if opt['n_transfers']>1 else ''} "
              f"({'-{}'.format(opt['hit']) if opt['hit'] else 'No hit'})")
        for c in opt["details"]:
            print(f"   🔄 {c['out_name']} ➜ {c['in_name']} | +{c['proj_gain']:.2f} pts projected")
        print(f"   💹 Total Projected Gain (raw): +{opt['proj_gain']:.2f} pts")
        print(f"   🧾 After Hit: +{opt['net_proj']:.2f} pts\n")
        print("-" * 80)
        if shown >= 10:
            break


📅 Predicting GW7 → GW9
💰 Bank: 0.5M | 🔄 Free transfers: 1 | ⚡ Current hit: 0

📈 Suggested transfer bundles (projected points over next 3 GWs):

⚡ Option 565: 3 transfers (-8)
   🔄 King ➜ Longstaff | +16.59 pts projected
   🔄 Cucurella ➜ Alderete | +18.79 pts projected
   🔄 Xavi ➜ Gravenberch | +21.24 pts projected
   💹 Total Projected Gain (raw): +56.62 pts
   🧾 After Hit: +48.62 pts

--------------------------------------------------------------------------------
⚡ Option 697: 3 transfers (-8)
   🔄 King ➜ Longstaff | +16.59 pts projected
   🔄 Xavi ➜ Gravenberch | +21.24 pts projected
   🔄 Tarkowski ➜ Alderete | +18.71 pts projected
   💹 Total Projected Gain (raw): +56.54 pts
   🧾 After Hit: +48.54 pts

--------------------------------------------------------------------------------
⚡ Option 890: 3 transfers (-8)
   🔄 King ➜ Stach | +15.77 pts projected
   🔄 Cucurella ➜ Alderete | +18.79 pts projected
   🔄 Xavi ➜ Gravenberch | +21.24 pts projected
   💹 Total Projected Gain (raw): +55.8