# Alpha-FPL: Risk-Aware Portfolio Optimization for Fantasy Premier League

This notebook runs the full Alpha-FPL pipeline on Google Colab with TPU/GPU support.

**Architecture:**
- Layer 1: Data Engineering (DefCon back-casting)
- Layer 2: Bayesian Inference (NumPyro on TPU)
- Layer 3: t-Copula Dependency Modeling
- Layer 4: Stochastic MIQP Optimization (Gurobi)

## Setup

In [None]:
# Install dependencies
!pip install -q polars pandas numpy scipy pyarrow
!pip install -q soccerdata understatapi thefuzz[speedup] requests aiohttp
!pip install -q jax[tpu] -f https://storage.googleapis.com/jax-releases/libtpu_releases.html
!pip install -q numpyro
!pip install -q cvxpy gurobipy
!pip install -q hydra-core omegaconf wandb
!pip install -q tqdm rich loguru

In [None]:
# Clone repository (if running in Colab)
!git clone https://github.com/YOUR_USERNAME/bayes_and_gaffer.git
%cd bayes_and_gaffer

In [None]:
# Point Gurobi at the (academic) license file through its environment variable.
import os

GUROBI_LICENSE_PATH = "/content/gurobi.lic"
os.environ["GRB_LICENSE_FILE"] = GUROBI_LICENSE_PATH

# The license file has to be present at that path: either upload gurobi.lic
# with the snippet below, or set up the Web License Service instead.
# from google.colab import files
# files.upload()  # Upload gurobi.lic

In [None]:
# Sanity check: report which accelerator devices JAX can see and how many.
import jax

print(
    f"JAX devices: {jax.devices()}",
    f"Device count: {jax.device_count()}",
    sep="\n",
)

## Step 1: Data Ingestion

In [None]:
from src.data.ingest import DataIngester
from src.data.backcast import BackCaster, DefConConfig

# Seasons handed to the ingester.
ingest_seasons = ["2021-22", "2022-23", "2023-24", "2024-25"]

# Build the ingester with its raw/processed directories and the player-ID
# mapping file.
ingester = DataIngester(
    raw_dir="data/raw",
    processed_dir="data/processed",
    mappings_path="data/meta/player_id_map.json",
    seasons=ingest_seasons,
)

# Load the FPL data for all configured seasons and report its size.
fpl_df = ingester.ingest_fpl_all_seasons()
print(f"FPL data: {len(fpl_df)} rows")

In [None]:
# Back-cast with DefCon rules
# BackCaster is constructed with no arguments, i.e. no explicit DefConConfig
# is supplied here.
backcaster = BackCaster()
# NOTE(review): presumably returns the FPL frame re-scored under the DefCon
# rules — confirm against src.data.backcast.
backcast_df = backcaster.backcast(fpl_df)

# Last expression of the cell: display the first rows for a quick visual check.
backcast_df.head()

## Step 2: Bayesian Model Training

In [None]:
from src.models.numpyro_model import HierarchicalPointsModel, ModelConfig
import jax

# Pick the device from the backend JAX actually initialised instead of
# hard-coding "tpu", so the same cell works on TPU, GPU and CPU runtimes.
# Anything other than the three device names this notebook lists falls back
# to "cpu".
_SUPPORTED_DEVICES = ("tpu", "gpu", "cpu")
detected_backend = jax.default_backend()
model_device = detected_backend if detected_backend in _SUPPORTED_DEVICES else "cpu"

# Sampler settings: 4 chains, 2000 samples and 500 warmup steps.
config = ModelConfig(
    chains=4,
    samples=2000,
    warmup=500,
    device=model_device,  # "tpu", "gpu" or "cpu"
)

model = HierarchicalPointsModel(config=config)

In [None]:
# Build the training set by dropping every row that belongs to a held-out
# validation season.
validation_seasons = ["2024-25"]
in_validation = backcast_df["season"].is_in(validation_seasons)
train_df = backcast_df.filter(~in_validation)

# Convert the training frame into the model's input structure and report how
# many players it covers.
train_data = model.prepare_data(train_df)
print(f"Training on {train_data['n_players']} players")

In [None]:
# Fit model (this will take a few minutes on TPU)
# A fixed PRNG seed (42) keeps the key passed to the fit identical across runs.
rng_key = jax.random.PRNGKey(42)
# Fits on the prepared training data; the return value is not captured, so
# presumably the fitted state is kept on `model` itself — confirm.
model.fit(train_data, rng_key=rng_key)

## Step 3: Copula Fitting

In [None]:
from src.models.copula import TCopulaEngine, CopulaConfig
import numpy as np

# Copula engine configured for 1000 scenarios.
copula_config = CopulaConfig(n_scenarios=1000)
copula = TCopulaEngine(copula_config)

# Model predictions on the training data, drawn with their own PRNG key.
predict_key = jax.random.PRNGKey(0)
predictions = model.predict(train_data, rng_key=predict_key)

# NOTE(review): simplified — no fit call is made on the copula in this cell and
# `predictions` is not used further here; in practice use full residual
# analysis. The value printed below is whatever the engine currently reports.
print(f"Copula degrees of freedom: {copula.degrees_of_freedom}")

## Step 4: Optimization

In [None]:
from src.optimization.gurobi_solver import StochasticMIQPSolver, SolverConfig

# Solver settings: a time limit of 300 (presumably seconds — confirm) and
# verbose output enabled.
wildcard_solver_config = SolverConfig(time_limit=300, verbose=True)
solver = StochasticMIQPSolver(wildcard_solver_config)

In [None]:
# Get current player data
current_data = ingester.get_current_gameweek_data()
n_players = len(current_data)

# Seeded generator so the placeholder expected points (and therefore the
# selected team) are reproducible across kernel restarts.
placeholder_rng = np.random.default_rng(42)

# Position label -> integer code used by the solver; built once rather than
# once per player.
POSITION_CODES = {"GKP": 0, "DEF": 1, "MID": 2, "FWD": 3}

# Prepare for solver
solver_data = {
    "expected_points": placeholder_rng.uniform(2, 8, n_players),  # Replace with model predictions
    "point_stds": np.full(n_players, 2.0),  # placeholder: constant std of 2 for every player
    "xmins_factors": np.ones(n_players),  # placeholder: factor of 1 for every player
    "positions": np.array([POSITION_CODES[p] for p in current_data["position"].to_list()]),
    "teams": np.arange(n_players) % 20,  # placeholder: synthetic team ids cycling through 0..19
    "prices": current_data["now_cost"].to_numpy() / 10,  # now_cost divided by 10, matching the budget units below
}

# Solve for optimal team
solution = solver.solve_wildcard(solver_data, budget=100.0)
print(f"Solution status: {solution['status']}")
# Falls back to 0 when the solution carries no objective value.
print(f"Objective value: {solution.get('objective_value', 0):.2f}")

## Step 5: Backtest (Walk-Forward)

In [None]:
from src.backtest.runner import BacktestRunner, BacktestConfig

# Backtest settings: start at gameweek 5, validate on 2024-25, no W&B logging.
backtest_config = BacktestConfig(
    start_gameweek=5,
    validation_seasons=["2024-25"],
    wandb_enabled=False,  # Enable for tracking
)

# Wire the fitted model, the copula engine and the solver into the runner.
backtester = BacktestRunner(
    model=model,
    copula=copula,
    solver=solver,
    config=backtest_config,
)

# Run backtest: both the player data and the results data are the 2024-25
# slice of the back-cast frame.
season_mask = backcast_df["season"] == "2024-25"
metrics = backtester.run_season(
    season="2024-25",
    player_data=backcast_df.filter(season_mask),
    results_data=backcast_df.filter(season_mask),
)

# Print every reported metric to two decimal places.
print("\n=== BACKTEST RESULTS ===")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.2f}")

## Hyperparameter Tuning

In [None]:
# Experiment with different risk aversion levels
lambda_values = [0.0, 0.1, 0.25, 0.5, 1.0]
results = []  # (lambda, total_points) pairs, filled in by the backtest below

for lam in lambda_values:
    # Fresh default config per run, with only the risk-aversion weight changed.
    solver_config = SolverConfig()
    solver_config.objective_config.risk_aversion_lambda = lam

    # Bound to a loop-local name so the notebook-level `solver` configured
    # earlier is not silently replaced by the last sweep value.
    lam_solver = StochasticMIQPSolver(solver_config)
    # ... run backtest with `lam_solver`
    # results.append((lam, total_points))

# Summary table; prints only the header until `results` is populated above.
print("Lambda vs Points:")
for lam, pts in results:
    print(f"  λ={lam:.2f}: {pts} points")