In [None]:
import requests
import pandas as pd
from tqdm import tqdm
from pydantic import create_model

# Root URL of the Fantasy Premier League API; endpoint paths below are appended to it.
BASE_URL = "https://fantasy.premierleague.com/api/"

# Endpoint paths relative to BASE_URL. Entries containing a `{...}` placeholder
# must be completed with str.format before the URL is requested.
# NOTE(review): "gameweek" is not used by any visible cell - confirm the path is valid.
ENDPOINTS = dict(
    bootstrap_static="bootstrap-static/",
    element_summary="element-summary/{player_id}/",
    fixtures="fixtures/",
    gameweek="gameweek/{gameweek}/",
)

In [2]:
def fetch_bootstrap_static():
    """Download and decode the `bootstrap-static/` payload.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: if the server answers with any status other than 200. The
            message carries the status code and the response text.
    """
    url = BASE_URL + ENDPOINTS["bootstrap_static"]
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would freeze the notebook cell.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")
    return response.json()

In [3]:
# keep only the element_stats, element_types and elements sections of the payload
def extract_relevant_data(data):
    """Return a new dict holding just the three sections used downstream.

    A section that is absent from `data` is returned as an empty list.
    """
    wanted_sections = ("element_stats", "element_types", "elements")
    return {section: data.get(section, []) for section in wanted_sections}

In [4]:
# Fetch the bootstrap payload and keep only the sections selected by
# extract_relevant_data. This cell performs a network request.
relevant_data = extract_relevant_data(fetch_bootstrap_static())

In [5]:
# Names of the stats listed under "element_stats" in the bootstrap payload.
relevant_stats = [stat["name"] for stat in relevant_data["element_stats"]]

In [6]:
# Fields of interest in a past-season record: three fixed leading fields, then
# every name in relevant_stats, then points_per_game.
historical_relevant_stats = [
    "season_name",
    "id",
    "total_points",
    *relevant_stats,
    "points_per_game",
]

In [7]:
# Pydantic model with one optional field (default None) per entry of
# historical_relevant_stats.
PlayerSeason = create_model(
    "PlayerSeason",
    **{
        # season_name holds labels such as '2022/23', which cannot be validated
        # as float; every other field keeps the float type.
        field: ((str if field == "season_name" else float), None)
        for field in historical_relevant_stats
    },
)

In [8]:
# Structure of a season object in the 'history_past' field of a player: maps each
# field name to the type of its value. Calling type() on the field *name* would
# yield `str` for every entry, so the types are spelled out instead: the season
# label is text, and every other field uses float (the type the PlayerSeason
# model uses for them).
SEASON_STRUCTURE = {
    field: (str if field == "season_name" else float)
    for field in historical_relevant_stats
}

In [10]:
# Player-level fields copied from each bootstrap element; all other fields are ignored.
# Note: `web_name` is not among them.
player_info_fields = ["id", "first_name", "second_name", "birth_date"]

In [11]:
# One small dict per bootstrap element, restricted to player_info_fields.
player_list = [
    {field: element[field] for field in player_info_fields}
    for element in relevant_data["elements"]
]

In [22]:
# Sanity check: show which keys were kept for the first player.
player_list[0].keys()

dict_keys(['id', 'first_name', 'second_name', 'birth_date'])

In [25]:
def fetch_player_info(player, n_seasons=3):
    """Fetch the most recent past-season records of one player.

    Args:
        player: Either a player mapping carrying an "id" key (such as an entry
            of `player_list`) or the bare player id itself.
        n_seasons: Maximum number of seasons to return.

    Returns:
        A list with at most `n_seasons` entries taken from the end of the
        response's "history_past" list, in their original order. The list is
        empty when the response has no past seasons or `n_seasons` is not
        positive.

    Raises:
        Exception: if the server answers with any status other than 200.
    """
    # Accept a bare id as well as a player mapping: indexing a scalar id with
    # "id" fails, in which case the argument itself is the id.
    try:
        player_id = player["id"]
    except (TypeError, IndexError):
        player_id = player

    url = (BASE_URL + ENDPOINTS["element_summary"]).format(player_id=player_id)
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch data for player {player_id}: {response.status_code}")

    history = response.json().get("history_past") or []
    # history[-0:] is the whole list, so a non-positive count is handled explicitly.
    if n_seasons <= 0:
        return []
    return history[-n_seasons:]

# Spot-check the fetcher on a single entry of player_list (index 380).
fetch_player_info(player_list[380])

[{'season_name': '2022/23',
  'element_code': 118748,
  'start_cost': 130,
  'end_cost': 131,
  'total_points': 239,
  'minutes': 3290,
  'goals_scored': 19,
  'assists': 13,
  'clean_sheets': 13,
  'goals_conceded': 45,
  'own_goals': 0,
  'penalties_saved': 0,
  'penalties_missed': 2,
  'yellow_cards': 2,
  'red_cards': 0,
  'saves': 0,
  'bonus': 23,
  'bps': 651,
  'influence': '1067.4',
  'creativity': '899.2',
  'threat': '1688.0',
  'ict_index': '365.6',
  'defensive_contribution': 0,
  'starts': 37,
  'expected_goals': '21.01',
  'expected_assists': '7.03',
  'expected_goal_involvements': '28.04',
  'expected_goals_conceded': '47.47'},
 {'season_name': '2023/24',
  'element_code': 118748,
  'start_cost': 125,
  'end_cost': 134,
  'total_points': 211,
  'minutes': 2531,
  'goals_scored': 18,
  'assists': 12,
  'clean_sheets': 8,
  'goals_conceded': 30,
  'own_goals': 0,
  'penalties_saved': 0,
  'penalties_missed': 2,
  'yellow_cards': 2,
  'red_cards': 0,
  'saves': 0,
  'bonus

In [26]:
def build_player_row(player, n_seasons=3, fields=None):
    """
    Builds a flattened dictionary of stats for the last `n_seasons` of a player.
    Uses a MultiIndex format for season-specific columns.

    Args:
        player: Player mapping with at least an "id" key (an entry of `player_list`).
        n_seasons: Number of season slots in the row. Slots beyond the seasons
            actually returned for the player are filled with None.
        fields: Names of the per-season fields to copy. Defaults to
            `historical_relevant_stats`.

    Returns:
        A dict keyed by 2-tuples: ("season_<k>", field) for k in 1..n_seasons,
        plus ("player_info", "id") and ("player_info", "name").
    """
    if fields is None:
        fields = historical_relevant_stats

    # fetch_player_info looks up player["id"] itself, so it needs the whole
    # player mapping rather than the bare id.
    history = fetch_player_info(player, n_seasons=n_seasons)

    # MultiIndex keys: (season_index, field_name)
    row = {}
    for i in range(n_seasons):
        season_data = history[i] if i < len(history) else {}
        for field in fields:
            row[(f"season_{i+1}", field)] = season_data.get(field, None)

    # Add player-level identifiers at the top-level
    row[("player_info", "id")] = player["id"]
    # Entries of player_list carry first_name/second_name but no web_name, so
    # fall back to the full name before giving up with "Unknown".
    full_name = " ".join(
        part for part in (player.get("first_name"), player.get("second_name")) if part
    )
    row[("player_info", "name")] = player.get("web_name") or full_name or "Unknown"

    return row

In [27]:
def build_all_players_df(player_list, n_seasons=3):
    """
    Builds a DataFrame of all players (one row per player) with MultiIndex columns.

    Players whose row cannot be built are reported on stdout and skipped, so the
    result may have fewer rows than `player_list`.

    Args:
        player_list: Iterable of player mappings accepted by `build_player_row`.
        n_seasons: Number of season slots per row, forwarded to `build_player_row`.

    Returns:
        A DataFrame with two-level column labels. It is empty (but still has
        two-level columns) when no row could be built.
    """
    rows = []
    for player in tqdm(player_list, desc="Fetching players"):
        try:
            rows.append(build_player_row(player, n_seasons=n_seasons))
        except Exception as e:
            # Best effort: one failing player must not abort the whole run.
            # Fall back to the id so the message identifies the player even
            # when the mapping has no web_name.
            label = player.get("web_name") or player.get("id", "Unknown")
            print(f"Error with player {label}: {e}")

    if not rows:
        # MultiIndex.from_tuples cannot infer the number of levels from an empty
        # sequence, so build the empty two-level column index explicitly.
        return pd.DataFrame(columns=pd.MultiIndex.from_arrays([[], []]))

    df = pd.DataFrame(rows)
    df.columns = pd.MultiIndex.from_tuples(df.columns)
    return df