In [None]:
import requests
import pandas as pd
from tqdm import tqdm
from pydantic import create_model

BASE_URL = "https://fantasy.premierleague.com/api/"

# may or may not need CORS

# relevant endpoints
ENDPOINTS = {
    "bootstrap_static": "bootstrap-static/",
    "element_summary": "element-summary/{player_id}/",
    "fixtures": "fixtures/",
    "gameweek": "gameweek/{gameweek}/",
}

In [3]:
def fetch_bootstrap_static():
    url = BASE_URL + ENDPOINTS["bootstrap_static"]
    response = requests.get(url)
    
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")

In [None]:
# drop everything but the element_stats, element_types, and elements
def extract_relevant_data(data):
    element_stats = data.get("element_stats", [])
    element_types = data.get("element_types", [])
    elements = data.get("elements", [])
    
    return {
        "element_stats": element_stats,
        "element_types": element_types,
        "elements": elements
    }

In [5]:
relevant_data = extract_relevant_data(fetch_bootstrap_static())

In [6]:
relevant_stats = [x["name"] for x in relevant_data["element_stats"]]

In [7]:
historical_relevant_stats = ["season_name", "id", "total_points"] + relevant_stats + ["points_per_game"]

In [8]:
PlayerSeason = create_model(
    "PlayerSeason",
    **{field: (float, None) for field in historical_relevant_stats}
)

In [9]:
# structure of a season object in the 'history_past' field of a player
SEASON_STRUCTURE = {x: type(x) for x in historical_relevant_stats}

In [None]:
# ignore all but the following fields
player_info_fields = ["id", "first_name", "second_name", "birth_date"]

In [11]:
player_list = [{field: player[field] for field in player_info_fields} for player in relevant_data["elements"]]

In [None]:
def fetch_player_history(id):
    """Fetch history_past stats for a given player."""
    url = BASE_URL + ENDPOINTS["element_summary"].format(player_id=id)
    resp = requests.get(url)
    resp.raise_for_status()
    data = resp.json()
    return data.get("history_past", [])

def extract_relevant_fields(history_list, relevant_fields):
    """Extract only the relevant fields from the history list."""
    cleaned = []
    for season in history_list:
        filtered = {k: season.get(k) for k in relevant_fields}
        cleaned.append(filtered)
    return cleaned

def build_dataframe(player_list, relevant_fields):
    all_rows = []
    for player in tqdm(player_list, desc="Fetching player histories"):
        history = fetch_player_history(player['id'])
        relevant_stats = extract_relevant_fields(history, relevant_fields)
        for season in relevant_stats:
            season['player_id'] = player['id']
            season['first_name'] = player['first_name']
            season['second_name'] = player['second_name']
            season['birth_date'] = player['birth_date']
            all_rows.append(season)
    return pd.DataFrame(all_rows)

df = build_dataframe(player_list, historical_relevant_stats)

# Optional: inspect
print(df.head())

Fetching player histories: 100%|██████████| 658/658 [06:29<00:00,  1.69it/s]

  season_name    id  total_points  minutes  goals_scored  assists  \
0     2021/22  None            95     2160             0        0   
1     2022/23  None           166     3420             0        0   
2     2023/24  None           135     2880             0        0   
3     2024/25  None           142     3420             0        0   
4     2018/19  None           142     3240             0        0   

   clean_sheets  goals_conceded  own_goals  penalties_saved  ...  starts  \
0             8              27          0                0  ...       0   
1            12              46          1                0  ...      38   
2            16              24          0                1  ...      32   
3            13              34          0                0  ...      38   
4            14              39          0                1  ...       0   

   expected_goals  expected_assists  expected_goal_involvements  \
0            0.00              0.00                        0.




In [21]:
# save df to CSV
df.to_csv("fpl_player_history.csv", index=False)