In [24]:
import os
from dotenv import load_dotenv


load_dotenv()

API_KEY = os.getenv("API_FOOTBALL_KEY")

API_KEY
print(API_KEY[:5], "********")


126e2 ********


In [25]:
import requests

BASE_URL = "https://v3.football.api-sports.io"

HEADERS = {
    "x-apisports-key": API_KEY,
    "Accept": "application/json"}


In [26]:
url = f"{BASE_URL}/status"
response = requests.get(url, headers=HEADERS)
response.json()

{'get': 'status',
 'parameters': [],
 'errors': [],
 'results': 0,
 'paging': {'current': 1, 'total': 1},
 'response': {'account': {'firstname': 'Alan',
   'lastname': 'Watters',
   'email': 'alanwatters27@gmail.com'},
  'subscription': {'plan': 'Free',
   'end': '2027-01-27T00:00:00+00:00',
   'active': True},
  'requests': {'current': 0, 'limit_day': 100}}}

In [42]:
import time
import requests

def api_get(path, params=None, max_retries=8):
    url = f"{BASE_URL}{path}"

    for attempt in range(max_retries):
        r = requests.get(url, headers=HEADERS, params=params, timeout=30)
        r.raise_for_status()
        data = r.json()

        
        errors = data.get("errors") or {}
        if errors:
            
            if "rateLimit" in errors:
                wait = 10 + attempt * 5   # 10s, 15s, 20s...
                print(f"Rate limited. Waiting {wait}s then retrying... (attempt {attempt+1}/{max_retries})")
                time.sleep(wait)
                continue

            raise RuntimeError(f"API errors: {errors}")

        return data["response"]

    raise RuntimeError("Max retries exceeded (rate limit kept triggering).")


In [43]:
import pandas as pd
import time

def get_lineups(fixture_id):
    return api_get("/fixtures/lineups", params={"fixture": fixture_id})

def extract_formations(fixtures, league_name, season_year, sleep_s=6.6):
    rows = []

    for i, fx in enumerate(fixtures, start=1):
        fixture_id = fx["fixture"]["id"]
        date = fx["fixture"]["date"]
        home = fx["teams"]["home"]["name"]
        away = fx["teams"]["away"]["name"]

        lineups = get_lineups(fixture_id)

        for team in lineups:
            rows.append({
                "league": league_name,
                "season_year": season_year,
                "fixture_id": fixture_id,
                "date": date,
                "team": team["team"]["name"],
                "opponent": away if team["team"]["name"] == home else home,
                "formation": team.get("formation")
            })

        if i % 5 == 0:
            print(f"{league_name}: {i}/{len(fixtures)} fixtures processed")

        time.sleep(sleep_s)

    return pd.DataFrame(rows)


In [44]:
pl_formations_df = extract_formations(pl_last, "Premier League", pl_season, sleep_s=6.6)
bund_formations_df = extract_formations(bund_last, "Bundesliga", bund_season, sleep_s=6.6)

formations_df = pd.concat([pl_formations_df, bund_formations_df], ignore_index=True)
formations_df.head()


Premier League: 5/50 fixtures processed
Premier League: 10/50 fixtures processed
Premier League: 15/50 fixtures processed
Premier League: 20/50 fixtures processed
Premier League: 25/50 fixtures processed
Premier League: 30/50 fixtures processed
Premier League: 35/50 fixtures processed
Premier League: 40/50 fixtures processed
Premier League: 45/50 fixtures processed
Premier League: 50/50 fixtures processed
Bundesliga: 5/50 fixtures processed
Bundesliga: 10/50 fixtures processed
Bundesliga: 15/50 fixtures processed
Bundesliga: 20/50 fixtures processed
Bundesliga: 25/50 fixtures processed
Bundesliga: 30/50 fixtures processed
Bundesliga: 35/50 fixtures processed
Bundesliga: 40/50 fixtures processed
Bundesliga: 45/50 fixtures processed


RuntimeError: API errors: {'requests': 'You have reached the request limit for the day, Go to https://dashboard.api-football.com to upgrade your plan.'}

In [46]:
len(pl_formations_df)
pl_formations_df["fixture_id"].nunique()


50

In [49]:
if 'bund_formations_df' in globals():
    print(len(bund_formations_df), bund_formations_df["fixture_id"].nunique())


In [50]:
import os
os.makedirs("data/raw/api_football", exist_ok=True)

pl_formations_df.to_csv("data/raw/api_football/formations_last50_premier_league.csv", index=False)
pl_formations_df.to_json("data/raw/api_football/formations_last50_premier_league.json", orient="records", indent=2)


In [51]:
import json

with open("data/raw/api_football/bundesliga_last50_fixtures.json", "w", encoding="utf-8") as f:
    json.dump(bund_last, f, indent=2)


In [52]:
pl_formations_df.shape, pl_formations_df["fixture_id"].nunique()


((100, 7), 50)

In [54]:
bund_formations_df = extract_formations(
    bund_last,
    "Bundesliga",
    bund_season,
    sleep_s=6.6)

bund_formations_df.shape, bund_formations_df["fixture_id"].nunique()


Bundesliga: 5/50 fixtures processed
Bundesliga: 10/50 fixtures processed
Bundesliga: 15/50 fixtures processed
Bundesliga: 20/50 fixtures processed
Bundesliga: 25/50 fixtures processed
Bundesliga: 30/50 fixtures processed
Bundesliga: 35/50 fixtures processed
Bundesliga: 40/50 fixtures processed
Bundesliga: 45/50 fixtures processed
Bundesliga: 50/50 fixtures processed


((100, 7), 50)

In [57]:
import os

os.makedirs("api_football", exist_ok=True)

bund_formations_df.to_csv(
    "api_football/formations_last50_bundesliga.csv",
    index=False)


In [59]:
import pandas as pd

pl = pd.read_csv("api_football/formations_last50_premier_league.csv")
bund = pd.read_csv("api_football/formations_last50_bundesliga.csv")

formations_df = pd.concat([pl, bund], ignore_index=True)

formations_df.to_csv("../cleaned/formations_last50_pl_bundesliga.csv", index=False)
