In [5]:
import os
import json
import math
from typing import Iterable, List, Dict, Optional, Set, Any
import requests
import calendar, datetime as dt

In [None]:
IGDB_BASE = "https://api.igdb.com/v4"
TWITCH_OAUTH = "https://id.twitch.tv/oauth2/token"
CLIENT_ID = '' # remove when upload to git
CLIENT_SECRET = ''
REQ_PAUSE_SEC = 0.35
# Pull up to 500 records per page (IGDB max).
PAGE_SIZE = 500
# PC (Microsoft Windows) platform id in IGDB
PLATFORM_PC = 6

In [3]:

# ---------- Auth ----------

def get_app_access_token(client_id: str, client_secret: str) -> str:
    """
    Fetch a Twitch App Access Token for IGDB.
    """
    resp = requests.post(
        TWITCH_OAUTH,
        params={
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "client_credentials",
        },
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()["access_token"]


def igdb_post(
    endpoint: str,
    query: str,
    client_id: str,
    access_token: str,
) -> List[Dict[str, Any]]:
    """
    POST a raw IGDB APIquery language (APICALYPSE) body to an endpoint.
    """
    url = f"{IGDB_BASE}/{endpoint.lstrip('/')}"
    headers = {
        "Client-ID": client_id,
        "Authorization": f"Bearer {access_token}",
    }
    r = requests.post(url, data=query, headers=headers, timeout=60)
    # 429 means rate limited; back off and retry once or twice:
    if r.status_code == 429:
        time.sleep(1.2)
        r = requests.post(url, data=query, headers=headers, timeout=60)
    r.raise_for_status()
    return r.json()


In [4]:

# ---------- Data fetchers ----------

def paginate_release_dates_for_platform(
    platform_id: int,
    client_id: str,
    access_token: str,
    date_from: Optional[int] = None,
    date_to: Optional[int] = None,
    region_filter: Optional[List[int]] = None,
) -> List[Dict[str, Any]]:
    """
    Pull all records from /release_dates for a platform, honoring optional date range (UNIX epoch seconds).
    Returns raw release_date rows (each has at least game, platform, date, region, etc.).
    """
    # Build the WHERE clause
    where_parts = [f"platform = {platform_id}", "game != null"]
    if date_from is not None:
        where_parts.append(f"date >= {date_from}")
    if date_to is not None:
        where_parts.append(f"date <= {date_to}")
    if region_filter:
        regions = ", ".join(str(r) for r in region_filter)
        where_parts.append(f"region = ({regions})")

    where_clause = " & ".join(where_parts)

    all_rows: List[Dict[str, Any]] = []
    offset = 0

    while True:
        q = (
            "fields game, platform, date, human, region, y, m, created_at, updated_at; "
            f"where {where_clause}; "
            f"sort date asc; "
            f"limit {PAGE_SIZE}; "
            f"offset {offset};"
        )
        chunk = igdb_post("release_dates", q, client_id, access_token)
        all_rows.extend(chunk)
        offset += PAGE_SIZE
        time.sleep(REQ_PAUSE_SEC)
        if len(chunk) < PAGE_SIZE:
            break

    return all_rows


def batched(iterable: Iterable[Any], size: int) -> Iterable[List[Any]]:
    batch: List[Any] = []
    for x in iterable:
        batch.append(x)
        if len(batch) >= size:
            yield batch
            batch = []
    if batch:
        yield batch


def hydrate_games(
    game_ids: Iterable[int],
    client_id: str,
    access_token: str,
) -> List[Dict[str, Any]]:
    """
    Given a set/list of game IDs, fetch game details in chunks.
    """
    ids = list(set(int(i) for i in game_ids))
    all_games: List[Dict[str, Any]] = []

    # IGDB lets 500 per request; we keep room for fields in body; still fine at 500.
    for chunk in batched(ids, 500):
        id_list = ", ".join(map(str, chunk))
        q = (
            "fields id, name, slug, first_release_date, "
            "platforms, genres.name, themes.name, "
            "aggregated_rating, total_rating, rating, rating_count, "
            "involved_companies.company.name, "
            "cover.image_id, cover.url, "
            "summary, storyline, websites.url; "
            f"where id = ({id_list}); "
            "limit 500;"
        )
        data = igdb_post("games", q, client_id, access_token)
        all_games.extend(data)
        time.sleep(REQ_PAUSE_SEC)

    return all_games

In [6]:
# ---------- Public helper ----------

def fetch_all_pc_games(
    date_from: Optional[int] = None,
    date_to: Optional[int] = None,
    out_path: str = "igdb_pc_games.json",
    include_release_rows: bool = False,
) -> Dict[str, Any]:
    """
    Fetch ALL games that have a release on PC (Microsoft Windows).
    Optional date range (UNIX seconds) lets you narrow to a window (e.g., upcoming).
    Saves JSON to out_path and returns a dict with metadata.
    """
    if not CLIENT_ID or not CLIENT_SECRET:
        raise RuntimeError(
            "Missing IGDB credentials. Set IGDB_CLIENT_ID and IGDB_CLIENT_SECRET environment variables."
        )

    token = get_app_access_token(CLIENT_ID, CLIENT_SECRET)

    # Step 1: page through release_dates for PC (optionally bounded by dates)
    release_rows = paginate_release_dates_for_platform(
        PLATFORM_PC, CLIENT_ID, token, date_from=date_from, date_to=date_to
    )

    game_ids: Set[int] = {row["game"] for row in release_rows if "game" in row and row["game"] is not None}

    # Step 2: hydrate game objects
    games = hydrate_games(game_ids, CLIENT_ID, token)

    payload = {
        "meta": {
            "platform_id": PLATFORM_PC,
            "platform_label": "PC (Microsoft Windows)",
            "total_release_rows": len(release_rows),
            "unique_game_ids": len(game_ids),
        },
        "games": games,
    }

    if include_release_rows:
        payload["release_dates_rows"] = release_rows

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)

    return payload

In [7]:
start = int(dt.datetime(2010,1,1).timestamp())
end   = int(dt.datetime(2025,12,31,23,59,59).timestamp())
fetch_all_pc_games(date_from=start, date_to=end, out_path="igdb_pc_games.json")

{'meta': {'platform_id': 6,
  'platform_label': 'PC (Microsoft Windows)',
  'total_release_rows': 149608,
  'unique_game_ids': 142135},
 'games': [{'id': 262523,
   'cover': {'id': 325168,
    'image_id': 'co6ywg',
    'url': '//images.igdb.com/igdb/image/upload/t_thumb/co6ywg.jpg'},
   'first_release_date': 1692662400,
   'genres': [{'id': 34, 'name': 'Visual Novel'}],
   'name': 'Only in Passing',
   'platforms': [3, 6, 14],
   'slug': 'only-in-passing',
   'storyline': "That person on the train. That person in the café. That person whose gaze you meet for a moment...\n\nThese are the people you meet only in passing, then never to see again...\n\nCatch a glimpse of the world through Emma Sea's eyes. And meet the people she meets. What will Emma encounter on her commute home?",
   'summary': "A bite-sized, slice-of-life, indie visual novel with a lo-fi aesthetic. It's all about those brief encounters we have during our daily lives.\n\nThe scenarios are randomized. And the visual novel