In [1]:
import os, time, json, requests, pandas as pd
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import constants as constants

In [2]:
# Only the `itad_uuid` column is used from this file, so restrict the read to
# it instead of parsing every column of the merged dataset.
itad_uuid = pd.read_csv("../data/merged_cleaned.csv", usecols=["itad_uuid"])["itad_uuid"]

In [3]:
# --- Game price-history download: configuration ---

# API credentials come from the local `constants` module.
API_KEY = constants.API_KEY

# NOTE: despite the variable name, this is the price-history endpoint.
RELEASE_DATE_URL = "https://api.isthereanydeal.com/games/history/v2"

# One raw JSON response per game is cached under this directory.
CACHE_DIR = "../data_raw/history"

# NOTE: the name says CSV, but the target is a Parquet file.
OUT_CSV = "../data/game_history.parquet"

# Make sure the cache directory and the output file's parent directory exist
# before anything tries to write into them.
for _directory in (CACHE_DIR, os.path.dirname(OUT_CSV)):
    os.makedirs(_directory, exist_ok=True)

def retry_session():
    """Build a ``requests.Session`` that retries failed HTTPS requests.

    The retry policy allows up to 5 retries with a backoff factor of 0.6 for
    GET and POST requests that come back with status 429, 500, 502, 503 or
    504. The adapter is mounted for the ``https://`` prefix only.

    Returns
    -------
    requests.Session
        A new session with the retrying adapter installed.
    """
    policy = Retry(
        total=5,
        backoff_factor=0.6,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET", "POST"],
    )
    session = requests.Session()
    session.mount("https://", HTTPAdapter(max_retries=policy))
    return session

def fetch_game_history(uuid, shops=61, since="2000-01-01T00:00:00Z", session=None):
    """Return the history payload for one game, backed by an on-disk JSON cache.

    Each response is cached at ``CACHE_DIR/<uuid>.json``. A readable cache
    file is returned without touching the network; otherwise the payload is
    requested from ``RELEASE_DATE_URL``, written to the cache and returned.

    Parameters
    ----------
    uuid : str
        Game id; sent as the ``id`` query parameter and used as the cache
        file name.
    shops : int or str, default 61
        Value sent as the ``shops`` query parameter.
    since : str, default "2000-01-01T00:00:00Z"
        Value sent as the ``since`` query parameter.
    session : requests.Session, optional
        Session to reuse for the request. When omitted, a temporary
        ``retry_session()`` is created only if a request is actually needed
        and is closed again afterwards.

    Returns
    -------
    object
        The decoded JSON body, exactly as returned by the endpoint or cache.

    Raises
    ------
    requests.HTTPError
        From ``raise_for_status()`` when the response is a 4xx/5xx. Other
        ``requests`` exceptions (timeouts, exhausted retries) propagate too.
    """
    cache_path = os.path.join(CACHE_DIR, f"{uuid}.json")
    if os.path.exists(cache_path):
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except ValueError:
            # A truncated or otherwise undecodable cache file (e.g. from an
            # interrupted run) would fail forever; treat it as a cache miss
            # and fetch the payload again.
            pass

    params = {
        "key": API_KEY,
        "id": uuid,
        "shops": shops,
        "since": since,
    }

    # Only build a session once we know a request is required, and close it
    # again if it is ours so its connection pool does not leak.
    owns_session = session is None
    if owns_session:
        session = retry_session()
    try:
        resp = session.get(RELEASE_DATE_URL, params=params, timeout=30)
        resp.raise_for_status()
        data = resp.json()
    finally:
        if owns_session:
            session.close()

    # Write to a temporary file and rename it into place so an interruption
    # never leaves a half-written cache entry behind.
    tmp_path = f"{cache_path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(data, f)
    os.replace(tmp_path, cache_path)
    return data

def append_rows(rows):
    """Add ``rows`` to the Parquet file at ``OUT_CSV``.

    ``DataFrame.to_parquet`` has no append mode (``mode=`` / ``header=`` are
    ``to_csv`` options), so appending is done by reading the existing file,
    concatenating the new rows and rewriting the whole file.

    Parameters
    ----------
    rows : list of dict
        Records to add; each dict becomes one row. An empty list is a no-op.

    Notes
    -----
    * Rows that are exact duplicates across every column are dropped, so
      re-running the pipeline over already-cached games does not double the
      stored data.
    * The file is written to ``OUT_CSV + ".tmp"`` and renamed into place, so
      an interrupted write cannot corrupt the existing file.
    * Every call rewrites the full file; call it with batches, not per row.
    """
    df = pd.DataFrame(rows)
    if df.empty:
        return

    if os.path.exists(OUT_CSV):
        df = pd.concat([pd.read_parquet(OUT_CSV), df], ignore_index=True)
    df = df.drop_duplicates(ignore_index=True)

    tmp_path = f"{OUT_CSV}.tmp"
    df.to_parquet(tmp_path, index=False)
    os.replace(tmp_path, OUT_CSV)

def _history_rows(uid, data):
    """Flatten one game's history payload into a list of flat row dicts."""
    rows = []
    for entry in data:
        # `or {}` also covers keys that are present but JSON null, which a
        # plain `.get(key, {})` default does not.
        shop = entry.get("shop") or {}
        deal = entry.get("deal") or {}
        price = deal.get("price") or {}
        regular = deal.get("regular") or {}
        rows.append({
            "itad_uuid": uid,
            "timestamp": entry.get("timestamp"),
            "shop_id": shop.get("id"),
            "shop_name": shop.get("name"),
            "price": price.get("amount"),
            "regular": regular.get("amount"),
            "currency": price.get("currency"),
            "discount_pct": deal.get("cut"),
        })
    return rows


def process_all(uuids, rpm=60):
    """Fetch the history of every game in ``uuids`` and persist it in batches.

    For each id the payload is obtained through ``fetch_game_history`` (which
    may serve it from the on-disk cache), flattened to one row per history
    entry and buffered. The buffer is handed to ``append_rows`` after every
    50th id and once more at the end.

    Parameters
    ----------
    uuids : iterable of str
        Game ids to process.
    rpm : int, default 60
        Maximum requests per minute; values below 1 are treated as 1.

    Notes
    -----
    * A failure for one id is printed and skipped; it never aborts the run.
    * The rate-limit pause is skipped for ids that were already cached,
      because no request is sent for them.
    """
    session = retry_session()
    rows_buffer = []
    window = 60.0 / max(rpm, 1)
    # Only used in progress messages; tolerate iterables without a length.
    total = len(uuids) if hasattr(uuids, "__len__") else "?"

    try:
        for i, uid in enumerate(uuids, 1):
            # Mirrors the cache path used by fetch_game_history so we can
            # tell whether this iteration can hit the network at all.
            was_cached = os.path.exists(os.path.join(CACHE_DIR, f"{uid}.json"))
            try:
                data = fetch_game_history(uid, session=session)
                rows_buffer.extend(_history_rows(uid, data))
            except requests.HTTPError as e:
                status = e.response.status_code if e.response is not None else "?"
                print(f"[{i}/{total}] {uid} -> HTTP {status}")
            except Exception as e:
                print(f"[{i}/{total}] {uid} -> ERROR {e}")

            if i % 50 == 0 and rows_buffer:
                append_rows(rows_buffer)
                rows_buffer.clear()

            if not was_cached:
                time.sleep(window)
    finally:
        session.close()

    if rows_buffer:
        append_rows(rows_buffer)

In [4]:
# Distinct, non-null game ids, kept in first-seen order.
uuids = itad_uuid.dropna().drop_duplicates().tolist()
print(len(uuids))

4819


In [5]:
# Fetch (or load from cache) the history of every game and persist it.
# At rpm=60 requests are paced one second apart, so a cold run over the
# 4819 ids counted above takes upwards of 80 minutes.
process_all(uuids=uuids, rpm=60)