In [1]:
import os, time, requests, pandas as pd
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import constants

In [2]:
# Title -> id lookup endpoint; batches of titles are POSTed to it further down.
LOOK_UP_URL = "https://api.isthereanydeal.com/lookup/id/title/v1"
IN_CSV = '../data/new_game_list.csv'
# The key is read from the local `constants` module rather than hardcoded here.
API_KEY = constants.API_KEY
OUT_PARQUET = "../data/game_list.parquet"

games = pd.read_csv(IN_CSV)

# Unique, non-blank titles to look up.
# Missing values are dropped BEFORE the cast to str: `astype(str)` turns NaN
# into the literal string "nan", which a later `dropna()` cannot remove and
# which would then be sent to the API as if it were a real game title.
titles = (
    games["title"]
    .dropna()
    .astype(str)
    .str.strip()
    .loc[lambda s: s != ""]  # discard titles that were only whitespace
    .drop_duplicates()
    .tolist()
)

def chunked(seq, n):
    """Yield consecutive slices of ``seq`` containing at most ``n`` items each.

    ``seq`` must support ``len()`` and slicing (list, tuple, str, ...).
    The final slice is shorter when ``len(seq)`` is not a multiple of ``n``;
    an empty ``seq`` yields nothing.
    """
    yield from (seq[start:start + n] for start in range(0, len(seq), n))

def retry_session():
    """Return a ``requests.Session`` that retries failed HTTPS requests.

    The retry policy allows up to 5 retries with a ``backoff_factor`` of 0.6,
    is triggered by the 429/500/502/503/504 status codes, and applies to GET
    and POST requests. The adapter is mounted for ``https://`` URLs only.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=0.6,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET", "POST"],
    )
    http = requests.Session()
    http.mount("https://", HTTPAdapter(max_retries=retry_policy))
    return http

# One shared session so every lookup goes through the retry/backoff policy.
session = retry_session()
params = {"key": API_KEY}
mapping = {}     # title -> id, only for titles the API resolved
unresolved = []  # titles for which the response carried no usable id

BATCH = 50
for batch in chunked(titles, BATCH):
    # Must be session.post (not requests.post), otherwise the retry adapter
    # mounted in retry_session() is never used.
    resp = session.post(LOOK_UP_URL, params=params, json=batch, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # A title is resolved only when the response has a non-empty id for it.
    # This covers both a title that is absent from the response and a title
    # that is present with a null value; the latter must not reach `mapping`,
    # or the keep="last" merge below could replace a previously stored id
    # with null.
    resolved = {title: game_id for title, game_id in data.items() if game_id}
    mapping.update(resolved)
    unresolved.extend(title for title in batch if title not in resolved)
    time.sleep(0.2)  # short pause between batches

df = (pd.Series(mapping, name='itad_uuid').rename_axis('title').reset_index())

# Merge into the existing file if there is one; for a title present in both,
# the freshly looked-up row wins (keep="last").
if os.path.exists(OUT_PARQUET):
    old = pd.read_parquet(OUT_PARQUET)
    combined = (pd.concat([old, df], ignore_index=True)
                  .drop_duplicates(subset=["title"], keep="last"))
    combined.to_parquet(OUT_PARQUET, index=False)
else:
    df.to_parquet(OUT_PARQUET, index=False)

if unresolved:
    # Write next to the parquet output so all data paths share one directory
    # (previously "data/..." while every other path used "../data/...").
    unresolved_csv = os.path.join(os.path.dirname(OUT_PARQUET), "unresolved_titles.csv")
    pd.DataFrame({"title": sorted(unresolved)}).to_csv(unresolved_csv, index=False)


In [4]:
# Read the saved title/id table back from disk.
game_list = pd.read_parquet(path='../data/game_list.parquet')