In [141]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import os
from dotenv import load_dotenv

from datetime import datetime, timezone

import os, time, math, json, sqlite3, requests
from datetime import datetime, timezone
from typing import List
from contextlib import closing

In [80]:
STEAM_APP_ID = 812140  # Example: Assassin's Creed Odyssey

#### Loading data from ITAD API

In [81]:
load_dotenv()
ITAD_API_KEY = os.getenv('ITAD_API_KEY')
if not ITAD_API_KEY:
    raise ValueError("Please set the ITAD_API_KEY environment variable.")

In [82]:
url_appID = "https://api.isthereanydeal.com/lookup/id/shop/61/v1"

payload = [f"app/{STEAM_APP_ID}"] # this API accepts a list of identifiers

headers = {
    "Authorization": f"key {ITAD_API_KEY}",
    "Content-Type": "application/json"
}

In [83]:
try:
    resp = requests.post(url_appID, headers=headers, json=payload, timeout=15)
    resp.raise_for_status()
except requests.exceptions.HTTPError as http_err:
    print("HTTP error:", http_err, "| Body:", getattr(resp, "text", ""))
except requests.exceptions.RequestException as req_err:
    print("Request error:", req_err)
else:
    game_id = resp.json()
    print("Raw JSON:", resp.json())

Raw JSON: {'app/812140': '018d937f-0184-7248-8d64-3c723c523111'}


In [84]:
ITAD_GAME_ID = game_id['app/812140']
print("ITAD Game ID:", ITAD_GAME_ID)

ITAD Game ID: 018d937f-0184-7248-8d64-3c723c523111


In [85]:
url_info = f"https://api.isthereanydeal.com/games/info/v2"

params2 = {"id": ITAD_GAME_ID, "key": ITAD_API_KEY}

In [86]:
try:
    r = requests.get(url_info, params=params2, timeout=15)
    r.raise_for_status()
    info = r.json()
    print("Game title:", info.get("title"))
    print("Tags:", info.get("tags"))
    print("Release date:", info.get("releaseDate"))
    print("Publisher:", info.get("publishers"))
    print("Players:", info.get("players"))
    print("Reviews:", info.get("reviews"))
except requests.HTTPError as e:
    print("HTTP error:", e, "| Body:", r.text)

Game title: Assassin's Creed Odyssey
Tags: ['Open World', 'RPG', 'Singleplayer', 'Historical', 'Action']
Release date: 2018-10-03
Publisher: [{'id': 61, 'name': 'Ubisoft'}]
Players: {'recent': 9400, 'day': 10837, 'week': 10837, 'peak': 61984}
Reviews: [{'score': 89, 'source': 'Steam', 'count': 158914, 'url': 'https://store.steampowered.com/app/812140/'}, {'score': 86, 'source': 'Metascore', 'count': 14, 'url': 'https://metacritic.com/game/assassins-creed-odyssey/critic-reviews/?platform=pc'}, {'score': 69, 'source': 'Metacritic User Score', 'count': 5304, 'url': 'https://metacritic.com/game/assassins-creed-odyssey/user-reviews/?platform=pc'}, {'score': 84, 'source': 'OpenCritic', 'count': 169, 'url': 'https://opencritic.com/game/6222/assassins-creed-odyssey'}]


In [91]:
url_history = "https://api.isthereanydeal.com/games/history/v2"

params = {
    "key": ITAD_API_KEY,
    "id": ITAD_GAME_ID,
    "country": "us",
    "shops": "61",
    "since": "2018-01-01T00:00:00Z"
}

In [92]:
try:
    r = requests.get(url_history, params=params, timeout=15)
    print("Request URL:", r.url)
    r.raise_for_status()
    history = r.json()
    print("Price history data:", history)
except requests.HTTPError as e:
    print("HTTP error:", e, "| Body:", r.text)
except requests.RequestException as e:
    print("Request error:", e)

Request URL: https://api.isthereanydeal.com/games/history/v2?key=6e83d100e0379ecd168d625928bec6a244dd08d4&id=018d937f-0184-7248-8d64-3c723c523111&country=us&shops=61&since=2018-01-01T00%3A00%3A00Z
Price history data: [{'timestamp': '2025-09-01T19:16:32+02:00', 'shop': {'id': 61, 'name': 'Steam'}, 'deal': {'price': {'amount': 8.99, 'amountInt': 899, 'currency': 'USD'}, 'regular': {'amount': 59.99, 'amountInt': 5999, 'currency': 'USD'}, 'cut': 85}}, {'timestamp': '2025-07-27T19:15:28+02:00', 'shop': {'id': 61, 'name': 'Steam'}, 'deal': {'price': {'amount': 59.99, 'amountInt': 5999, 'currency': 'USD'}, 'regular': {'amount': 59.99, 'amountInt': 5999, 'currency': 'USD'}, 'cut': 0}}, {'timestamp': '2025-07-20T19:15:45+02:00', 'shop': {'id': 61, 'name': 'Steam'}, 'deal': {'price': {'amount': 11.99, 'amountInt': 1199, 'currency': 'USD'}, 'regular': {'amount': 59.99, 'amountInt': 5999, 'currency': 'USD'}, 'cut': 80}}, {'timestamp': '2025-07-10T19:33:58+02:00', 'shop': {'id': 61, 'name': 'Steam'

In [96]:
from datetime import datetime, timezone
from typing import List, Dict, Optional

def _parse_ts(ts: str) -> datetime:
    # handles both ...Z and timezone offsets like +02:00; returns UTC
    dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    return dt.astimezone(timezone.utc)

def _dedupe_same_timestamp(events: List[Dict]) -> List[Dict]:
    """
    For events sharing the same timestamp, keep the one with the largest cut.
    This avoids double-logs from scraper updates at identical times.
    """
    bucket = {}
    for e in events:
        t = e["timestamp"]
        cut = (e.get("deal") or {}).get("cut") or 0
        if (t not in bucket) or (cut > ((bucket[t].get("deal") or {}).get("cut") or 0)):
            bucket[t] = e
    return [bucket[t] for t in sorted(bucket.keys(), key=_parse_ts)]

def find_first_sale(
    history: List[Dict],
    release_dt: Optional[datetime] = None,
    min_cut: int = 5,
    post_launch_only: bool = True,
) -> Optional[Dict]:
    """
    history: full list from ITAD /games/history/v2 (already filtered to shop=61, country fixed)
    release_dt: UTC datetime (from /games/info/v2). If None, no pre-release filtering is applied.
    min_cut: minimum % discount to consider a sale
    post_launch_only: ignore events strictly before release_dt if True
    returns dict with keys: timestamp (UTC dt), cut, price, regular  OR None if no sale
    """
    if not history:
        return None

    # normalize + sort
    events_sorted = sorted(history, key=lambda e: _parse_ts(e["timestamp"]))
    # dedupe by identical timestamp (keep max cut)
    events_sorted = _dedupe_same_timestamp(events_sorted)

    for e in events_sorted:
        ts = _parse_ts(e["timestamp"])
        if post_launch_only and release_dt is not None and ts < release_dt:
            continue

        deal = e.get("deal") or {}
        cut = deal.get("cut") or 0
        price = (deal.get("price") or {}).get("amount")
        regular = (deal.get("regular") or {}).get("amount")

        # sanity checks for a true sale
        if cut >= min_cut and price is not None and regular is not None and price < regular:
            return {
                "timestamp": ts,
                "cut": int(cut),
                "price": float(price),
                "regular": float(regular),
            }

    return None

def days_between(a: datetime, b: datetime) -> int:
    return (b - a).days

In [98]:
# 1) get release date (UTC) from /games/info/v2
release_iso = info.get("releaseDate")  # e.g., "2025-08-20T00:00:00Z"
release_dt = datetime.fromisoformat(release_iso.replace("Z", "+00:00")).astimezone(timezone.utc)

# 2) get full history list from /games/history/v2 (shop=61, country consistent)
events = history if isinstance(history, list) else history.get("history", [])

# 3) compute label
first_sale = find_first_sale(events, release_dt=release_dt, min_cut=5, post_launch_only=True)

if first_sale:
    label_days = days_between(release_dt, first_sale["timestamp"])
    print({
        "first_sale_date": first_sale["timestamp"].date().isoformat(),
        "first_sale_cut": first_sale["cut"],
        "first_sale_price": first_sale["price"],
        "first_sale_regular": first_sale["regular"],
        "days_to_first_sale": label_days,
    })
else:
    print({"first_sale_date": None, "days_to_first_sale": None, "censored": True})

{'first_sale_date': '2018-11-21', 'first_sale_cut': 33, 'first_sale_price': 40.19, 'first_sale_regular': 59.99, 'days_to_first_sale': 49}


In [99]:
kaggle_data = pd.read_csv("Kaggle datasets/93182_steam_games.csv")
kaggle_data.head()

  kaggle_data = pd.read_csv("Kaggle datasets/93182_steam_games.csv")


Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,1424640,余烬,"Oct 3, 2020",20000 - 50000,0,0,3.99,0,'Ashes of war' is an anti war theme adventure ...,['Simplified Chinese'],...,0,0,0,宁夏华夏西部影视城有限公司,宁夏华夏西部影视城有限公司,"Single-player,Family Sharing","Adventure,Casual,Indie,RPG","Sokoban,RPG,Puzzle-Platformer,Exploration,Adve...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
1,402890,Nyctophilia,"Sep 23, 2015",50000 - 100000,0,0,0.0,0,NYCTOPHILIA Nyctophilia is an 2D psychological...,"['English', 'Russian']",...,0,0,0,Cat In A Jar Games,Cat In A Jar Games,Single-player,"Adventure,Free To Play,Indie","Free to Play,Indie,Adventure,Horror,2D,Pixel G...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
2,1151740,Prison Princess,"Apr 2, 2020",0 - 20000,0,0,19.99,0,"ABOUT Now nothing more than a phantom, can the...","['English', 'Simplified Chinese', 'Traditional...",...,0,0,0,qureate,qureate,"Single-player,Steam Achievements,Full controll...","Adventure,Indie","Sexual Content,Adventure,Indie,Nudity,Anime,Ma...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
3,875530,Dead In Time,"Oct 12, 2018",0 - 20000,0,0,7.99,0,Is a hardcore action with a non-trivial level ...,"['English', 'Russian']",...,0,0,0,Zelenov Artem,Zelenov Artem,"Single-player,Full controller support,Family S...","Action,Indie","Action,Indie,Souls-like,Fantasy,Early Access,R...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
4,1835360,Panacle: Back To Wild,"Mar 11, 2022",0 - 20000,2,0,3.99,0,Panacle: Back to the Wild is a indie card game...,"['English', 'Japanese', 'Simplified Chinese', ...",...,0,0,0,渡鸦游戏,"渡鸦游戏,电钮组","Single-player,Family Sharing","Indie,Strategy,Early Access","Trading Card Game,Turn-Based Strategy,Lore-Ric...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...


In [104]:
kaggle_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93182 entries, 0 to 93181
Data columns (total 39 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   AppID                       93182 non-null  int64  
 1   Name                        93179 non-null  object 
 2   Release date                93182 non-null  object 
 3   Estimated owners            16462 non-null  object 
 4   Peak CCU                    93182 non-null  int64  
 5   Required age                93182 non-null  int64  
 6   Price                       93182 non-null  float64
 7   DLC count                   93182 non-null  int64  
 8   About the game              88392 non-null  object 
 9   Supported languages         93182 non-null  object 
 10  Full audio languages        93182 non-null  object 
 11  Reviews                     10599 non-null  object 
 12  Header image                93182 non-null  object 
 13  Website                     416

In [105]:
kaggle_data['AppID'].nunique()

93182

In [107]:
kaggle_data["Release date"] = pd.to_datetime(kaggle_data["Release date"], format="%b %d, %Y", errors="coerce")

In [108]:
kaggle_data.head()

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,1424640,余烬,2020-10-03,20000 - 50000,0,0,3.99,0,'Ashes of war' is an anti war theme adventure ...,['Simplified Chinese'],...,0,0,0,宁夏华夏西部影视城有限公司,宁夏华夏西部影视城有限公司,"Single-player,Family Sharing","Adventure,Casual,Indie,RPG","Sokoban,RPG,Puzzle-Platformer,Exploration,Adve...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
1,402890,Nyctophilia,2015-09-23,50000 - 100000,0,0,0.0,0,NYCTOPHILIA Nyctophilia is an 2D psychological...,"['English', 'Russian']",...,0,0,0,Cat In A Jar Games,Cat In A Jar Games,Single-player,"Adventure,Free To Play,Indie","Free to Play,Indie,Adventure,Horror,2D,Pixel G...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
2,1151740,Prison Princess,2020-04-02,0 - 20000,0,0,19.99,0,"ABOUT Now nothing more than a phantom, can the...","['English', 'Simplified Chinese', 'Traditional...",...,0,0,0,qureate,qureate,"Single-player,Steam Achievements,Full controll...","Adventure,Indie","Sexual Content,Adventure,Indie,Nudity,Anime,Ma...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
3,875530,Dead In Time,2018-10-12,0 - 20000,0,0,7.99,0,Is a hardcore action with a non-trivial level ...,"['English', 'Russian']",...,0,0,0,Zelenov Artem,Zelenov Artem,"Single-player,Full controller support,Family S...","Action,Indie","Action,Indie,Souls-like,Fantasy,Early Access,R...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
4,1835360,Panacle: Back To Wild,2022-03-11,0 - 20000,2,0,3.99,0,Panacle: Back to the Wild is a indie card game...,"['English', 'Japanese', 'Simplified Chinese', ...",...,0,0,0,渡鸦游戏,"渡鸦游戏,电钮组","Single-player,Family Sharing","Indie,Strategy,Early Access","Trading Card Game,Turn-Based Strategy,Lore-Ric...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...


In [116]:
kaggle_data[(kaggle_data['Release date'] > '2021-01-01') & (kaggle_data['Release date'] < '2024-12-31')]

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
4,1835360,Panacle: Back To Wild,2022-03-11,0 - 20000,2,0,3.99,0,Panacle: Back to the Wild is a indie card game...,"['English', 'Japanese', 'Simplified Chinese', ...",...,0,0,0,渡鸦游戏,"渡鸦游戏,电钮组","Single-player,Family Sharing","Indie,Strategy,Early Access","Trading Card Game,Turn-Based Strategy,Lore-Ric...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
7,2604580,THE JUSOU 3,2024-02-20,0 - 20000,1,0,9.99,0,■ The Ultimate Horror Escape Game 'THE JUSOU -...,"['Japanese', 'English', 'Simplified Chinese', ...",...,0,0,0,株式会社Metaware,株式会社Metaware,"Single-player,Family Sharing","Adventure,Casual,Indie","Exploration,Puzzle,Female Protagonist,2D,3D,Ho...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
12,2261280,WHITE WATER,2023-01-16,0 - 20000,0,0,1.99,0,Sincerely BLACK COFFEE.,['English'],...,0,0,0,Black Coffee,Black Coffee,"Single-player,Family Sharing","Action,Adventure,Indie,RPG","Adventure,RPG,Interactive Fiction,Action-Adven...",https://shared.akamai.steamstatic.com/store_it...,
13,2325490,Minamochi Factory,2023-06-22,0 - 0,0,0,0.00,0,You can be a master of inspection from today! ...,"['English', 'Japanese']",...,0,0,0,ダイスマン,ダイスマン,"Single-player,Steam Achievements,Steam Cloud","Action,Casual,Free To Play,Indie",,https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
15,2214650,Rolando Deluxe,2024-04-17,0 - 20000,0,0,9.99,0,Roll into action and lead a lovable band of Ro...,"['English', 'French', 'German', 'Spanish - Spa...",...,0,0,0,HandCircus,HandCircus,"Single-player,Steam Achievements,Full controll...","Action,Casual,Indie","2D Platformer,Puzzle-Platformer,Cartoony,Physi...",https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93176,2452810,"Alien IQ Exam: Human Edition, Phase 1",2023-07-06,,0,0,12.99,0,Are humans worthy? Take up the challenge by so...,['English'],...,0,0,0,Super RR Man Productions,Super RR Man Productions,"Single-player,Family Sharing",Casual,,https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
93177,2251030,Mutant Monty (C64/CPC/Spectrum),2023-01-05,,0,0,4.99,0,Originally released in 1984 for home microcomp...,['English'],...,0,0,0,Artic Computing,Pixel Games UK,"Single-player,Partial Controller Support,Steam...",Action,,https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
93179,1844230,Malicious ReloadⅡ,2023-09-05,,0,0,5.99,0,★ To ensure that the game you have purchased w...,"['Japanese', 'English', 'Simplified Chinese', ...",...,0,0,0,UNDER HILL,Playmeow,"Single-player,Family Sharing","Action,Adventure,Simulation",,https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...
93180,2623690,Mutant Frog,2024-01-27,,0,0,0.99,0,As a result of an unknown meteorite hitting an...,['English'],...,0,0,0,Run-O Games,Run-O Games,"Single-player,Family Sharing","Action,Adventure,Casual,Indie",,https://shared.akamai.steamstatic.com/store_it...,http://video.akamai.steamstatic.com/store_trai...


### I am grabbing all the Steam AppIDs from the 2021 upto 2024

In [118]:
filtered = kaggle_data[(kaggle_data['Release date'] > '2021-01-01') &
                       (kaggle_data['Release date'] < '2024-12-31')]

appids = filtered['AppID'].dropna().astype(int).drop_duplicates().tolist()

### Building the database in SQL Lite

In [121]:
ITAD_API_KEY = os.getenv("ITAD_API_KEY")  # or paste your key here temporarily
assert ITAD_API_KEY, "Set ITAD_API_KEY in your environment (e.g., .env)!"

DB_PATH = "data/steam_sales.db"
ITAD_LOOKUP_URL = "https://api.isthereanydeal.com/lookup/id/shop/61/v1"  # 61 = Steam

def utcnow_iso():
    return datetime.now(timezone.utc).isoformat().replace("+00:00","Z")

def get_conn(path=DB_PATH):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    conn = sqlite3.connect(path)
    conn.execute("PRAGMA foreign_keys = ON;")
    conn.execute("PRAGMA journal_mode = WAL;")
    conn.execute("PRAGMA synchronous = NORMAL;")
    return conn

In [122]:
with get_conn() as conn:
    conn.execute("""
    CREATE TABLE IF NOT EXISTS seeds (
        appid      INTEGER PRIMARY KEY,
        itad_id    TEXT,
        status     TEXT,         -- 'mapped', 'not_found', 'error'
        updated_at TEXT
    );
    """)
    conn.commit()

In [123]:
def chunked(seq: List[int], size: int):
    for i in range(0, len(seq), size):
        yield seq[i:i+size]

def upsert_seed_rows(rows):
    """
    rows: list of dicts with keys: appid, itad_id, status, updated_at
    """
    with get_conn() as conn:
        conn.executemany("""
        INSERT INTO seeds (appid, itad_id, status, updated_at)
        VALUES (:appid, :itad_id, :status, :updated_at)
        ON CONFLICT(appid) DO UPDATE SET
            itad_id=excluded.itad_id,
            status=excluded.status,
            updated_at=excluded.updated_at;
        """, rows)
        conn.commit()

In [124]:
def lookup_itad_ids_for_appids(appids: List[int], api_key: str, retry=3, sleep_sec=1.2):
    """
    Calls POST /lookup/id/shop/61/v1 with body ["app/<id>", ...]
    Returns: dict mapping "app/<id>" -> itad_id (or None if not found)
    """
    body = [f"app/{a}" for a in appids]
    params = {"key": api_key}  # query-param auth (simple & works)
    
    for attempt in range(1, retry+1):
        try:
            r = requests.post(ITAD_LOOKUP_URL, params=params, json=body, timeout=20)
            # Helpful for debugging:
            # print("Status:", r.status_code, "Remaining:", r.headers.get("X-RateLimit-Remaining"))
            r.raise_for_status()
            data = r.json()
            # data example: {"app/570":"018d...","app/440":null,...}
            return data
        except requests.HTTPError as e:
            # Backoff on rate limits or transient errors
            if r is not None and r.status_code in (429, 500, 502, 503, 504):
                time.sleep(sleep_sec * attempt)
                continue
            raise
        finally:
            # gentle pacing to respect ~1 r/s recommendation
            time.sleep(sleep_sec)
    return {}

In [125]:
BATCH_SIZE = 40   # 25–50 is a good range
results_summary = {"mapped": 0, "not_found": 0}

for batch in chunked(appids, BATCH_SIZE):
    mapping = lookup_itad_ids_for_appids(batch, ITAD_API_KEY)
    now = utcnow_iso()
    rows = []

    # Build rows for SQLite
    for a in batch:
        key = f"app/{a}"
        val = mapping.get(key, None)
        if isinstance(val, str) and val.strip():
            rows.append({"appid": a, "itad_id": val, "status": "mapped", "updated_at": now})
            results_summary["mapped"] += 1
        else:
            rows.append({"appid": a, "itad_id": None, "status": "not_found", "updated_at": now})
            results_summary["not_found"] += 1

    upsert_seed_rows(rows)

print("Done. Summary:", results_summary)

KeyboardInterrupt: 

In [134]:
import sqlite3, pandas as pd
conn = sqlite3.connect("data/steam_sales.db")

# What tables exist?
pd.read_sql("SELECT * FROM seeds;", conn)

Unnamed: 0,appid,itad_id,status,updated_at
0,8980,018d937f-1d1f-73fa-9019-5296f63a702f,mapped,2025-09-14T20:57:18.186903Z
1,261550,018d937f-2b82-710b-be7f-7324eaa1d8d5,mapped,2025-09-14T20:58:46.695843Z
2,269190,018d937f-1b14-70f7-ba7d-e0d4c3f425d5,mapped,2025-09-14T20:59:59.492887Z
3,280720,018d937f-0845-7221-bb7a-ef2f7b17c451,mapped,2025-09-14T20:58:13.268206Z
4,314430,018d937f-2a0e-70d6-ac76-ce759fc93de3,mapped,2025-09-14T20:56:51.464444Z
...,...,...,...,...
7075,3168000,,not_found,2025-09-14T20:59:00.305714Z
7076,3168050,,not_found,2025-09-14T20:56:31.316981Z
7077,3171800,,not_found,2025-09-14T20:58:42.723809Z
7078,3173490,,not_found,2025-09-14T20:56:28.498163Z


In [136]:
import sqlite3, pandas as pd
conn = sqlite3.connect("data/steam_sales.db")

done = set(pd.read_sql("SELECT appid FROM seeds;", conn)['appid'])
conn.close()

remaining = [a for a in appids if a not in done]
len(remaining), remaining[:10]  # count + sample

(46046,
 [2476720,
  2011080,
  2228960,
  1569550,
  1681480,
  1523420,
  2622850,
  1658830,
  2363060,
  2542050])

In [137]:
BATCH_SIZE = 40   # 25–50 is a good range
results_summary = {"mapped": 0, "not_found": 0}

for batch in chunked(remaining, BATCH_SIZE):
    mapping = lookup_itad_ids_for_appids(batch, ITAD_API_KEY)
    now = utcnow_iso()
    rows = []

    # Build rows for SQLite
    for a in batch:
        key = f"app/{a}"
        val = mapping.get(key, None)
        if isinstance(val, str) and val.strip():
            rows.append({"appid": a, "itad_id": val, "status": "mapped", "updated_at": now})
            results_summary["mapped"] += 1
        else:
            rows.append({"appid": a, "itad_id": None, "status": "not_found", "updated_at": now})
            results_summary["not_found"] += 1

    upsert_seed_rows(rows)

print("Done. Summary:", results_summary)

Done. Summary: {'mapped': 43306, 'not_found': 2740}


In [139]:
conn = sqlite3.connect("data/steam_sales.db")

pd.read_sql("SELECT * FROM seeds;", conn)

Unnamed: 0,appid,itad_id,status,updated_at
0,7800,018d937f-2eb7-7037-82d1-2fab5fddf3d4,mapped,2025-09-14T21:22:13.620948Z
1,8980,018d937f-1d1f-73fa-9019-5296f63a702f,mapped,2025-09-14T20:57:18.186903Z
2,11550,018d937f-35ee-7204-ad6c-ae2e2639119f,mapped,2025-09-14T21:21:58.576307Z
3,24880,018d937e-ffc5-7152-8d6f-33f59b72cdb4,mapped,2025-09-14T21:23:56.160025Z
4,226280,018d937f-4ef3-7168-994d-7968084ab1a0,mapped,2025-09-14T21:13:30.499439Z
...,...,...,...,...
53121,3193740,,not_found,2025-09-14T21:24:44.441926Z
53122,3194700,,not_found,2025-09-14T21:22:59.628007Z
53123,3195690,0191aab8-da1b-7047-86be-bf0466fd99fc,mapped,2025-09-14T21:20:31.276811Z
53124,3196990,,not_found,2025-09-14T21:19:10.907455Z


In [142]:
DB_PATH = "data/steam_sales.db"

with closing(sqlite3.connect(DB_PATH)) as conn:
    conn.executescript("""
    PRAGMA foreign_keys = ON;

    CREATE TABLE IF NOT EXISTS games (
      itad_id      TEXT PRIMARY KEY,
      appid        INTEGER,
      title        TEXT,
      type         TEXT,
      release_date TEXT,     -- ISO date
      early_access INTEGER,  -- 0/1
      mature       INTEGER   -- 0/1
    );

    CREATE TABLE IF NOT EXISTS publishers (
      publisher_id INTEGER PRIMARY KEY,
      name         TEXT UNIQUE
    );
    CREATE TABLE IF NOT EXISTS game_publishers (
      itad_id TEXT,
      publisher_id INTEGER,
      PRIMARY KEY (itad_id, publisher_id),
      FOREIGN KEY (itad_id) REFERENCES games(itad_id) ON DELETE CASCADE,
      FOREIGN KEY (publisher_id) REFERENCES publishers(publisher_id) ON DELETE CASCADE
    );

    CREATE TABLE IF NOT EXISTS developers (
      developer_id INTEGER PRIMARY KEY,
      name         TEXT UNIQUE
    );
    CREATE TABLE IF NOT EXISTS game_developers (
      itad_id TEXT,
      developer_id INTEGER,
      PRIMARY KEY (itad_id, developer_id),
      FOREIGN KEY (itad_id) REFERENCES games(itad_id) ON DELETE CASCADE,
      FOREIGN KEY (developer_id) REFERENCES developers(developer_id) ON DELETE CASCADE
    );

    CREATE TABLE IF NOT EXISTS tags (
      tag_id INTEGER PRIMARY KEY,
      name   TEXT UNIQUE
    );
    CREATE TABLE IF NOT EXISTS game_tags (
      itad_id TEXT,
      tag_id  INTEGER,
      PRIMARY KEY (itad_id, tag_id),
      FOREIGN KEY (itad_id) REFERENCES games(itad_id) ON DELETE CASCADE,
      FOREIGN KEY (tag_id) REFERENCES tags(tag_id) ON DELETE CASCADE
    );

    CREATE TABLE IF NOT EXISTS reviews (
      itad_id TEXT,
      source  TEXT,
      score   INTEGER,
      count   INTEGER,
      url     TEXT,
      PRIMARY KEY (itad_id, source),
      FOREIGN KEY (itad_id) REFERENCES games(itad_id) ON DELETE CASCADE
    );

    CREATE TABLE IF NOT EXISTS players (
      itad_id TEXT PRIMARY KEY,
      recent  INTEGER,
      day     INTEGER,
      week    INTEGER,
      peak    INTEGER,
      FOREIGN KEY (itad_id) REFERENCES games(itad_id) ON DELETE CASCADE
    );

    CREATE INDEX IF NOT EXISTS idx_games_release ON games(release_date);
    """)
    conn.commit()

In [146]:
import pandas as pd

with closing(sqlite3.connect(DB_PATH)) as conn:
    pending = pd.read_sql("""
      SELECT s.itad_id
      FROM seeds s
      LEFT JOIN games g ON g.itad_id = s.itad_id
      WHERE s.status='mapped' AND s.itad_id IS NOT NULL AND g.itad_id IS NULL
    """, conn)['itad_id'].tolist()

len(pending), pending[:5]

(50003,
 ['018d937f-2eb7-7037-82d1-2fab5fddf3d4',
  '018d937f-1d1f-73fa-9019-5296f63a702f',
  '018d937f-35ee-7204-ad6c-ae2e2639119f',
  '018d937e-ffc5-7152-8d6f-33f59b72cdb4',
  '018d937f-4ef3-7168-994d-7968084ab1a0'])

In [148]:
ITAD_API_KEY = os.getenv("ITAD_API_KEY")
assert ITAD_API_KEY, "Set ITAD_API_KEY in your environment!"

INFO_URL = "https://api.isthereanydeal.com/games/info/v2"

def fetch_info(itad_id, retry=3, sleep=1.0):
    params = {"key": ITAD_API_KEY, "id": itad_id}
    for attempt in range(1, retry+1):
        try:
            r = requests.get(INFO_URL, params=params, timeout=15)
            r.raise_for_status()
            return r.json()
        except requests.HTTPError as e:
            if r.status_code in (429, 500, 502, 503, 504):
                time.sleep(sleep * attempt)
                continue
            raise

In [149]:
def upsert_game(conn, info):
    # keep only 2021–2024 window and type == 'game'
    if info.get("type") != "game":
        return False
    rd = info.get("releaseDate")
    if not rd: 
        return False
    year = int(rd[:4])
    if year < 2021 or year > 2024:
        return False

    conn.execute("""
      INSERT INTO games (itad_id, appid, title, type, release_date, early_access, mature)
      VALUES (?, ?, ?, ?, ?, ?, ?)
      ON CONFLICT(itad_id) DO UPDATE SET
        appid=excluded.appid, title=excluded.title, type=excluded.type,
        release_date=excluded.release_date, early_access=excluded.early_access, mature=excluded.mature
    """, (
        info["id"],
        info.get("appid"),
        info.get("title"),
        info.get("type"),
        rd,
        int(bool(info.get("earlyAccess"))),
        int(bool(info.get("mature")))
    ))
    return True

def _id_for_name(conn, table, name_col, name_val):
    cur = conn.execute(f"INSERT OR IGNORE INTO {table} ({name_col}) VALUES (?);", (name_val,))
    # fetch id
    row = conn.execute(f"SELECT rowid AS id FROM {table} WHERE {name_col}=?;", (name_val,)).fetchone()
    return row[0]

def upsert_publishers(conn, itad_id, pubs):
    for p in pubs or []:
        pid = _id_for_name(conn, "publishers", "name", p.get("name"))
        conn.execute("INSERT OR IGNORE INTO game_publishers (itad_id, publisher_id) VALUES (?,?);", (itad_id, pid))

def upsert_developers(conn, itad_id, devs):
    for d in devs or []:
        did = _id_for_name(conn, "developers", "name", d.get("name"))
        conn.execute("INSERT OR IGNORE INTO game_developers (itad_id, developer_id) VALUES (?,?);", (itad_id, did))

def upsert_tags(conn, itad_id, tags):
    for t in tags or []:
        tid = _id_for_name(conn, "tags", "name", t)
        conn.execute("INSERT OR IGNORE INTO game_tags (itad_id, tag_id) VALUES (?,?);", (itad_id, tid))

def upsert_reviews(conn, itad_id, reviews):
    for r in reviews or []:
        conn.execute("""
          INSERT INTO reviews (itad_id, source, score, count, url)
          VALUES (?,?,?,?,?)
          ON CONFLICT(itad_id, source) DO UPDATE SET
            score=excluded.score, count=excluded.count, url=excluded.url
        """, (itad_id, r.get("source"), r.get("score"), r.get("count"), r.get("url")))

def upsert_players(conn, itad_id, players):
    if not players: 
        return
    conn.execute("""
      INSERT INTO players (itad_id, recent, day, week, peak)
      VALUES (?,?,?,?,?)
      ON CONFLICT(itad_id) DO UPDATE SET
        recent=excluded.recent, day=excluded.day, week=excluded.week, peak=excluded.peak
    """, (itad_id, players.get("recent"), players.get("day"), players.get("week"), players.get("peak")))

In [150]:
processed = 0
to_do = pending  # list from 2.1
BATCH_SLEEP = 0.6

with closing(sqlite3.connect(DB_PATH)) as conn:
    conn.execute("PRAGMA foreign_keys = ON;")
    for itad_id in to_do:
        info = fetch_info(itad_id)
        if not info:
            continue

        conn.execute("BEGIN;")
        inserted = upsert_game(conn, info)
        if inserted:
            upsert_publishers(conn, itad_id, info.get("publishers"))
            upsert_developers(conn, itad_id, info.get("developers"))
            upsert_tags(conn, itad_id, info.get("tags"))
            upsert_reviews(conn, itad_id, info.get("reviews"))
            upsert_players(conn, itad_id, info.get("players"))
        conn.commit()

        processed += 1
        if processed % 100 == 0:
            print(f"Processed {processed}/{len(to_do)}")
        time.sleep(BATCH_SLEEP)

print("Done:", processed)

Processed 100/50003
Processed 200/50003
Processed 300/50003
Processed 400/50003
Processed 500/50003
Processed 600/50003
Processed 700/50003
Processed 800/50003
Processed 900/50003
Processed 1000/50003
Processed 1100/50003
Processed 1200/50003
Processed 1300/50003
Processed 1400/50003
Processed 1500/50003
Processed 1600/50003
Processed 1700/50003
Processed 1800/50003
Processed 1900/50003
Processed 2000/50003
Processed 2100/50003
Processed 2200/50003
Processed 2300/50003
Processed 2400/50003
Processed 2500/50003
Processed 2600/50003
Processed 2700/50003
Processed 2800/50003
Processed 2900/50003
Processed 3000/50003
Processed 3100/50003
Processed 3200/50003
Processed 3300/50003
Processed 3400/50003
Processed 3500/50003
Processed 3600/50003
Processed 3700/50003
Processed 3800/50003
Processed 3900/50003
Processed 4000/50003
Processed 4100/50003
Processed 4200/50003
Processed 4300/50003
Processed 4400/50003
Processed 4500/50003
Processed 4600/50003
Processed 4700/50003
Processed 4800/50003
P

ReadTimeout: HTTPSConnectionPool(host='api.isthereanydeal.com', port=443): Read timed out. (read timeout=15)

In [152]:
conn = sqlite3.connect("data/steam_sales.db")
pending = pd.read_sql("""
  SELECT s.itad_id
  FROM seeds s
  LEFT JOIN games g ON g.itad_id = s.itad_id
  WHERE s.status='mapped' AND s.itad_id IS NOT NULL AND g.itad_id IS NULL
""", conn)['itad_id'].tolist()
conn.close()
len(pending)

7947

In [153]:
processed = 0
to_do = pending  # list from 2.1
BATCH_SLEEP = 0.6

with closing(sqlite3.connect(DB_PATH)) as conn:
    conn.execute("PRAGMA foreign_keys = ON;")
    for itad_id in to_do:
        info = fetch_info(itad_id)
        if not info:
            continue

        conn.execute("BEGIN;")
        inserted = upsert_game(conn, info)
        if inserted:
            upsert_publishers(conn, itad_id, info.get("publishers"))
            upsert_developers(conn, itad_id, info.get("developers"))
            upsert_tags(conn, itad_id, info.get("tags"))
            upsert_reviews(conn, itad_id, info.get("reviews"))
            upsert_players(conn, itad_id, info.get("players"))
        conn.commit()

        processed += 1
        if processed % 100 == 0:
            print(f"Processed {processed}/{len(to_do)}")
        time.sleep(BATCH_SLEEP)

print("Done:", processed)

Processed 100/7947
Processed 200/7947
Processed 300/7947
Processed 400/7947
Processed 500/7947
Processed 600/7947
Processed 700/7947
Processed 800/7947
Processed 900/7947
Processed 1000/7947
Processed 1100/7947
Processed 1200/7947
Processed 1300/7947
Processed 1400/7947
Processed 1500/7947
Processed 1600/7947
Processed 1700/7947
Processed 1800/7947
Processed 1900/7947
Processed 2000/7947
Processed 2100/7947
Processed 2200/7947
Processed 2300/7947
Processed 2400/7947
Processed 2500/7947
Processed 2600/7947
Processed 2700/7947
Processed 2800/7947
Processed 2900/7947
Processed 3000/7947
Processed 3100/7947
Processed 3200/7947
Processed 3300/7947
Processed 3400/7947
Processed 3500/7947
Processed 3600/7947
Processed 3700/7947
Processed 3800/7947
Processed 3900/7947
Processed 4000/7947
Processed 4100/7947
Processed 4200/7947
Processed 4300/7947
Processed 4400/7947
Processed 4500/7947
Processed 4600/7947
Processed 4700/7947
Processed 4800/7947
Processed 4900/7947
Processed 5000/7947
Processed

In [165]:
conn = sqlite3.connect("data/steam_sales.db")

pd.read_sql("SELECT * FROM games;", conn)

Unnamed: 0,itad_id,appid,title,type,release_date,early_access,mature
0,018d937f-4ef3-7168-994d-7968084ab1a0,226280.0,Warp Frontier,game,2021-09-28,0,0
1,018d937f-05cc-70d8-a9fd-b60cf6f8048e,226620.0,Desktop Dungeons,game,2023-04-18,0,0
2,018d937e-f55f-735f-8a36-ef267cb911fe,251950.0,WWII Online,game,2023-07-06,0,0
3,018d937f-1b52-72fe-8828-fc57d66141ae,252870.0,PULSAR: Lost Colony,game,2021-06-22,0,0
4,018d937f-4eeb-7084-878f-15fb7e4c22e9,269850.0,Get Packed: Fully Loaded,game,2021-07-28,0,0
...,...,...,...,...,...,...,...
43135,01919e99-3f84-704f-9ba7-79afd7440a36,3186760.0,Baker's Agony,game,2024-09-13,0,0
43136,01919c3d-0228-71d6-9a80-b61de4a3ee1b,3187770.0,NAGAISAN,game,2024-09-11,0,0
43137,01919ce4-2521-719f-9fe8-966805c6ba80,3188750.0,Zoul Dungeon,game,2024-09-11,0,0
43138,01919561-88ca-73f4-8123-1b114c928691,3188840.0,Space Memory: Cats,game,2024-09-10,0,0


### Building the price history database

In [161]:
DB_PATH = "data/steam_sales.db"
ITAD_API_KEY = os.getenv("ITAD_API_KEY")
assert ITAD_API_KEY, "Set ITAD_API_KEY in your environment!"
HIST_URL = "https://api.isthereanydeal.com/games/history/v2"
SHOP_STEAM = "61"
COUNTRY = "us"

In [162]:
with closing(sqlite3.connect(DB_PATH)) as conn:
    conn.executescript("""
    PRAGMA foreign_keys = ON;

    CREATE TABLE IF NOT EXISTS history_events (
      itad_id   TEXT,
      ts_utc    TEXT,   -- ISO8601 UTC '...Z'
      price     REAL,   -- discounted
      regular   REAL,   -- base/list price
      cut       INTEGER,
      PRIMARY KEY (itad_id, ts_utc)
    );

    CREATE INDEX IF NOT EXISTS idx_hist_itad_ts ON history_events(itad_id, ts_utc);
    """)
    conn.commit()

In [163]:
def iso_to_utc_z(ts: str) -> str:
    # Handles '...Z' or timezone offsets like '+02:00', returns UTC 'Z'
    dt = datetime.fromisoformat(ts.replace("Z", "+00:00")).astimezone(timezone.utc)
    return dt.replace(tzinfo=timezone.utc).isoformat().replace("+00:00", "Z")

def dedupe_keep_max_cut(events):
    # If multiple events share the same timestamp, keep the one with the highest 'cut'
    bucket = {}
    for e in events or []:
        t = e.get("timestamp")
        if not t:
            continue
        cur_cut = ((e.get("deal") or {}).get("cut") or 0)
        prev = bucket.get(t)
        if (prev is None) or (cur_cut > ((prev.get("deal") or {}).get("cut") or 0)):
            bucket[t] = e
    # return in chronological order
    return [bucket[t] for t in sorted(bucket.keys())]

def fetch_history(itad_id: str, release_date_iso: str, retry=3, sleep=1.0):
    params = {
        "key": ITAD_API_KEY,
        "id": itad_id,
        "shops": SHOP_STEAM,
        "country": COUNTRY,
        "since": f"{release_date_iso}T00:00:00Z",
    }
    for attempt in range(1, retry+1):
        r = requests.get(HIST_URL, params=params, timeout=20)
        try:
            r.raise_for_status()
            return r.json()  # list of events
        except requests.HTTPError:
            if r.status_code in (429, 500, 502, 503, 504):
                time.sleep(sleep * attempt)
                continue
            raise

def insert_history_rows(conn, itad_id: str, events):
    rows = []
    for e in events:
        deal = e.get("deal") or {}
        price = (deal.get("price") or {}).get("amount")
        regular = (deal.get("regular") or {}).get("amount")
        cut = deal.get("cut")
        ts = e.get("timestamp")
        if ts is None or price is None or regular is None or cut is None:
            continue
        rows.append((itad_id, iso_to_utc_z(ts), float(price), float(regular), int(cut)))
    if rows:
        conn.executemany("""
          INSERT OR REPLACE INTO history_events (itad_id, ts_utc, price, regular, cut)
          VALUES (?,?,?,?,?)
        """, rows)

In [164]:
with closing(sqlite3.connect(DB_PATH)) as conn:
    todo = pd.read_sql("""
      SELECT g.itad_id, g.release_date
      FROM games g
      WHERE g.release_date BETWEEN '2021-01-01' AND '2024-12-31'
        AND NOT EXISTS (
          SELECT 1 FROM history_events h WHERE h.itad_id = g.itad_id
        )
    """, conn)

len(todo), todo.head(3)

(43140,
                                 itad_id release_date
 0  018d937f-4650-70ba-a458-d52b46365f6e   2021-01-01
 1  018d937f-4af3-7108-bc77-be5271928154   2021-01-01
 2  018d937f-503e-73ce-9ddd-9e5e44744fbc   2021-01-01)

In [166]:
processed = 0
BATCH_SLEEP = 0.6  # ~1 req/sec

with closing(sqlite3.connect(DB_PATH)) as conn:
    conn.execute("PRAGMA foreign_keys = ON;")
    for itad_id, release_date_iso in todo.itertuples(index=False):
        try:
            events = fetch_history(itad_id, release_date_iso)
            events = dedupe_keep_max_cut(events)
            conn.execute("BEGIN;")
            insert_history_rows(conn, itad_id, events)
            conn.commit()
        except Exception as e:
            conn.rollback()
            # Optional: log errors to a table or print
            print(f"Error {itad_id}: {e}")
        processed += 1
        if processed % 200 == 0:
            print(f"Processed {processed}/{len(todo)}")
        time.sleep(BATCH_SLEEP)

print("Done:", processed)

Processed 200/43140
Processed 400/43140
Processed 600/43140
Processed 800/43140
Processed 1000/43140
Processed 1200/43140
Processed 1400/43140
Processed 1600/43140
Processed 1800/43140
Processed 2000/43140
Processed 2200/43140
Processed 2400/43140
Processed 2600/43140
Processed 2800/43140
Processed 3000/43140
Processed 3200/43140
Processed 3400/43140
Processed 3600/43140
Processed 3800/43140
Processed 4000/43140
Processed 4200/43140
Processed 4400/43140
Processed 4600/43140
Processed 4800/43140
Processed 5000/43140
Processed 5200/43140
Processed 5400/43140
Processed 5600/43140
Processed 5800/43140
Processed 6000/43140
Processed 6200/43140
Processed 6400/43140
Processed 6600/43140
Processed 6800/43140
Processed 7000/43140
Processed 7200/43140
Processed 7400/43140
Processed 7600/43140
Processed 7800/43140
Processed 8000/43140
Processed 8200/43140
Processed 8400/43140
Processed 8600/43140
Processed 8800/43140
Processed 9000/43140
Processed 9200/43140
Processed 9400/43140
Processed 9600/43