In [None]:
import os
import json
import time
import random
import re
from typing import Optional, List
from huggingface_hub import HfApi, model_info
from huggingface_hub.utils import HfHubHTTPError

# Substring used to filter model listings on the Hub.
SEARCH_TERM = "open"
# Output path; the main loop writes one JSON object per line to this file.
OUT_RAW = "model_data/hf_models_open_raw.json"
# SECURITY: never embed an access token in source. A token that has been
# committed or shared (as in an earlier revision of this cell) must be treated
# as compromised and revoked in the Hugging Face account settings.
# The token is read from the environment instead; when the variable is unset
# this is None and credential lookup is left to huggingface_hub
# (NOTE(review): confirm the fallback behaviour against the library docs).
HF_TOKEN = os.environ.get("HF_TOKEN")

api = HfApi(token=HF_TOKEN)

# --- Helpers for rate limiting and retries ---
def _status_code(exc) -> Optional[int]:
    """Return the HTTP status code attached to *exc*, or ``None``.

    Reads ``exc.response.status_code``. A missing ``response`` or
    ``status_code`` attribute, or any error raised while reading them,
    yields ``None`` rather than propagating.
    """
    try:
        response = getattr(exc, "response", None)
        return getattr(response, "status_code", None)
    except Exception:
        # Best-effort lookup: this helper must never mask the original error.
        return None

def _retry_after_seconds(exc) -> Optional[float]:
    """Return the delay requested by the ``Retry-After`` header of *exc*.

    The header is read from ``exc.response.headers``. Both forms allowed by
    HTTP are understood:

    * delta-seconds (``"120"``) -> that many seconds;
    * HTTP-date (``"Wed, 21 Oct 2015 07:28:00 GMT"``) -> seconds from now
      until that instant.

    Returns:
        A finite, non-negative number of seconds, or ``None`` when the header
        is absent, empty, unparsable, or not finite. Dates in the past and
        negative values are clamped to ``0.0`` so the result is always safe
        to hand to ``time.sleep``.
    """
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    try:
        headers = getattr(getattr(exc, "response", None), "headers", {}) or {}
        raw = headers.get("Retry-After")
    except Exception:
        # Best-effort: an odd response/headers object must not break retries.
        return None
    if raw in (None, ""):
        return None

    try:
        seconds = float(raw)
    except (TypeError, ValueError):
        # Not a number: try the HTTP-date form.
        try:
            when = parsedate_to_datetime(str(raw))
        except (TypeError, ValueError):
            return None
        if when.tzinfo is None:
            # A date parsed without an offset is treated as UTC.
            when = when.replace(tzinfo=timezone.utc)
        seconds = (when - datetime.now(timezone.utc)).total_seconds()

    if not math.isfinite(seconds):
        # "inf"/"nan" parse as floats but cannot be slept on.
        return None
    return max(0.0, seconds)

def _parse_rate_limit(headers: dict) -> Optional[int]:
    """Extract the ``t=<seconds>`` parameter from a ``RateLimit`` header.

    Args:
        headers: Mapping of response headers; only the ``"RateLimit"`` key is
            read. A missing or empty value is treated as "no information".

    Returns:
        The integer following ``t=`` (callers use it as a number of seconds
        to wait), or ``None`` when the header is absent or has no standalone
        ``t`` parameter.
    """
    rl = headers.get("RateLimit", "") or ""
    # \b anchors on a standalone "t" parameter; without it the pattern also
    # matches the tail of other names such as "limit=100" or "reset=30".
    match = re.search(r"\bt=(\d+)", rl)
    return int(match.group(1)) if match else None

def _sleep_from_headers(exc, attempt: int, max_backoff: float = 60.0) -> float:
    """Choose how long to wait before retrying after *exc*.

    Sources are tried in order:

    1. the value from ``_retry_after_seconds(exc)``;
    2. the value from ``_parse_rate_limit`` applied to the response headers;
    3. exponential backoff ``2 ** attempt``, capped at *max_backoff*.

    A small random jitter is added in every case (up to 0.5 s for the
    header-driven waits, up to 0.25 s for the backoff). Header-driven waits
    are not capped by *max_backoff*.

    Args:
        exc: The exception raised by the failed request.
        attempt: 1-based retry number, used only for the backoff fallback.
        max_backoff: Upper bound in seconds for the exponential fallback.

    Returns:
        Seconds to sleep.
    """
    retry_after = _retry_after_seconds(exc)
    if retry_after is not None:
        return retry_after + random.uniform(0, 0.5)

    response = getattr(exc, "response", None)
    response_headers = getattr(response, "headers", {}) or {}
    window_reset = _parse_rate_limit(response_headers)
    if window_reset is not None:
        return float(window_reset) + random.uniform(0, 0.5)

    # No server guidance: fall back to capped exponential backoff.
    exponential = 2 ** attempt
    return min(max_backoff, exponential) + random.uniform(0, 0.25)

def safe_model_info(repo_id: str, token: Optional[str], max_retries: int = 8):
    """Call ``model_info`` for *repo_id*, retrying on transient HTTP errors.

    Args:
        repo_id: Repository identifier passed straight to ``model_info``.
        token: Access token forwarded to ``model_info`` (may be ``None``).
        max_retries: Maximum number of retries after the first attempt.

    Returns:
        Whatever ``model_info`` returns.

    Raises:
        HfHubHTTPError: When the status is not 429/502/503/504, or when the
            retry budget is exhausted.
    """
    retryable_codes = (429, 502, 503, 504)
    retries_used = 0
    while True:
        try:
            return model_info(repo_id, token=token)
        except HfHubHTTPError as err:
            http_code = _status_code(err)
            if http_code not in retryable_codes or retries_used >= max_retries:
                raise
            retries_used += 1
            delay = _sleep_from_headers(err, retries_used)
            print(f"[retry] model_info {repo_id} -> {http_code}; sleeping {delay:.1f}s (attempt {retries_used}/{max_retries})")
            time.sleep(delay)

def iter_models_with_retry(api: HfApi, *, search: str, full: bool, sort: str, direction: int, max_retries: int = 8):
    """Yield models from ``api.list_models``, retrying on transient HTTP errors.

    When a retryable error (429/502/503/504) interrupts the listing, the
    listing is restarted from the beginning after a wait; models whose
    ``modelId`` was already yielded are skipped, so each ID is produced at
    most once. Entries without a ``modelId`` are dropped.

    Args:
        api: Client whose ``list_models`` method is called.
        search, full, sort, direction: Forwarded unchanged to ``list_models``.
        max_retries: Total retries allowed across the whole iteration (the
            counter is not reset after a successful restart).

    Yields:
        The objects produced by ``list_models``, de-duplicated by ``modelId``.

    Raises:
        HfHubHTTPError: For non-retryable statuses or once retries run out.
    """
    retryable_codes = (429, 502, 503, 504)
    yielded_ids = set()
    retries_used = 0
    while True:
        try:
            listing = api.list_models(search=search, full=full, sort=sort, direction=direction)
            for model in listing:
                model_id = getattr(model, "modelId", None)
                if model_id and model_id not in yielded_ids:
                    yielded_ids.add(model_id)
                    yield model
            return
        except HfHubHTTPError as err:
            http_code = _status_code(err)
            if http_code not in retryable_codes or retries_used >= max_retries:
                raise
            retries_used += 1
            delay = _sleep_from_headers(err, retries_used)
            print(f"[retry] list_models -> {http_code}; sleeping {delay:.1f}s (attempt {retries_used}/{max_retries})")
            time.sleep(delay)

# --- Main execution ---
# Create the output directory only when OUT_RAW actually has one;
# os.makedirs("") raises FileNotFoundError for a bare filename.
out_dir = os.path.dirname(OUT_RAW)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

count = 0    # models listed with a usable modelId
written = 0  # models whose details were fetched and written
skipped = 0  # models dropped because the detail request failed

# The file is written as one JSON object per line.
with open(OUT_RAW, "w", encoding="utf-8") as f_raw:
    for summary in iter_models_with_retry(api, search=SEARCH_TERM, full=True, sort="last_modified", direction=-1):
        mid = getattr(summary, "modelId", None)
        if not mid:
            continue
        count += 1

        try:
            info = safe_model_info(mid, token=HF_TOKEN)
        except HfHubHTTPError as e:
            # Still best-effort (one bad repo must not abort the crawl), but
            # leave a trace so missing entries can be explained afterwards.
            skipped += 1
            print(f"[skip] model_info {mid} -> {_status_code(e)}; entry not written")
            continue

        # default=str stringifies any attribute json cannot encode natively.
        json.dump(info.__dict__, f_raw, default=str)
        f_raw.write("\n")
        written += 1

        if written % 200 == 0:
            print(f"Saved {written} models so far...")
            # Short pause between batches, presumably to ease rate limiting.
            time.sleep(1.0)

print(f"Done. Wrote {written} raw model entries to {OUT_RAW}")
if skipped:
    print(f"Skipped {skipped} of {count} listed models due to HTTP errors.")


Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.


Saved 200 models so far...
Saved 400 models so far...
Saved 600 models so far...
Saved 800 models so far...
[retry] model_info mradermacher/OpenScienceReasoning-Qwen-e10-i1-GGUF -> 429; sleeping 178.1s (attempt 1/8)
Saved 1000 models so far...
Saved 1200 models so far...
Saved 1400 models so far...
Saved 1600 models so far...
Saved 1800 models so far...
