In [1]:
import requests
from textwrap import shorten

# Root URL of the search service; endpoint paths such as "/search/models"
# are appended to it when building request URLs.
BASE_URL = "https://davanstrien-huggingface-datasets-search-v2.hf.space"

def search_models(
    query: str,
    k: int = 10,
    sort_by: str = "similarity",
    min_param_count: int = 0,
    max_param_count: int | None = None,
):
    params = {
        "query": query,
        "k": k,
        "sort_by": sort_by,
        "min_param_count": min_param_count,
    }
    if max_param_count is not None:
        params["max_param_count"] = max_param_count

    resp = requests.get(f"{BASE_URL}/search/models", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = data.get("results", [])
    print(f"\n=== Query: {query!r} (k={k}, sort_by={sort_by}) ===")
    if not results:
        print("No results.")
        return

    for i, r in enumerate(results[::-1], start=1):
        mid = r.get("model_id") or r.get("id")
        summary = r.get("summary") or r.get("description") or ""
        params_str = r.get("param_count")
        likes = r.get("likes")
        downloads = r.get("downloads")
        sim = r.get("similarity")

        print(f"\n[{i}] {mid}")
        print("   summary   :", shorten(summary, width=100, placeholder="…"))
        print("   similarity:", sim)
        print("   params    :", params_str)
        print("   likes     :", likes, "downloads:", downloads)
        if i == 10:
            break

# Example queries. Only the uncommented entries are part of the list; the
# remaining entries are disabled by being commented out.
tests = [
    "small vision transformer for 224x224 image classification that fits on a single GPU",
    "small image model for classifying cats and dogs",
    #"embedding model for English RAG on technical documents",
    #"multilingual text embedding model for European languages",
    #"object detection model for real-time inference on common objects",
    #"speech-to-text model for English audio with good accuracy",
]

# Run the first example query, requesting k=100 results sorted by similarity.
search_models(tests[0], k=100, sort_by="similarity")



=== Query: 'small vision transformer for 224x224 image classification that fits on a single GPU' (k=100, sort_by=similarity) ===

[1] Remade-AI/Squish
   summary   : This LoRA model, Squish Effect LoRA for Wan2.1 14B I2V 480p, transforms images into videos of being…
   similarity: 0.3138512969017029
   params    : 0
   likes     : 47 downloads: 2427

[2] QuantStack/Phantom_Wan_14B-GGUF
   summary   : A quantized version of bytedance-research/Phantom, designed for use with ComfyUI and compatible…
   similarity: 0.31362342834472656
   params    : 0
   likes     : 5 downloads: 1420

[3] RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16
   summary   : A quantized version of Llama-3.3-70B-Instruct, optimized for efficient deployment with vLLM and Red…
   similarity: 0.3134850263595581
   params    : 11200046176
   likes     : 0 downloads: 13283

[4] Mungert/olmOCR-7B-0225-preview-GGUF
   summary   : This model is a LoFi, BF16-accelerated OCR model for use with GPUs and TPUs, offering a balan

In [13]:
!pip install -U sentence-transformers

Collecting sentence-transformers


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip



  Downloading sentence_transformers-5.1.2-py3-none-any.whl.metadata (16 kB)
Downloading sentence_transformers-5.1.2-py3-none-any.whl (488 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-5.1.2


In [10]:
from huggingface_hub import InferenceClient

In [None]:
import os

# Read the Hugging Face token from the HF_TOKEN environment variable instead of
# hardcoding a secret in the notebook. Empty string means "not configured".
hf_key = os.environ.get("HF_TOKEN", "")

In [33]:
import json
import os
from pathlib import Path
from typing import Dict, List, Any

import numpy as np
import requests

# URL that embed_batch() POSTs to; the path names the
# BAAI/bge-small-en-v1.5 model and the feature-extraction pipeline.
API_URL = (
    "https://router.huggingface.co/hf-inference/models/"
    "BAAI/bge-small-en-v1.5/pipeline/feature-extraction"
)
# Number of query texts sent per request by build_embeddings().
BATCH_SIZE = 16


def load_catalog(path: Path) -> List[Dict[str, Any]]:
    """Read the UTF-8 encoded JSON file at ``path`` and return its parsed content.

    Args:
        path: Location of the catalog JSON file.

    Returns:
        The decoded JSON document (annotated as a list of dicts).
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)


def flatten_queries(catalog: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Turn the per-model ``queries_by_level`` mapping into a flat list of rows.

    For every catalog entry the levels are visited in the fixed order
    ``expert``, ``junior``, ``beginner``; each non-empty query produces one
    row with the keys ``model_id``, ``level`` and ``query``. Missing or falsy
    level lists and falsy queries are skipped.

    Args:
        catalog: Catalog entries, each a dict that may carry ``id`` and
            ``queries_by_level``.

    Returns:
        The flattened rows, in catalog order.
    """
    level_order = ("expert", "junior", "beginner")
    flattened: List[Dict[str, Any]] = []

    for entry in catalog:
        entry_id = entry.get("id")
        by_level = entry.get("queries_by_level") or {}
        flattened.extend(
            {"model_id": entry_id, "level": lvl, "query": text}
            for lvl in level_order
            for text in (by_level.get(lvl, []) or [])
            if text
        )

    return flattened


def embed_batch(texts: List[str], token: str) -> np.ndarray:
    """Embed a batch of texts through the HTTP endpoint at ``API_URL``.

    Args:
        texts: Texts to embed; sent as the ``inputs`` field of the JSON body.
        token: Bearer token placed in the ``Authorization`` header.

    Returns:
        A float32 array with one L2-normalized row per input text.

    Raises:
        RuntimeError: If ``token`` is empty, the HTTP response is not OK, the
            payload cannot be converted to a float32 array, or the resulting
            array is not 2-D with ``len(texts)`` rows.
    """
    if not token:
        raise RuntimeError("HF_TOKEN is required to call the Inference API")

    response = requests.post(
        API_URL,
        headers={"Authorization": f"Bearer {token}"},
        json={"inputs": texts},
        timeout=60,
    )
    if not response.ok:
        raise RuntimeError(
            f"Inference API error {response.status_code}: {response.text}"
        )

    payload = response.json()
    # The payload is expected to be a [batch, dim] nested list of numbers.
    try:
        matrix = np.array(payload, dtype=np.float32)
    except Exception as exc:  # noqa: BLE001
        raise RuntimeError(f"Unexpected embedding response payload: {payload}") from exc

    expected_rows = len(texts)
    if not (matrix.ndim == 2 and matrix.shape[0] == expected_rows):
        raise RuntimeError(
            f"Embedding batch shape mismatch: got {matrix.shape}, "
            f"expected ({expected_rows}, D)"
        )

    # Scale every row to unit length; the clip guards against division by zero.
    row_lengths = np.linalg.norm(matrix, axis=1, keepdims=True)
    safe_lengths = np.clip(row_lengths, 1e-12, None)
    unit_rows = matrix / safe_lengths
    return unit_rows.astype(np.float32)


def build_embeddings(rows: List[Dict[str, Any]], token: str) -> np.ndarray:
    """Embed the ``query`` field of every row, ``BATCH_SIZE`` rows per request.

    Prints a progress line after each batch.

    Args:
        rows: Row dicts; each must provide a ``"query"`` entry.
        token: Token passed through to ``embed_batch``.

    Returns:
        The per-batch arrays stacked vertically, or an empty ``(0, 0)``
        float32 array when ``rows`` is empty.
    """
    row_count = len(rows)
    # Ceiling division; evaluates to 0 for an empty input.
    total_batches = (row_count - 1) // BATCH_SIZE + 1

    chunks: List[np.ndarray] = []
    starts = range(0, row_count, BATCH_SIZE)
    for batch_no, start in enumerate(starts, start=1):
        texts = [row["query"] for row in rows[start : start + BATCH_SIZE]]
        chunks.append(embed_batch(texts, token))
        print(f"Embedded batch {batch_no}/{total_batches}")

    if not chunks:
        return np.zeros((0, 0), dtype=np.float32)
    return np.vstack(chunks)


def main() -> None:
    """Embed every catalog query and write the results under ``data/``.

    Steps:
      1. Resolve the API token: the notebook-level ``hf_key`` variable is used
         when it exists and is non-empty, otherwise the ``HF_TOKEN``
         environment variable.
      2. Load ``models_catalog_with_queries.json`` (falling back to
         ``models_catalog.json``) and flatten it into query rows.
      3. Embed all queries and save ``data/query_embeddings.npy`` plus the
         matching row metadata in ``data/query_meta.json``.

    Raises:
        SystemExit: If no token is available, no catalog file exists, or the
            catalog contains no queries.
    """
    # Look the notebook global up dynamically so a fresh kernel (where no cell
    # has defined `hf_key`) falls through to the environment instead of
    # failing with NameError.
    token = globals().get("hf_key") or os.environ.get("HF_TOKEN", "")
    if not token:
        raise SystemExit("HF_TOKEN is not set in the environment.")

    catalog_path = Path("models_catalog_with_queries.json")
    if not catalog_path.exists():
        fallback = Path("models_catalog.json")
        if fallback.exists():
            print(f"Catalog {catalog_path} not found. Falling back to {fallback}.")
            catalog_path = fallback
        else:
            raise SystemExit(
                f"Catalog file not found: {catalog_path} or {fallback}"
            )

    catalog = load_catalog(catalog_path)
    rows = flatten_queries(catalog)
    if not rows:
        raise SystemExit("No queries found in catalog; nothing to embed.")

    print(f"Loaded {len(rows)} queries from {catalog_path}.")
    embeddings = build_embeddings(rows, token)
    print(f"Final embedding matrix shape: {embeddings.shape}")

    output_dir = Path("data")
    output_dir.mkdir(parents=True, exist_ok=True)

    embeddings_path = output_dir / "query_embeddings.npy"
    meta_path = output_dir / "query_meta.json"

    # Row i of the saved matrix corresponds to entry i of the metadata list.
    np.save(embeddings_path, embeddings)
    with meta_path.open("w", encoding="utf-8") as f:
        json.dump(rows, f, ensure_ascii=False, indent=2)

    print(f"Saved embeddings to {embeddings_path}")
    print(f"Saved metadata to {meta_path}")


# Run the pipeline when this code is executed as the top-level module; the
# recorded output of this cell shows main() running when the cell is executed.
if __name__ == "__main__":
    main()


Catalog models_catalog_with_queries.json not found. Falling back to models_catalog.json.
Loaded 1962 queries from models_catalog.json.
Embedded batch 1/123
Embedded batch 2/123
Embedded batch 3/123
Embedded batch 4/123
Embedded batch 5/123
Embedded batch 6/123
Embedded batch 7/123
Embedded batch 8/123
Embedded batch 9/123
Embedded batch 10/123
Embedded batch 11/123
Embedded batch 12/123
Embedded batch 13/123
Embedded batch 14/123
Embedded batch 15/123
Embedded batch 16/123
Embedded batch 17/123
Embedded batch 18/123
Embedded batch 19/123
Embedded batch 20/123
Embedded batch 21/123
Embedded batch 22/123
Embedded batch 23/123
Embedded batch 24/123
Embedded batch 25/123
Embedded batch 26/123
Embedded batch 27/123
Embedded batch 28/123
Embedded batch 29/123
Embedded batch 30/123
Embedded batch 31/123
Embedded batch 32/123
Embedded batch 33/123
Embedded batch 34/123
Embedded batch 35/123
Embedded batch 36/123
Embedded batch 37/123
Embedded batch 38/123
Embedded batch 39/123
Embedded batch 4

In [32]:
# Invoke the embedding pipeline defined by main().
# NOTE(review): the recorded output of this cell is a TypeError about
# InferenceClient's 'provider' argument, which the main() visible in this
# notebook never calls, and this cell's execution count (32) precedes the
# defining cell's (33). The output looks stale -- re-run after a kernel restart.
main()

TypeError: InferenceClient.__init__() got an unexpected keyword argument 'provider'