# OpenRouter vs Hugging Face

## OpenRouter

In [None]:
"""
export OPENROUTER_API_KEY="<your-openrouter-api-key>"  # placeholder: never commit a real key; revoke any key that was pasted here
python your_file.py


import os
from openai import OpenAI

client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="moonshotai/Kimi-K2-Instruct-0905",
    messages=[
        {
            "role": "user",
            "content": "Write a short story about a robot learning to love."
        }
    ],
)

print(completion.choices[0].message)

"""

from __future__ import annotations

from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal

from openai import OpenAI
from openai.types.chat import ChatCompletion

In [None]:
# Roles that may be attached to the message built from the caller's query.
ROLEType = Literal["system", "user", "assistant", "developer"]

# Model, role and endpoint used when the caller does not supply its own.
DEFAULT_MODEL: str = "allenai/olmo-3.1-32b-think:free"
DEFAULT_ROLE: ROLEType = "user"
DEFAULT_BASE_URL: str = "https://openrouter.ai/api/v1"

# Headers attached to requests by default (referer URL and application title).
DEFAULT_HEADERS: dict[str, str] = {
    "HTTP-Referer": "https://www.gusmaolab.org",
    "X-Title": "Acta Diurna",
}

# Additional request fields applied by default.
DEFAULT_EXTRA_BODY: dict[str, str] = {
    "user": "acta-local-test",
}

In [None]:
def dtnow() -> str:
    """Give the current UTC time as an ISO-8601 string with whole seconds."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat(timespec="seconds")


def _merge_headers(*parts: dict[str, str] | None) -> dict[str, str]:
    """Merge multiple header dicts (later dicts override earlier ones)."""
    out: dict[str, str] = {}
    for p in parts:
        if p:
            out.update(p)
    return out


def _perform_query_trend(
    *,
    query: str,
    model: str,
    role: ROLEType,
    base_url: str,
    apkey: str,
    default_headers: dict[str, str] | None = None,
    extra_body: dict[str, Any] | None = None,
    app_url: str | None = None,
    app_title: str | None = None,
    temperature: float | None = None,
    top_p: float | None = None,
    max_tokens: int | None = None,
    seed: int | None = None,
    timeout_s: float | None = 120.0,
    max_retries: int = 3,
    system_prompt: str | None = None,
) -> ChatCompletion | None:
    """Perform a single chat completion request. Returns None on failure.

    Args:
        query: Text of the message to send; must contain non-whitespace.
        model: Model identifier passed to the endpoint.
        role: Role assigned to the ``query`` message.
        base_url: Base URL of the chat-completions API.
        apkey: API key handed to the client.
        default_headers: Headers sent with the request.
        extra_body: Additional request-body fields. They are forwarded through
            the SDK's ``extra_body`` option, so fields that are not named
            parameters of ``chat.completions.create`` are accepted too.
        app_url: If set, overrides the ``HTTP-Referer`` header.
        app_title: If set, overrides the ``X-Title`` header.
        temperature: Optional sampling parameter; omitted when ``None``.
        top_p: Optional sampling parameter; omitted when ``None``.
        max_tokens: Optional sampling parameter; omitted when ``None``.
        seed: Optional sampling parameter; omitted when ``None``.
        timeout_s: Request timeout passed to the client.
        max_retries: Retry count passed to the client.
        system_prompt: If set, sent as a ``system`` message before the query.

    Returns:
        The completion object, or ``None`` when the query is blank or the
        request raised (the error is printed, not propagated).
    """
    if not query.strip():
        print(f"{dtnow()} ERROR: query must be a non-empty string.")
        return None

    # Explicit attribution arguments win over the same keys in default_headers.
    attribution_headers: dict[str, str] = {}
    if app_url:
        attribution_headers["HTTP-Referer"] = app_url
    if app_title:
        attribution_headers["X-Title"] = app_title

    headers = _merge_headers(default_headers, attribution_headers)

    client = OpenAI(base_url=base_url, api_key=apkey, default_headers=headers)
    try:
        client = client.with_options(max_retries=max_retries, timeout=timeout_s)
    except TypeError as exc:
        # Best effort for SDK versions that reject these options: keep the
        # plain client, but say so instead of silently dropping the settings.
        print(
            f"{dtnow()} WARNING: could not apply timeout/max_retries "
            f"({exc}); using client defaults."
        )

    messages: list[dict[str, str]] = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": role, "content": query})

    payload: dict[str, Any] = {"model": model, "messages": messages}

    # Only send sampling parameters the caller actually set.
    if temperature is not None:
        payload["temperature"] = float(temperature)
    if top_p is not None:
        payload["top_p"] = float(top_p)
    if max_tokens is not None:
        payload["max_tokens"] = int(max_tokens)
    if seed is not None:
        payload["seed"] = int(seed)

    if extra_body:
        # Splatting these keys into create(**payload) raises TypeError for any
        # field the SDK does not declare as a keyword argument. The SDK's
        # extra_body option merges them into the JSON body instead.
        payload["extra_body"] = dict(extra_body)

    try:
        return client.chat.completions.create(**payload)
    except Exception as e:
        # Boundary handler: report and signal failure with None.
        print(f"{dtnow()} ERROR: request failed for model={model!r}")
        print(f"{dtnow()} {type(e).__name__}: {e}")
        return None

In [None]:
def query_models(
    *,
    query: str,
    output_path: Path,
    apkey: str,
    model: str = DEFAULT_MODEL,
    role: ROLEType = DEFAULT_ROLE,
    base_url: str = DEFAULT_BASE_URL,
    default_headers: dict[str, str] = DEFAULT_HEADERS,
    default_extra_body: dict[str, str] = DEFAULT_EXTRA_BODY,
) -> int:
    """Send ``query`` once and write every returned choice to ``output_path``.

    The parent directory of ``output_path`` is created if missing and the
    file is overwritten. Returns 0 when choices were written, 1 when the
    request produced no response or no choices.
    """
    referer = default_headers.get("HTTP-Referer")
    title = default_headers.get("X-Title")

    response = _perform_query_trend(
        query=query,
        model=model,
        role=role,
        base_url=base_url,
        apkey=apkey,
        default_headers=default_headers,
        extra_body=default_extra_body,
        app_url=referer,
        app_title=title,
    )

    # getattr on None yields the default, so this covers the None response too.
    choices = getattr(response, "choices", None)
    if not choices:
        print(f"{dtnow()} ERROR: no response/choices")
        return 1

    separator = f"{'='*100}\n\n"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as sink:
        for position, item in enumerate(choices):
            # Tolerate a choice without a message when extracting the text.
            message = getattr(item, "message", None)
            content = getattr(message, "content", None)
            sink.write(f"#################### response.choices[{position}]\n")
            sink.write(f"# Content:\n{content}\n")
            sink.write(f"# Message:\n{item.message}\n")
            sink.write(separator)

    return 0

In [None]:
import os

# Read the key from the environment so it never has to appear in the notebook.
api_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
if not api_key:
    raise SystemExit("Set OPENROUTER_API_KEY in your environment first.")

# Adjacent string literals are joined verbatim, so the space between "with"
# and "a" has to be written inside one of them.
query = (
    "Who is Einstein? Please answer inside a triple-backtick codebox with "
    "a maximum of ~1-3 line(s) [not counting the triple backticks in the total length]."
)
output_path = Path.home() / "tmp" / "openrouter_result.txt"

# query_models returns 0 on success and 1 on failure; use that as exit status.
raise SystemExit(
    query_models(query=query, output_path=output_path, apkey=api_key)
)

In [None]:
## Hugging Face

In [None]:
"""
See huggingface.txt in Organization
"""

from __future__ import annotations

from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal

from openai import OpenAI
from openai.types.chat import ChatCompletion

In [None]:
import os
from openai import OpenAI

# OpenAI SDK client pointed at the Hugging Face router URL. The token comes
# from HF_TOKEN; indexing os.environ raises KeyError when it is not set.
client = OpenAI(
    api_key=os.environ["HF_TOKEN"],
    base_url="https://router.huggingface.co/v1",
)

# Single-turn request: one user message, no system prompt.
completion = client.chat.completions.create(
    messages=[
        dict(
            role="user",
            content="Write a short story about a robot learning to love.",
        ),
    ],
    model="moonshotai/Kimi-K2-Instruct-0905",
)

# Show the message object of the first returned choice.
print(completion.choices[0].message)

In [None]:
import os
from huggingface_hub import InferenceClient

# Hugging Face inference client using the "together" provider; the token is
# read from HF_TOKEN (KeyError if the variable is unset).
client = InferenceClient(api_key=os.environ["HF_TOKEN"], provider="together")

# output is a PIL.Image object
image = client.text_to_image(
    "A steampunk airship in the clouds", model="black-forest-labs/FLUX.1-dev"
)

In [None]:
from mlx_lm import load, generate

# Load the model and its tokenizer by repository id.
model, tokenizer = load("mlx-community/GLM-4.7-Flash-4bit")

prompt = "hello"

# When the tokenizer carries a chat template, wrap the raw text as a single
# user turn and let the template add the generation prompt.
# NOTE(review): tokenize=False is not passed here, so this presumably yields
# token ids rather than a string - confirm generate() accepts that form.
if tokenizer.chat_template is not None:
    messages = [dict(role="user", content=prompt)]
    prompt = tokenizer.apply_chat_template(
        messages,
        return_dict=False,
        add_generation_prompt=True,
    )

response = generate(model, tokenizer, verbose=True, prompt=prompt)

In [None]:
from mlx_lm import load, generate

# Repository id of the model to load.
model_id = "lmstudio-community/Qwen2.5-7B-Instruct-MLX-4bit"
model, tokenizer = load(model_id)

# The prompt is passed as plain text; no chat template is applied in this cell.
prompt = (
    "Write a 1200-word news-style story about a discovery. "
    "Then give 100-word summary, 5 keywords, and 1 mood word."
)
text = generate(model, tokenizer, max_tokens=1800, prompt=prompt)
print(text)

In [None]:
"""
Run as a command in shell
"""
# Shell command, not Python: run it in a terminal rather than as a notebook
# cell. It invokes mlx_lm.generate with the given model, prompt and token limit.
mlx_lm.generate \
  --model mlx-community/DeepSeek-R1-Distill-Qwen-32B-4bit \
  --prompt "Say 'hi'." \
  --max-tokens 10

In [None]:
from mlx_lm import load, generate

# Load the model and its tokenizer by repository id.
model, tokenizer = load("mlx-community/DeepSeek-R1-Distill-Qwen-32B-4bit")

prompt = "hello"

# Use the chat template only when the tokenizer both supports templating and
# actually has a template; otherwise the raw text is sent unchanged.
if (
    hasattr(tokenizer, "apply_chat_template")
    and tokenizer.chat_template is not None
):
    messages = [dict(role="user", content=prompt)]
    # tokenize=False asks for the rendered prompt as text.
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )

response = generate(model, tokenizer, verbose=True, prompt=prompt)