In [1]:
import litellm
litellm._turn_on_debug()
import logging
logging.basicConfig(level=logging.DEBUG, 
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 
                    handlers=[logging.FileHandler('edgar.log', 'w', 'utf-8')])


logger = logging.getLogger(__name__)


def query_litellm(prompt: str, model_name: str='ollama/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF') -> str:
    """
    Query a LiteLLM-compatible model with a given prompt.

    Args:
        prompt (str): The user's input question.
        model_name (str): The model to use (e.g. 'ollama/qwen2.5' or 'huggingface/your-model').

    Returns:
        str: The model's response text.
    """
    response = litellm.completion(
        model=model_name,
        messages=[{"role": "user", "content": prompt}]
    )

    return response['choices'][0]['message']['content']


In [None]:
def test_context_limit(token_multiplier=50):
    base = "The quick brown fox jumps over the lazy dog. "
    for i in range(1, 1000):
        try:
            prompt = base * (i * token_multiplier)
            print(f"Trying length {len(prompt)}...")
            query_litellm(prompt)  # or however you call your local model
        except Exception as e:
            print(f"Failed at length {len(prompt)}: {e}")
            break

In [2]:
from transformers import AutoConfig
from huggingface_hub import HfApi
import requests
import json
import os

def config_to_markdown(config_dict):
    # Priority ordered keys
    priority_keys = [
        "model_type", "architectures", "hidden_size", "num_attention_heads",
        "num_hidden_layers", "intermediate_size", "vocab_size", "type_vocab_size",
        "max_position_embeddings", "hidden_act", "initializer_range",
        "layer_norm_eps", "attention_probs_dropout_prob", "hidden_dropout_prob",
        "pad_token_id", "bos_token_id", "eos_token_id"
    ]
    all_keys = list(config_dict.keys())
    
    # Sort by priority first, then append any other keys
    ordered_keys = [k for k in priority_keys if k in config_dict] + \
                   [k for k in all_keys if k not in priority_keys]

    # Create markdown table
    markdown = "| Key | Value |\n|-----|-------|\n"
    for k in ordered_keys:
        v = config_dict[k]
        if isinstance(v, list):
            v = ", ".join(map(str, v))
        elif isinstance(v, dict):
            v = json.dumps(v)
        markdown += f"| `{k}` | `{v}` |\n"
    return markdown

def inspect_model_config(model_name_or_path, source="autoconfig"):
    """
    Inspect a Hugging Face model's configuration and output as markdown.

    Parameters:
        model_name_or_path (str): model name from Hugging Face hub or local path.
        source (str): One of ["autoconfig", "api", "rest", "local_json"]
    """
    config_dict = None

    if source == "autoconfig":
        print("🔍 Using AutoConfig (local or remote)...")
        config = AutoConfig.from_pretrained(model_name_or_path)
        config_dict = config.to_dict()

    elif source == "api":
        print("🌐 Using huggingface_hub HfApi...")
        api = HfApi()
        model_info = api.model_info(model_name_or_path)
        config_dict = model_info.config if hasattr(model_info, 'config') else {}

    elif source == "rest":
        print("🌍 Using Hugging Face REST API...")
        url = f"https://huggingface.co/api/models/{model_name_or_path}"
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            config_dict = data.get("config", {})
        else:
            print("❌ Failed to retrieve model metadata from REST API.")

    elif source == "local_json":
        print("📂 Loading config.json from local directory...")
        path = os.path.join(model_name_or_path, "config.json")
        if os.path.exists(path):
            with open(path, "r") as f:
                config_dict = json.load(f)
        else:
            print(f"❌ config.json not found at {path}")

    else:
        print("⚠️ Invalid source. Choose from: autoconfig, api, rest, local_json.")

    if config_dict:
        print("\n### 📄 Model Configuration (Markdown Table)\n")
        print(config_to_markdown(config_dict))


None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [3]:
model_name = "SUFE-AIFLM-Lab/Fin-R1"
model_config = inspect_model_config(model_name, source="autoconfig")
config_to_markdown(model_config)

🔍 Using AutoConfig (local or remote)...

### 📄 Model Configuration (Markdown Table)

| Key | Value |
|-----|-------|
| `model_type` | `qwen2` |
| `architectures` | `Qwen2ForCausalLM` |
| `hidden_size` | `3584` |
| `num_attention_heads` | `28` |
| `num_hidden_layers` | `28` |
| `intermediate_size` | `18944` |
| `vocab_size` | `152064` |
| `max_position_embeddings` | `32768` |
| `hidden_act` | `silu` |
| `initializer_range` | `0.02` |
| `pad_token_id` | `None` |
| `bos_token_id` | `151643` |
| `eos_token_id` | `151645` |
| `use_sliding_window` | `False` |
| `sliding_window` | `None` |
| `max_window_layers` | `28` |
| `num_key_value_heads` | `4` |
| `rms_norm_eps` | `1e-06` |
| `use_cache` | `True` |
| `rope_theta` | `1000000.0` |
| `rope_scaling` | `None` |
| `attention_dropout` | `0.0` |
| `return_dict` | `True` |
| `output_hidden_states` | `False` |
| `output_attentions` | `False` |
| `torchscript` | `False` |
| `torch_dtype` | `bfloat16` |
| `use_bfloat16` | `False` |
| `tf_legacy_los

AttributeError: 'NoneType' object has no attribute 'keys'