# HuggingGPT Model Selection - Stage 2
=====================================
Given the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. This stage filters and ranks models, then uses an LLM for final selection.

In [28]:
# Data classes
import json
import os

import requests
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import time
from collections import defaultdict

# Optional: For LangChain + OpenRouter integration
try:
    from langchain_openai import ChatOpenAI
    HAS_LANGCHAIN = True
    print("LangChain + OpenRouter integration available.")
except ImportError:
    HAS_LANGCHAIN = False
    print("LangChain + OpenRouter integration not available.")

@dataclass
class ModelInfo:
    """Record describing one Hugging Face model considered for a task."""
    # Repository identifier of the model.
    model_id: str
    # Task type the model was looked up for.
    task_type: str
    # Download count used for ranking.
    downloads: int
    # Free-text description of the model.
    description: str
    # Pipeline tag associated with the model.
    pipeline_tag: str
    # Tags attached to the model.
    tags: List[str]
    # Additional data kept alongside the model.
    metadata: Dict[str, Any]

    def to_candidate_format(self) -> Dict[str, Any]:
        """Build the candidate dictionary used by the model selection prompt.

        Returns:
            A dict with the keys ``model_id``, ``metadata`` (downloads,
            pipeline tag and tags) and ``description``. Descriptions longer
            than 200 characters are cut to 200 characters followed by "...".
        """
        max_chars = 200
        if len(self.description) > max_chars:
            summary = self.description[:max_chars] + "..."
        else:
            summary = self.description

        candidate_metadata = {
            "downloads": self.downloads,
            "pipeline_tag": self.pipeline_tag,
            "tags": self.tags,
        }
        return {
            "model_id": self.model_id,
            "metadata": candidate_metadata,
            "description": summary,
        }

LangChain + OpenRouter integration available.


In [29]:
# Hugging Face Model Registry
class HuggingFaceModelRegistry:
    """Looks up, ranks and caches candidate Hugging Face models per task type.

    Candidates are fetched from the Hugging Face model listing endpoint,
    ranked by download count and cached in memory per ``(task_type, top_k)``.
    """

    def __init__(self):
        self.models_cache = {}
        # Fetch time per cache key. A single shared timestamp would let a
        # fresh fetch for one task keep stale entries of other tasks alive.
        self._cache_timestamps = {}
        # Time of the most recent cache write (kept for existing readers).
        self.last_cache_update = 0
        self.cache_duration = 3600  # 1 hour

        # Task type mapping from HuggingGPT tasks to HF pipeline tags
        self.task_mapping = {
            "object-detection": ["object-detection", "zero-shot-object-detection"],
            "image-to-text": ["image-to-text", "image-captioning"],
            "image-cls": ["image-classification", "zero-shot-image-classification"],
            "visual-question-answering": ["visual-question-answering"],
            "pose-detection": ["object-detection", "keypoint-detection"],
            "pose-text-to-image": ["text-to-image"],
            "text-to-image": ["text-to-image"],
            "image-segmentation": ["image-segmentation", "semantic-segmentation"],
            "depth-estimation": ["depth-estimation"],
            "text-classification": ["text-classification", "zero-shot-classification"],
            "text-generation": ["text-generation", "text2text-generation"],
            "speech-to-text": ["automatic-speech-recognition"],
            "text-to-speech": ["text-to-speech", "text-to-audio"]
        }

    def get_hf_models_by_task(self, task_type: str, top_k: int = 5) -> List["ModelInfo"]:
        """Return up to ``top_k`` models for ``task_type``, ranked by downloads.

        Every pipeline tag mapped to the task is queried; the results are
        merged, de-duplicated by model id, sorted by download count and cut
        to ``top_k``. If fewer than ``top_k`` models are found, hard-coded
        fallback models fill the remaining slots.

        Args:
            task_type: HuggingGPT task name; unknown names are used directly
                as the pipeline tag.
            top_k: Maximum number of models to return.

        Returns:
            A list of ``ModelInfo`` objects (empty when ``top_k`` is not
            positive). The list is cached for ``cache_duration`` seconds.
        """
        if top_k <= 0:
            return []

        # Check cache first
        cache_key = f"{task_type}_{top_k}"
        current_time = time.time()
        cached_at = self._cache_timestamps.get(cache_key)
        if (cache_key in self.models_cache and cached_at is not None and
                current_time - cached_at < self.cache_duration):
            return self.models_cache[cache_key]

        # Get pipeline tags for this task
        pipeline_tags = self.task_mapping.get(task_type, [task_type])

        # Merge the candidates of every tag before ranking. Keyed by model id
        # so an id returned for more than one tag is only ranked once.
        merged = {}
        for pipeline_tag in pipeline_tags:
            for model_info in self._fetch_models_for_tag(task_type, pipeline_tag, top_k):
                merged.setdefault(model_info.model_id, model_info)

        # Sort by downloads and take top-k
        models = sorted(merged.values(), key=lambda m: m.downloads, reverse=True)[:top_k]

        # Add fallback models if not enough found
        if len(models) < top_k:
            already_present = {m.model_id for m in models}
            models.extend(
                self._get_fallback_models(
                    task_type, top_k - len(models), exclude=already_present
                )
            )

        # Cache results
        self.models_cache[cache_key] = models
        self._cache_timestamps[cache_key] = current_time
        self.last_cache_update = current_time

        return models

    def _fetch_models_for_tag(self, task_type: str, pipeline_tag: str,
                              top_k: int) -> List["ModelInfo"]:
        """Fetch up to ``top_k`` models for one pipeline tag.

        Best effort: network errors, non-200 responses and unexpected
        payloads are reported with ``print`` and yield an empty list.
        """
        url = "https://huggingface.co/api/models"
        params = {
            "pipeline_tag": pipeline_tag,
            "sort": "downloads",
            "direction": -1,  # Descending order
            "limit": top_k * 2  # Over-fetch so unusable entries can be skipped
        }

        try:
            response = requests.get(url, params=params, timeout=10)
            if response.status_code != 200:
                print(f"Error fetching models for {pipeline_tag}: HTTP {response.status_code}")
                return []
            hf_models = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures of the response body.
            print(f"Error fetching models for {pipeline_tag}: {e}")
            return []

        if not isinstance(hf_models, list):
            print(f"Error fetching models for {pipeline_tag}: unexpected response format")
            return []

        fetched = []
        for model_data in hf_models:
            if not isinstance(model_data, dict):
                continue
            # NOTE(review): the listing is expected to carry "modelId";
            # "id" is accepted as an alternative spelling - confirm against the API.
            model_id = model_data.get("modelId") or model_data.get("id") or ""
            if not model_id:
                continue  # an entry without an id cannot be selected later

            fetched.append(ModelInfo(
                model_id=model_id,
                task_type=task_type,
                # A null download count would break the ranking sort.
                downloads=model_data.get("downloads") or 0,
                description=self._extract_description(model_data),
                pipeline_tag=model_data.get("pipeline_tag") or pipeline_tag,
                tags=model_data.get("tags") or [],
                metadata=model_data
            ))
            if len(fetched) >= top_k:
                break

        return fetched

    def _extract_description(self, model_data: Dict) -> str:
        """Return a description for ``model_data``.

        Uses ``description`` when present, otherwise the card data's
        ``short_description``, otherwise a sentence generated from the
        pipeline tag and the model id.
        """
        description = model_data.get("description") or ""

        if not description:
            # NOTE(review): "cardData" is accepted in addition to "card_data"
            # on the assumption that the raw API payload uses camelCase - confirm.
            card_data = model_data.get("card_data") or model_data.get("cardData")
            if isinstance(card_data, dict):
                description = card_data.get("short_description") or ""

        # If no description, create one from the pipeline tag and model ID
        if not description:
            model_id = model_data.get("modelId") or model_data.get("id", "")
            pipeline_tag = model_data.get("pipeline_tag", "")
            description = f"A {pipeline_tag} model: {model_id}"

        return description

    def _get_fallback_models(self, task_type: str, count: int,
                             exclude: Optional[Any] = None) -> List["ModelInfo"]:
        """Return up to ``count`` hard-coded fallback models for ``task_type``.

        Args:
            task_type: Task to look up; unknown tasks fall back to ``gpt2``.
            count: Maximum number of fallback models to return.
            exclude: Optional collection of model ids to skip, so fallbacks
                do not duplicate models that were already found.

        Returns:
            ``ModelInfo`` objects with simulated download counts and
            ``metadata == {"fallback": True}``.
        """
        fallback_mapping = {
            "object-detection": [
                "facebook/detr-resnet-50",
                "hustvl/yolos-tiny"
            ],
            "image-to-text": [
                "Salesforce/blip-image-captioning-base",
                "nlpconnect/vit-gpt2-image-captioning"
            ],
            "image-cls": [
                "google/vit-base-patch16-224",
                "microsoft/resnet-50"
            ],
            "text-to-image": [
                "runwayml/stable-diffusion-v1-5",
                "CompVis/stable-diffusion-v1-4"
            ],
            "text-generation": [
                "gpt2",
                "microsoft/DialoGPT-medium"
            ]
        }

        skipped = exclude if exclude is not None else ()
        fallback_ids = [
            model_id
            for model_id in fallback_mapping.get(task_type, ["gpt2"])
            if model_id not in skipped
        ]
        pipeline_tag = self.task_mapping.get(task_type, [task_type])[0]

        # max() guards against a negative count turning into a tail slice.
        return [
            ModelInfo(
                model_id=model_id,
                task_type=task_type,
                downloads=100000 - i * 1000,  # Simulated download count
                description=f"Fallback model for {task_type}",
                pipeline_tag=pipeline_tag,
                tags=[task_type],
                metadata={"fallback": True}
            )
            for i, model_id in enumerate(fallback_ids[:max(count, 0)])
        ]

In [30]:
# HuggingGPT Model Selector
class HuggingGPTModelSelector:
    """
    Model Selection component that matches HuggingGPT's approach:
    1. Filter models by task type
    2. Rank by downloads
    3. Select top-K candidates
    4. Use LLM for final selection with in-context learning
    """

    def __init__(self, openrouter_api_key: Optional[str] = None, top_k_models: int = 5):
        """Create the selector.

        Args:
            openrouter_api_key: API key for OpenRouter. When it is missing,
                or LangChain is unavailable, no LLM client is created and
                selection falls back to download counts.
            top_k_models: Number of candidate models fetched per task.
        """
        self.model_registry = HuggingFaceModelRegistry()
        self.top_k_models = top_k_models

        # Initialize LangChain client for model selection
        self.llm_client = None
        if openrouter_api_key and HAS_LANGCHAIN:
            self.llm_client = ChatOpenAI(
                model="openai/gpt-4o-mini",
                # The explicitly passed key is used as-is; it must not be
                # silently replaced by whatever the environment contains.
                api_key=openrouter_api_key,
                base_url="https://openrouter.ai/api/v1",
                temperature=0.1,
                max_tokens=200
            )
        print("LLM status: ",self.llm_client,HAS_LANGCHAIN)
        # Demonstration examples for model selection (from HuggingGPT paper)
        self.demonstrations = [
            {
                "task": {"task": "object-detection", "args": {"image": "image1.jpg"}},
                "candidates": [
                    {"model_id": "facebook/detr-resnet-50", "downloads": 500000, "description": "End-to-End Object Detection model"},
                    {"model_id": "hustvl/yolos-tiny", "downloads": 200000, "description": "You Only Look at One Sequence: Object Detection"}
                ],
                "selection": {"id": "facebook/detr-resnet-50", "reason": "Higher downloads and proven performance for object detection"}
            },
            {
                "task": {"task": "text-to-image", "args": {"text": "a beautiful sunset"}},
                "candidates": [
                    {"model_id": "runwayml/stable-diffusion-v1-5", "downloads": 1000000, "description": "Stable Diffusion model for text-to-image generation"},
                    {"model_id": "CompVis/stable-diffusion-v1-4", "downloads": 800000, "description": "Earlier version of Stable Diffusion"}
                ],
                "selection": {"id": "runwayml/stable-diffusion-v1-5", "reason": "More recent version with better performance"}
            }
        ]

    def create_model_selection_prompt(self, task: Dict[str, Any], candidates: List["ModelInfo"]) -> str:
        """
        Create the model selection prompt following HuggingGPT format

        Args:
            task: Task dictionary to assign a model to.
            candidates: Candidate models; each is rendered as one JSON line.

        Returns:
            The complete prompt text.
        """
        # Serialize each candidate with json.dumps so quotes, backslashes or
        # newlines inside a description cannot break the JSON line.
        candidates_str = "".join(
            json.dumps(model.to_candidate_format(), ensure_ascii=False) + "\n"
            for model in candidates
        )

        # Format demonstrations
        demo_str = "".join(
            f"Task: {json.dumps(demo['task'])}\n"
            f"Selection: {json.dumps(demo['selection'])}\n\n"
            for demo in self.demonstrations
        )

        prompt = f"""#2 Model Selection Stage - Given the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The AI assistant merely outputs the model id of the most appropriate model. The output must be in a strict JSON format: {{"id": "model_id", "reason": "your detailed reason for the choice"}}.

We have a list of models for you to choose from:
{candidates_str}

Examples:
{demo_str}

Current task to assign: {json.dumps(task)}

Select the most appropriate model from the candidate list above. Consider factors like:
- Task compatibility
- Model popularity (downloads)
- Model description relevance
- Performance indicators

Response (JSON format only):"""

        return prompt

    @staticmethod
    def _parse_selection_response(response_text: Any) -> Optional[Dict[str, Any]]:
        """Return the first JSON object containing an "id" key, or None.

        Every "{" in the text is tried as the start of a JSON value, so
        answers wrapped in prose or code fences, and reasons that contain
        braces, are still parsed.
        """
        if not isinstance(response_text, str):
            return None

        text = response_text.strip()
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                parsed, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict) and "id" in parsed:
                return parsed
            start = text.find("{", start + 1)
        return None

    def select_model_with_llm(self, task: Dict[str, Any], candidates: List["ModelInfo"]) -> Dict[str, Any]:
        """Use LLM to select the best model from candidates

        Falls back to ``fallback_model_selection`` when no LLM client or no
        candidates are available, when the call fails, when the answer cannot
        be parsed, or when the chosen id is not one of the candidates.

        Returns:
            A dict with ``model_id``, ``reason``, ``model_info`` and
            ``selection_method``.
        """
        if not self.llm_client or not candidates:
            print("LLM not initialized or no candidates, using fallback")
            return self.fallback_model_selection(task, candidates)

        try:
            prompt = self.create_model_selection_prompt(task, candidates)
            response = self.llm_client.invoke(prompt)
            parsed = self._parse_selection_response(response.content)
        except Exception as e:
            # Best-effort boundary: any client failure degrades to the fallback.
            print(f"LLM model selection error: {e}")
            return self.fallback_model_selection(task, candidates)

        if parsed is not None:
            # Validate that selected model is in candidates
            selected_id = parsed["id"]
            chosen = next((c for c in candidates if c.model_id == selected_id), None)
            if chosen is not None:
                print("LLM selected model:", selected_id)
                return {
                    "model_id": selected_id,
                    "reason": parsed.get("reason", "Selected by LLM"),
                    "model_info": chosen,
                    "selection_method": "llm"
                }

        # Fallback if LLM selection failed
        print("LLM selection parsing failed, using fallback")
        return self.fallback_model_selection(task, candidates)

    def fallback_model_selection(self, task: Dict[str, Any], candidates: List["ModelInfo"]) -> Dict[str, Any]:
        """Fallback model selection based on downloads

        Returns the candidate with the most downloads, or a placeholder
        result with ``model_info`` set to None when there are no candidates.
        """
        if not candidates:
            return {
                "model_id": "fallback-model",
                "reason": "No candidates available",
                "model_info": None,
                "selection_method": "fallback"
            }

        best_candidate = max(candidates, key=lambda x: x.downloads)

        return {
            "model_id": best_candidate.model_id,
            "reason": f"Highest downloads ({best_candidate.downloads:,}) for task type",
            "model_info": best_candidate,
            "selection_method": "downloads"
        }

    def select_models_for_tasks(self, tasks: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
        """
        Main function to select models for all tasks
        Implements the complete HuggingGPT model selection pipeline

        Args:
            tasks: Task dictionaries; ``task`` names the task type and ``id``
                identifies the task. A task without ``id`` is keyed by its
                position in ``tasks``.

        Returns:
            Mapping from task id to the selection result for that task.
        """
        model_assignments = {}

        print(f"🔍 Starting model selection for {len(tasks)} tasks...")

        for position, task in enumerate(tasks):
            # Default to the position so tasks lacking an id do not all
            # collide on key 0 and overwrite each other.
            task_id = task.get("id", position)
            task_type = task.get("task", "")

            print(f"\n📋 Task {task_id}: {task_type}")

            # Step 1: Filter models by task type and get top-K
            print(f"  🔍 Fetching top-{self.top_k_models} models for '{task_type}'...")
            candidates = self.model_registry.get_hf_models_by_task(task_type, self.top_k_models)

            if not candidates:
                print(f"  ❌ No models found for task type: {task_type}")
                model_assignments[task_id] = {
                    "model_id": "no-model-found",
                    "reason": f"No suitable models found for {task_type}",
                    "model_info": None,
                    "selection_method": "error"
                }
                continue

            print(f"  ✅ Found {len(candidates)} candidate models:")
            for rank, candidate in enumerate(candidates[:3], start=1):  # Show top 3
                print(f"    {rank}. {candidate.model_id} ({candidate.downloads:,} downloads)")

            # Step 2: Use LLM for in-context task-model assignment
            print("  🤖 Selecting best model using LLM...")
            selection = self.select_model_with_llm(task, candidates)

            print(f"  ✅ Selected: {selection['model_id']}")
            print(f"  💡 Reason: {selection['reason']}")

            model_assignments[task_id] = selection

        return model_assignments

    def get_model_assignment_summary(self, assignments: Dict[int, Dict[str, Any]]) -> str:
        """Generate a summary of model assignments

        Args:
            assignments: Mapping as returned by ``select_models_for_tasks``.

        Returns:
            A multi-line text with one section per task.
        """
        parts = ["Model Assignment Summary:\n", "=" * 40 + "\n"]

        for task_id, assignment in assignments.items():
            parts.append(f"Task {task_id}: {assignment['model_id']}\n")
            parts.append(f"  Method: {assignment['selection_method']}\n")
            parts.append(f"  Reason: {assignment['reason']}\n")

            if assignment['model_info']:
                parts.append(f"  Downloads: {assignment['model_info'].downloads:,}\n")
            parts.append("-" * 20 + "\n")

        return "".join(parts)

In [31]:
# Example usage and testing
def test_model_selection():
    """Run the model selector on three sample tasks and print the outcome.

    Returns:
        A ``(selector, assignments)`` tuple: the selector that was used and
        the mapping of task id to selection result.
    """
    divider = "=" * 50

    print("🚀 HuggingGPT Model Selection Demo")
    print(divider)

    # Sample tasks as they would come out of the task planning stage.
    sample_specs = [
        ("object-detection", {"image": "e1.jpg"}),
        ("image-to-text", {"image": "e2.jpg"}),
        ("text-to-image", {"text": "a beautiful sunset"}),
    ]
    example_tasks = [
        {"task": task_name, "id": task_index, "dep": [-1], "args": task_args}
        for task_index, (task_name, task_args) in enumerate(sample_specs)
    ]

    # The key is read from the environment; when it is not set the selector
    # receives None.
    api_key = os.getenv("OPENROUTER_API_KEY")
    selector = HuggingGPTModelSelector(openrouter_api_key=api_key)

    # Pick a model for every sample task.
    assignments = selector.select_models_for_tasks(example_tasks)

    # Show the summary below a divider line.
    print("\n" + divider)
    print(selector.get_model_assignment_summary(assignments))

    return selector, assignments

# Run the demo when this code is executed directly; the returned selector and
# assignments are kept as module-level names.
if __name__ == "__main__":
    selector, assignments = test_model_selection()

🚀 HuggingGPT Model Selection Demo
LLM status:  client=<openai.resources.chat.completions.completions.Completions object at 0x7db128d7aed0> async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x7db128d79370> root_client=<openai.OpenAI object at 0x7db1280b2d80> root_async_client=<openai.AsyncOpenAI object at 0x7db128d7b230> model_name='openai/gpt-4o-mini' temperature=0.1 model_kwargs={} openai_api_key=SecretStr('**********') openai_api_base='https://openrouter.ai/api/v1' max_tokens=200 True
🔍 Starting model selection for 3 tasks...

📋 Task 0: object-detection
  🔍 Fetching top-5 models for 'object-detection'...
  ✅ Found 5 candidate models:
    1. tech4humans/yolov8s-signature-detector (41,151,738 downloads)
    2. microsoft/table-transformer-detection (2,119,774 downloads)
    3. microsoft/table-transformer-structure-recognition (1,057,417 downloads)
  🤖 Selecting best model using LLM...
LLM selected model: tech4humans/yolov8s-signature-detector
  ✅ Sel