HuggingGPT Response Generation - Stage 4
=========================================
Final stage: Generate natural language responses from execution results

Integrates:
- Task Planning results
- Model Selection decisions
- Task Execution results

Produces:
- Friendly, actionable responses


In [6]:
import os

pip install langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-0.3.33-py3-none-any.whl.metadata (2.4 kB)
Downloading langchain_openai-0.3.33-py3-none-any.whl (74 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-openai
Successfully installed langchain-openai-0.3.33


In [7]:


import json
from typing import Dict, List, Any, Optional
from dataclasses import dataclass

# Optional: For LLM-based response generation
try:
    from langchain_openai import ChatOpenAI
    HAS_LANGCHAIN = True
except ImportError:
    HAS_LANGCHAIN = False
    print("langchain_openai not found")

@dataclass
class ResponseGenerationResult:
    """Final response with all pipeline information

    Built by ResponseGenerator.generate_final_response, which fills every
    field from the user input and the outputs of the earlier stages.
    """
    # The original user request, stored unchanged
    user_input: str
    # Response text produced by the LLM-based or the rule-based generator
    natural_language_response: str
    # One-line summary: number of planned tasks followed by their task names
    task_summary: str
    # One-line summary listing the model_id assigned to each task
    model_summary: str
    # One-line summary: number of executed tasks and how many succeeded
    execution_summary: str
    # Label from calculate_confidence_level: "high", "medium", "low" or "none"
    confidence_level: str
    # Per task id: dict with "task_type", "status", "result", "inference_time"
    structured_results: Dict[int, Any]
    # Non-empty resource_path values collected from the execution results
    file_paths: List[str]

class ResponseGenerator:
    """
    Stage 4: Response Generation
    Generates natural language responses from structured results
    """

    def __init__(self, openrouter_api_key: str = None):
        self.openrouter_api_key = os.getenv("OPENROUTER_API_KEY") or openrouter_api_key

        # Initialize LLM for natural language generation
        self.llm_client = None
        if openrouter_api_key and HAS_LANGCHAIN:
            self.llm_client = ChatOpenAI(
                model="openai/gpt-4o-mini",
                api_key=openrouter_api_key,
                base_url="https://openrouter.ai/api/v1",
                temperature=0.7,  # More creative for responses
                max_tokens=500
            )
            print("🎨 Response Generator initialized with LLM")
        else:
            print("🎨 Response Generator initialized (rule-based)")

    def create_response_generation_prompt(self,
                                          user_input: str,
                                          tasks: List[Dict],
                                          model_assignments: Dict,
                                          execution_results: Dict) -> str:
        """
        Create prompt for LLM-based response generation
        Following HuggingGPT's approach from the paper
        """

        # Format task information
        task_info = ""
        for task in tasks:
            task_info += f"- Task {task['id']}: {task['task']} with args {task['args']}\n"

        # Format model selection
        model_info = ""
        for task_id, assignment in model_assignments.items():
            model_info += f"- Task {task_id}: {assignment['model_id']}\n"

        # Format execution results
        results_info = ""
        for task_id, result in execution_results.items():
            if result.status == "success":
                # Format result based on type
                if isinstance(result.result, dict):
                    if "detections" in result.result:
                        detections = result.result["detections"]
                        results_info += f"- Task {task_id} (object-detection): Found {len(detections)} objects:\n"
                        for det in detections[:3]:
                            results_info += f"  • {det['label']} (confidence: {det['score']:.2%})\n"
                    elif "predictions" in result.result:
                        preds = result.result["predictions"]
                        results_info += f"- Task {task_id} (classification): Top predictions:\n"
                        for pred in preds[:3]:
                            results_info += f"  • {pred['label']} (confidence: {pred['score']:.2%})\n"
                elif isinstance(result.result, str):
                    results_info += f"- Task {task_id}: {result.result[:100]}...\n"
                elif hasattr(result.result, 'size'):  # Image
                    results_info += f"- Task {task_id}: Generated image ({result.result.size})\n"
            else:
                results_info += f"- Task {task_id}: Failed - {result.error}\n"

        prompt = f"""#4 Response Generation Stage - With the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as:

User Input: {user_input}

Task Planning:
{task_info}

Model Selection:
{model_info}

Task Execution:
{results_info}

You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path. If there is nothing in the results, please tell me you can't make it.

Generate a friendly, natural language response that:
1. Directly answers the user's request
2. Explains what was done
3. Presents the results clearly
4. Indicates confidence level
5. Mentions any saved files

Response:"""

        return prompt

    def generate_response_with_llm(self,
                                   user_input: str,
                                   tasks: List[Dict],
                                   model_assignments: Dict,
                                   execution_results: Dict) -> str:
        """Generate natural language response using LLM"""

        if not self.llm_client:
            return self.generate_response_rule_based(user_input, tasks, model_assignments, execution_results)

        try:
            prompt = self.create_response_generation_prompt(
                user_input, tasks, model_assignments, execution_results
            )

            response = self.llm_client.invoke(prompt)
            return response.content.strip()

        except Exception as e:
            print(f"⚠️  LLM response generation failed: {e}")
            return self.generate_response_rule_based(user_input, tasks, model_assignments, execution_results)

    def generate_response_rule_based(self,
                                     user_input: str,
                                     tasks: List[Dict],
                                     model_assignments: Dict,
                                     execution_results: Dict) -> str:
        """Generate natural language response using rules (fallback)"""

        response = f"Based on your request: '{user_input}', here's what I found:\n\n"

        successful_tasks = 0
        failed_tasks = 0

        for task in tasks:
            task_id = task["id"]
            task_type = task["task"]

            if task_id in execution_results:
                result = execution_results[task_id]

                if result.status == "success":
                    successful_tasks += 1

                    # Format based on result type
                    if isinstance(result.result, dict):
                        if "detections" in result.result:
                            detections = result.result["detections"]
                            response += f"🔍 Object Detection: I found {len(detections)} objects in the image:\n"
                            for det in detections[:5]:
                                response += f"   • {det['label']} (confidence: {det['score']:.1%})\n"

                        elif "predictions" in result.result:
                            preds = result.result["predictions"]
                            response += f"🏷️  Image Classification: Top predictions:\n"
                            for pred in preds[:3]:
                                response += f"   • {pred['label']} (confidence: {pred['score']:.1%})\n"

                    elif isinstance(result.result, str):
                        if task_type == "text-generation":
                            response += f"✍️  Generated Text: {result.result}\n"
                        elif task_type in ["image-to-text", "image-captioning"]:
                            response += f"💬 Image Caption: {result.result}\n"

                    elif hasattr(result.result, 'size'):  # PIL Image
                        response += f"🎨 Generated Image: Created a {result.result.size[0]}x{result.result.size[1]} image\n"

                    # Add file path if available
                    if result.resource_path:
                        response += f"   📁 Saved to: {result.resource_path}\n"

                    response += "\n"
                else:
                    failed_tasks += 1
                    response += f"❌ Task {task_id} ({task_type}) failed: {result.error}\n\n"

        # Add confidence summary
        if successful_tasks > 0:
            confidence = "high" if successful_tasks == len(tasks) else "medium" if failed_tasks == 0 else "low"
            response += f"\n✅ Confidence Level: {confidence.upper()}\n"
            response += f"   Successfully completed {successful_tasks}/{len(tasks)} tasks\n"
        else:
            response += "\n❌ Unable to complete the request. Please try again or reformulate your query.\n"

        return response

    def calculate_confidence_level(self, execution_results: Dict) -> str:
        """Calculate overall confidence level based on results"""

        if not execution_results:
            return "none"

        total_tasks = len(execution_results)
        successful_tasks = sum(1 for r in execution_results.values() if r.status == "success")

        success_rate = successful_tasks / total_tasks

        # Calculate average confidence from results
        total_confidence = 0
        confidence_count = 0

        for result in execution_results.values():
            if result.status == "success" and isinstance(result.result, dict):
                if "detections" in result.result:
                    scores = [d["score"] for d in result.result["detections"]]
                    if scores:
                        total_confidence += sum(scores) / len(scores)
                        confidence_count += 1
                elif "predictions" in result.result:
                    scores = [p["score"] for p in result.result["predictions"]]
                    if scores:
                        total_confidence += scores[0]  # Top prediction
                        confidence_count += 1

        avg_confidence = total_confidence / confidence_count if confidence_count > 0 else 0.5

        # Combine success rate and model confidence
        overall_confidence = (success_rate * 0.6) + (avg_confidence * 0.4)

        if overall_confidence >= 0.8:
            return "high"
        elif overall_confidence >= 0.5:
            return "medium"
        else:
            return "low"

    def generate_final_response(self,
                                user_input: str,
                                tasks: List[Dict],
                                model_assignments: Dict,
                                execution_results: Dict,
                                use_llm: bool = True) -> ResponseGenerationResult:
        """
        Main response generation function
        Integrates all stages into final response
        """

        print(f"\n{'='*70}")
        print(f"🎨 STAGE 4: RESPONSE GENERATION")
        print(f"{'='*70}")

        # Generate natural language response
        if use_llm and self.llm_client:
            print("🤖 Generating response with LLM...")
            natural_response = self.generate_response_with_llm(
                user_input, tasks, model_assignments, execution_results
            )
        else:
            print("📝 Generating response with rules...")
            natural_response = self.generate_response_rule_based(
                user_input, tasks, model_assignments, execution_results
            )

        # Create summaries
        task_summary = f"Planned {len(tasks)} tasks: " + ", ".join([t["task"] for t in tasks])

        model_summary = f"Used models: " + ", ".join([
            f"{assignment['model_id']}" for assignment in model_assignments.values()
        ])

        successful = sum(1 for r in execution_results.values() if r.status == "success")
        execution_summary = f"Executed {len(execution_results)} tasks ({successful} successful)"

        # Calculate confidence
        confidence = self.calculate_confidence_level(execution_results)

        # Collect file paths
        file_paths = [
            r.resource_path for r in execution_results.values()
            if r.resource_path
        ]

        # Collect structured results
        structured_results = {
            task_id: {
                "task_type": result.task_type,
                "status": result.status,
                "result": result.result,
                "inference_time": result.inference_time
            }
            for task_id, result in execution_results.items()
        }

        result = ResponseGenerationResult(
            user_input=user_input,
            natural_language_response=natural_response,
            task_summary=task_summary,
            model_summary=model_summary,
            execution_summary=execution_summary,
            confidence_level=confidence,
            structured_results=structured_results,
            file_paths=file_paths
        )

        print(f"\n✅ Response generated (confidence: {confidence})")
        print(f"{'='*70}")

        return result

    def display_final_response(self, response: ResponseGenerationResult):
        """Display the final response in a formatted way"""

        print(f"\n{'🎯 FINAL RESPONSE':^70}")
        print("="*70)

        print(f"\n📝 User Request:")
        print(f"   {response.user_input}")

        print(f"\n💬 Response:")
        print("-"*70)
        for line in response.natural_language_response.split('\n'):
            print(f"   {line}")
        print("-"*70)

        print(f"\n📊 Summary:")
        print(f"   • {response.task_summary}")
        print(f"   • {response.model_summary}")
        print(f"   • {response.execution_summary}")
        print(f"   • Confidence: {response.confidence_level.upper()}")

        if response.file_paths:
            print(f"\n📁 Generated Files:")
            for path in response.file_paths:
                print(f"   • {path}")

        print(f"\n{'='*70}")

# ============================================================================
# COMPLETE HUGGINGGPT PIPELINE - ALL 4 STAGES
# ============================================================================

class CompleteHuggingGPTPipeline:
    """
    Complete HuggingGPT System - All 4 Stages
    1. Task Planning
    2. Model Selection
    3. Task Execution
    4. Response Generation

    Only stage 4 is wired up here; the outputs of stages 1-3 are passed in
    by the caller.
    """

    def __init__(self,
                 openrouter_api_key: str = None,
                 hf_token: str = None):
        """Store the credentials and create the stage 4 response generator."""
        divider = "=" * 70

        print("🚀 Initializing Complete HuggingGPT Pipeline (All 4 Stages)")
        print(divider)

        self.openrouter_api_key = openrouter_api_key
        self.hf_token = hf_token

        # Stages 1-3 (Task Planning, Model Selection, Task Execution) come
        # from the previous implementations and are not created here.
        # Stage 4: Response Generation
        self.response_generator = ResponseGenerator(openrouter_api_key)

        print("✅ Pipeline Ready!")
        print(divider)

    async def process_complete_request(self,
                                       user_input: str,
                                       tasks: List[Dict],
                                       model_assignments: Dict,
                                       execution_results: Dict,
                                       use_llm_for_response: bool = True) -> ResponseGenerationResult:
        """
        Run stage 4 on the outputs of stages 1-3 and display the result.

        Returns the ResponseGenerationResult after it has been printed.
        """
        generator = self.response_generator

        # Stage 4: Response Generation
        final_response = generator.generate_final_response(
            user_input=user_input,
            tasks=tasks,
            model_assignments=model_assignments,
            execution_results=execution_results,
            use_llm=use_llm_for_response
        )

        # Display formatted response
        generator.display_final_response(final_response)

        return final_response

# ============================================================================
# TEST FUNCTION
# ============================================================================

async def test_response_generation():
    """Test response generation with sample data

    Builds mock outputs for stages 1-3 (two tasks, two model assignments,
    two successful execution results), runs the rule-based response
    generation on them and prints the formatted result.

    Returns:
        Tuple of (ResponseGenerator instance, generated response).
    """

    print("🧪 TESTING RESPONSE GENERATION")
    print("="*70)

    # Sample data (as if from previous stages)
    user_input = "Can you detect objects in my image and tell me what you see?"

    tasks = [
        {"id": 0, "task": "object-detection", "args": {"image": "test.jpg"}},
        {"id": 1, "task": "image-to-text", "args": {"image": "test.jpg"}}
    ]

    model_assignments = {
        0: {"model_id": "facebook/detr-resnet-50"},
        1: {"model_id": "Salesforce/blip-image-captioning-base"}
    }

    # Mock execution results: stands in for the stage 3 result type
    @dataclass
    class MockResult:
        task_id: int
        task_type: str
        status: str
        result: Any
        inference_time: float
        model_used: str
        error: Optional[str] = None
        resource_path: Optional[str] = None

    execution_results = {
        0: MockResult(
            task_id=0,
            task_type="object-detection",
            status="success",
            result={
                "detections": [
                    {"label": "cat", "score": 0.95},
                    {"label": "couch", "score": 0.87}
                ],
                "count": 2
            },
            inference_time=2.3,
            model_used="facebook/detr-resnet-50",
            resource_path="outputs/task_0_data.json"
        ),
        1: MockResult(
            task_id=1,
            task_type="image-to-text",
            status="success",
            result="a cat sitting on a couch",
            inference_time=1.5,
            model_used="Salesforce/blip-image-captioning-base",
            resource_path="outputs/task_1_text.txt"
        )
    }

    # Initialize response generator.
    # The variable is OPENROUTER_API_KEY; the misspelled OPENROUNTER_API_KEY
    # that this demo used to read is kept only as a legacy fallback.
    api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENROUNTER_API_KEY")
    generator = ResponseGenerator(openrouter_api_key=api_key)

    # Generate response (use_llm=False: rule-based for the demo)
    final_response = generator.generate_final_response(
        user_input=user_input,
        tasks=tasks,
        model_assignments=model_assignments,
        execution_results=execution_results,
        use_llm=False
    )

    # Display
    generator.display_final_response(final_response)

    return generator, final_response

# ============================================================================
# JUPYTER STARTUP
# ============================================================================

if __name__ == "__main__" or "ipykernel" in __import__("sys").modules:
    # Usage banner, shown when run as a script or inside a Jupyter kernel.
    # Emitted with a single print so no helper names leak into the namespace.
    print("\n".join([
        "=" * 70,
        "🎨 HUGGINGGPT RESPONSE GENERATION - STAGE 4",
        "=" * 70,
        "\n📋 Test response generation:",
        "\n   generator, response = await test_response_generation()",
        "\n" + "=" * 70,
    ]))

🎨 HUGGINGGPT RESPONSE GENERATION - STAGE 4

📋 Test response generation:

   generator, response = await test_response_generation()



In [8]:
generator, response = await test_response_generation()

🧪 TESTING RESPONSE GENERATION
🎨 Response Generator initialized with LLM

🎨 STAGE 4: RESPONSE GENERATION
📝 Generating response with rules...

✅ Response generated (confidence: high)

                           🎯 FINAL RESPONSE                           

📝 User Request:
   Can you detect objects in my image and tell me what you see?

💬 Response:
----------------------------------------------------------------------
   Based on your request: 'Can you detect objects in my image and tell me what you see?', here's what I found:
   
   🔍 Object Detection: I found 2 objects in the image:
      • cat (confidence: 95.0%)
      • couch (confidence: 87.0%)
      📁 Saved to: outputs/task_0_data.json
   
   💬 Image Caption: a cat sitting on a couch
      📁 Saved to: outputs/task_1_text.txt
   
   
   ✅ Confidence Level: HIGH
      Successfully completed 2/2 tasks
   
----------------------------------------------------------------------

📊 Summary:
   • Planned 2 tasks: object-detection, image-to-t