In [None]:
import os
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
import asyncio
import json
import logging
from datetime import datetime
from typing import Dict, List, Any, Optional
import re

from app.mcp import mcp_tool

logger = logging.getLogger(__name__)
load_dotenv()




  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'app'

In [1]:
async def query_decomposer_tool(tool_input: dict, context: dict) -> dict:
    """
    Uses OpenAI GPT-4o to decompose complex queries into subqueries and returns prioritized list.
    
    Args:
        tool_input: {"query": str, "max_subqueries": int}
        context: Additional context information
    """
    try:
        import openai
        from app.config import config
        
        query = tool_input.get("query", "")
        max_subqueries = tool_input.get("max_subqueries", 5)
        
        if not query:
            return {"status": "error", "error": "Query is required", "meta": {}}
        
        # Initialize OpenAI client
        client = openai.AsyncOpenAI(api_key=config.OPENAI_API_KEY)
        
        # Create prompt for intelligent query decomposition
        prompt = f"""
You are an expert research analyst tasked with breaking down complex research questions into manageable sub-questions for systematic investigation.

Main Research Query: {query}

Your task is to decompose this query into {max_subqueries} focused sub-questions that will enable comprehensive research. Each sub-question should:

1. Address a specific aspect of the main query
2. Be researchable through academic sources, reports, and reliable data
3. Build towards answering the main question
4. Be prioritized by importance and logical sequence

For each sub-question, provide:
- The specific sub-question
- Priority level (1 = highest priority, {max_subqueries} = lowest)
- Rationale for why this sub-question is important
- Expected information type (data, analysis, case studies, etc.)

Format your response as a JSON array:
[
  {{
    "subquery": "Specific focused research question",
    "priority": 1,
    "rationale": "Why this question is important for the overall research",
    "expected_info_type": "data|analysis|case_studies|policy_review|literature_review",
    "research_scope": "global|regional|national|sectoral"
  }}
]

Generate exactly {max_subqueries} sub-questions, ordered by priority.
"""

        try:
            # Call OpenAI API
            response = await client.chat.completions.create(
                model=config.DECOMPOSITION_MODEL,
                messages=[
                    {"role": "system", "content": "You are an expert research analyst specializing in breaking down complex research questions into systematic investigation plans."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=1500
            )
            
            # Parse the response
            response_content = response.choices[0].message.content
            
            # Extract JSON from the response
            import json
            json_match = re.search(r'\[.*\]', response_content, re.DOTALL)
            if json_match:
                subqueries_data = json.loads(json_match.group())
            else:
                subqueries_data = json.loads(response_content)
            
            # Validate and process the subqueries
            subqueries = []
            for sq_data in subqueries_data:
                if isinstance(sq_data, dict) and "subquery" in sq_data:
                    subqueries.append({
                        "subquery": sq_data["subquery"],
                        "priority": sq_data.get("priority", len(subqueries) + 1),
                        "rationale": sq_data.get("rationale", ""),
                        "expected_info_type": sq_data.get("expected_info_type", "analysis"),
                        "research_scope": sq_data.get("research_scope", "global")
                    })
            
            # Sort by priority
            subqueries.sort(key=lambda x: x["priority"])
            
            return {
                "status": "ok",
                "result": {
                    "original_query": query,
                    "subqueries": subqueries,
                    "total_subqueries": len(subqueries)
                },
                "meta": {
                    "decomposition_method": "openai_gpt4o",
                    "model_used": config.DECOMPOSITION_MODEL,
                    "prompt_tokens": response.usage.prompt_tokens if hasattr(response, 'usage') else 0,
                    "completion_tokens": response.usage.completion_tokens if hasattr(response, 'usage') else 0
                }
            }
            
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse LLM response as JSON: {e}")
            return {
                "status": "error",
                "error": f"Failed to parse decomposition response: {str(e)}",
                "meta": {}
            }
            
        except Exception as e:
            logger.error(f"OpenAI API call failed: {e}")
            return {
                "status": "error",
                "error": f"Query decomposition failed: {str(e)}",
                "meta": {}
            }
        
    except Exception as e:
        logger.error(f"Query decomposition failed: {e}")
        return {
            "status": "error",
            "error": f"Query decomposition failed: {str(e)}",
            "meta": {}
        }


In [2]:
if __name__ == "__main__":
    async def main():
        tool_input = {
            "query": "How can artificial intelligence improve sustainable agriculture in developing countries?",
            "max_subqueries": 5
        }
        context = {}

        result = await query_decomposer_tool(tool_input, context)
        print(json.dumps(result, indent=2))

    asyncio.run(main())

NameError: name 'asyncio' is not defined