In [1]:
pip install crewai

Collecting docstring-parser<0.16,>=0.15 (from instructor<0.6.0,>=0.5.2->crewai)
  Using cached docstring_parser-0.15-py3-none-any.whl.metadata (2.4 kB)
Collecting typer<0.10.0,>=0.9.0 (from instructor<0.6.0,>=0.5.2->crewai)
  Using cached typer-0.9.4-py3-none-any.whl.metadata (14 kB)
Collecting tiktoken<0.6.0,>=0.5.2 (from langchain-openai<0.0.6,>=0.0.5->crewai)
  Using cached tiktoken-0.5.2-cp312-cp312-win_amd64.whl.metadata (6.8 kB)
Using cached docstring_parser-0.15-py3-none-any.whl (36 kB)
Using cached tiktoken-0.5.2-cp312-cp312-win_amd64.whl (785 kB)
Using cached typer-0.9.4-py3-none-any.whl (45 kB)
Installing collected packages: docstring-parser, typer, tiktoken

  Attempting uninstall: docstring-parser

    Found existing installation: docstring_parser 0.16

    Uninstalling docstring_parser-0.16:

      Successfully uninstalled docstring_parser-0.16

   ---------------------------------------- 0/3 [docstring-parser]
   ---------------------------------------- 0/3 [docstring-par

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
docling 2.28.4 requires typer<0.13.0,>=0.12.5, but you have typer 0.9.4 which is incompatible.
docling-core 2.25.0 requires typer<0.13.0,>=0.12.5, but you have typer 0.9.4 which is incompatible.
fastapi-cli 0.0.7 requires typer>=0.12.3, but you have typer 0.9.4 which is incompatible.
mistral-common 1.5.4 requires tiktoken>=0.7.0, but you have tiktoken 0.5.2 which is incompatible.
swarms 6.0.9 requires docstring_parser==0.16, but you have docstring-parser 0.15 which is incompatible.
swarms 6.0.9 requires pydantic==2.8.2, but you have pydantic 2.11.4 which is incompatible.
vllm 0.8.5.post1 requires tiktoken>=0.6.0, but you have tiktoken 0.5.2 which is incompatible.


In [14]:
from crewai import Agent, Task, Crew
from langchain_community.llms import Ollama
from langchain.tools import Tool
import base64
import requests
import os
from typing import Dict, Any

# === STEP 1: Enhanced Multimodal Tool with Context ===

class ContextualMultimodalTool:
    def __init__(self, ollama_base_url="http://localhost:11434"):
        self.ollama_base_url = ollama_base_url
        self.context_data = {}  # Store analysis context
    
    def encode_image_to_base64(self, image_path: str) -> str:
        """Convert image to base64 string for Ollama"""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    
    def analyze_with_context(self, image_path: str, prompt: str, analysis_type: str = "general") -> str:
        """Analyze image with contextual prompting"""
        try:
            # Handle CrewAI argument passing
            if isinstance(image_path, dict):
                if 'tool_input' in image_path:
                    image_path = image_path['tool_input']
                else:
                    return "Error: Invalid image path format"
            
            # Clean path
            if isinstance(image_path, str):
                image_path = image_path.strip().strip('"').strip("'")
            
            # Check if file exists
            if not os.path.exists(image_path):
                return f"Error: Image file not found at path: {image_path}"
            
            # Get stored context if available
            user_prompt = self.context_data.get('user_prompt', '')
            previous_analysis = self.context_data.get('previous_analysis', {})
            
            # Enhance prompt with context
            contextual_prompt = self._build_contextual_prompt(prompt, user_prompt, previous_analysis, analysis_type)
            
            # Encode image
            image_b64 = self.encode_image_to_base64(image_path)
            
            # Make request to Ollama
            response = requests.post(
                f"{self.ollama_base_url}/api/generate",
                json={
                    "model": "llava:7b",
                    "prompt": contextual_prompt,
                    "images": [image_b64],
                    "stream": False
                },
                timeout=90
            )
            
            if response.status_code == 200:
                result = response.json()
                analysis_result = result.get("response", "No response received")
                
                # Store analysis for context
                self.context_data['previous_analysis'][analysis_type] = analysis_result
                
                return analysis_result
            else:
                return f"Error: {response.status_code} - {response.text}"
                
        except Exception as e:
            return f"Error processing image: {str(e)}"
    
    def _build_contextual_prompt(self, base_prompt: str, user_prompt: str, previous_analysis: Dict[str, str], analysis_type: str) -> str:
        """Build a contextual prompt incorporating user intent and previous analysis"""
        
        context_parts = [base_prompt]
        
        if user_prompt:
            context_parts.append(f"\nUSER'S REQUEST/CONTEXT: {user_prompt}")
            context_parts.append("Consider the user's specific request when analyzing this image.")
        
        if previous_analysis:
            context_parts.append("\nPREVIOUS ANALYSIS CONTEXT:")
            for analysis_key, analysis_value in previous_analysis.items():
                if analysis_key != analysis_type:  # Don't include current analysis type
                    context_parts.append(f"- {analysis_key.upper()}: {analysis_value[:200]}...")
        
        # Add specific instructions based on analysis type
        if analysis_type == "visual":
            context_parts.append("\nFocus on visual elements that might be relevant to the user's request.")
        elif analysis_type == "emotional":
            context_parts.append("\nPay special attention to emotional cues that relate to the user's context.")
        elif analysis_type == "contextual":
            context_parts.append("\nProvide insights that directly address the user's needs and situation.")
        
        return "\n".join(context_parts)
    
    def set_user_context(self, user_prompt: str):
        """Set the user's prompt for contextual analysis"""
        self.context_data['user_prompt'] = user_prompt
        self.context_data['previous_analysis'] = {}

# Initialize the contextual tool
contextual_tool = ContextualMultimodalTool()

# === STEP 2: Enhanced Analysis Functions ===

def analyze_visual_context(image_path: str) -> str:
    """Analyze visual content with user context"""
    return contextual_tool.analyze_with_context(
        image_path,
        """Analyze this image comprehensively, focusing on:
        1. People present (age, gender, appearance, clothing)
        2. Environment and setting (location, lighting, objects)
        3. Activities or situations depicted
        4. Visual elements that might be relevant to understanding the context
        5. Any text, signs, or written content visible
        Provide detailed observations that could help understand the user's situation.""",
        "visual"
    )

def analyze_emotional_context(image_path: str) -> str:
    """Analyze emotional state with user context"""
    return contextual_tool.analyze_with_context(
        image_path,
        """Perform a detailed emotional and psychological analysis:
        1. Facial expressions and micro-expressions
        2. Body language and posture
        3. Emotional state indicators (happy, sad, stressed, calm, etc.)
        4. Social dynamics if multiple people are present
        5. Environmental factors affecting mood
        6. Energy level and engagement
        Consider how this emotional context relates to what the user might need or want.""",
        "emotional"
    )

def analyze_situational_context(image_path: str) -> str:
    """Analyze the overall situation with full context"""
    return contextual_tool.analyze_with_context(
        image_path,
        """Provide a comprehensive situational analysis:
        1. What is happening in this scene?
        2. What might have led to this situation?
        3. What are the social, emotional, or practical implications?
        4. What kind of response or assistance might be appropriate?
        5. How does this relate to the user's expressed needs or context?
        Synthesize all visual and emotional information to provide actionable insights.""",
        "contextual"
    )

# === STEP 3: Create Enhanced Tools ===

visual_context_tool = Tool(
    name="VisualContextAnalyzer",
    description="Analyze visual content of an image with consideration for user context and needs.",
    func=lambda x: analyze_visual_context(x)
)

emotional_context_tool = Tool(
    name="EmotionalContextAnalyzer",
    description="Analyze emotional states and psychological context with user needs in mind.",
    func=lambda x: analyze_emotional_context(x)
)

situational_context_tool = Tool(
    name="SituationalContextAnalyzer",
    description="Analyze the overall situation and provide contextual insights based on user needs.",
    func=lambda x: analyze_situational_context(x)
)

# === STEP 4: Set up LLM ===
text_llm = Ollama(
    model="llava:7b",
    base_url="http://localhost:11434"
)

# === STEP 5: Define Context-Aware Agents ===

visual_analyst = Agent(
    role="Contextual Visual Analyst",
    goal="Extract comprehensive visual information while considering the user's specific needs and context",
    backstory=(
        "You are an expert visual analyst who doesn't just describe what you see, but understands "
        "how visual information relates to human needs, emotions, and situations. You consider the "
        "user's specific request and provide relevant insights that help address their concerns."
    ),
    tools=[visual_context_tool],
    verbose=True,
    llm=text_llm
)

emotion_analyst = Agent(
    role="Contextual Emotion Specialist",
    goal="Understand emotional states and their implications for the user's specific situation",
    backstory=(
        "You are a psychology expert who specializes in understanding not just what emotions are "
        "present, but how they relate to the user's specific context and needs. You provide "
        "emotional insights that are actionable and relevant to the user's request."
    ),
    tools=[emotional_context_tool],
    verbose=True,
    llm=text_llm
)

contextual_advisor = Agent(
    role="Contextual Situation Advisor",
    goal="Synthesize all information to provide targeted advice and insights for the user's specific needs",
    backstory=(
        "You are an expert advisor who combines visual analysis, emotional understanding, and "
        "situational awareness to provide comprehensive, actionable advice. You understand how "
        "to connect image analysis with user intent to provide meaningful, helpful responses."
    ),
    tools=[situational_context_tool],
    verbose=True,
    llm=text_llm
)

response_synthesizer = Agent(
    role="Intelligent Response Synthesizer",
    goal="Create comprehensive, contextually-aware responses that directly address the user's needs",
    backstory=(
        "You are a master synthesizer who combines all available information - visual analysis, "
        "emotional insights, situational context, and user intent - to create responses that are "
        "not only accurate but truly helpful and relevant to what the user actually needs."
    ),
    tools=[],
    verbose=True,
    llm=text_llm
)

# === STEP 6: Define Contextual Tasks ===

def create_contextual_tasks(image_path: str, user_prompt: str):
    """Create tasks that incorporate user context"""
    
    visual_task = Task(
        description=f"""
        Analyze the image at '{image_path}' with the following user context in mind:
        USER REQUEST: "{user_prompt}"
        
        Use the VisualContextAnalyzer tool to extract visual information that is relevant to 
        the user's request. Focus on details that might help understand or address their needs.
        """,
        agent=visual_analyst,
        expected_output="Detailed visual analysis relevant to the user's context and needs"
    )
    
    emotional_task = Task(
        description=f"""
        Analyze the emotional context of the person(s) in the image at '{image_path}'.
        USER REQUEST: "{user_prompt}"
        
        Use the EmotionalContextAnalyzer tool to understand emotional states and how they 
        relate to the user's specific situation or request.
        """,
        agent=emotion_analyst,
        expected_output="Emotional analysis that considers the user's context and needs"
    )
    
    situational_task = Task(
        description=f"""
        Analyze the overall situation in the image at '{image_path}' considering:
        USER REQUEST: "{user_prompt}"
        
        Use the SituationalContextAnalyzer tool to provide comprehensive situational insights
        that directly relate to what the user is asking about or needs help with.
        """,
        agent=contextual_advisor,
        expected_output="Comprehensive situational analysis addressing user needs"
    )
    
    synthesis_task = Task(
        description=f"""
        Based on all previous analyses (visual, emotional, and situational), create a comprehensive 
        response that directly addresses the user's request:
        USER REQUEST: "{user_prompt}"
        
        Synthesize all information to provide:
        1. A clear understanding of the situation in the image
        2. How it relates to the user's specific request or needs
        3. Actionable insights, recommendations, or answers
        4. Any additional context that would be helpful
        
        Make your response practical, empathetic, and directly useful to the user.
        """,
        agent=response_synthesizer,
        expected_output="Comprehensive, contextually-aware response addressing the user's specific needs",
        context=[visual_task, emotional_task, situational_task]
    )
    
    return [visual_task, emotional_task, situational_task, synthesis_task]

# === STEP 7: Main Analysis Function ===

def analyze_with_user_context(image_path: str, user_prompt: str):
    """Run contextual multimodal analysis with user prompt"""
    
    # Verify image exists
    if not os.path.exists(image_path):
        print(f"Error: Image file '{image_path}' not found!")
        return
    
    # Set user context in the tool
    contextual_tool.set_user_context(user_prompt)
    
    print(f"🔍 Analyzing image with user context:")
    print(f"📁 Image: {image_path}")
    print(f"💭 User Request: {user_prompt}")
    print("=" * 80)
    
    # Create contextual tasks
    tasks = create_contextual_tasks(image_path, user_prompt)
    
    # Create and run crew
    crew = Crew(
        agents=[visual_analyst, emotion_analyst, contextual_advisor, response_synthesizer],
        tasks=tasks,
        verbose=True,
        process="sequential"
    )
    
    # Execute analysis
    result = crew.kickoff()
    
    print("\n" + "=" * 80)
    print("🧠 CONTEXTUAL ANALYSIS RESULTS:")
    print("=" * 80)
    print(result)
    
    return result

# === STEP 8: Usage Examples ===

if __name__ == "__main__":
    
    # Example 1: Elderly care context
    image_path = r"C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png"
    user_prompt = "Hi, how are you?"
    
    result1 = analyze_with_user_context(image_path, user_prompt)
    
    # Save results with context
    with open("contextual_analysis.txt", "w", encoding="utf-8") as f:
        f.write(f"USER REQUEST: {user_prompt}\n")
        f.write(f"IMAGE: {image_path}\n")
        f.write("=" * 50 + "\n")
        f.write(str(result1))
    
    print("\n✅ Contextual analysis complete! Results saved to 'contextual_analysis.txt'")
    
    # Additional examples you can try:
    """
    # Example 2: Social interaction context
    user_prompt = "I want to improve my social interactions. What can you tell me about the social dynamics in this image?"
    
    # Example 3: Mental health context  
    user_prompt = "I'm concerned about signs of depression or anxiety. What emotional indicators do you see in this image?"
    
    # Example 4: Professional context
    user_prompt = "I'm preparing for a job interview. Can you help me understand what professional impression this image conveys?"
    
    # Example 5: Educational context
    user_prompt = "I'm studying human behavior. Can you help me analyze the behavioral patterns and social cues in this image?"
    """

Overriding of current TracerProvider is not allowed


🔍 Analyzing image with user context:
📁 Image: C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png
💭 User Request: Hi, how are you?
[DEBUG]: Working Agent: Contextual Visual Analyst
[INFO]: Starting Task: 
        Analyze the image at 'C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png' with the following user context in mind:
        USER REQUEST: "Hi, how are you?"
        
        Use the VisualContextAnalyzer tool to extract visual information that is relevant to 
        the user's request. Focus on details that might help understand or address their needs.
        


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3m Thought: Do I need to use a tool? Yes
Action: Delegate work to co-worker
Action Input: role="Contextual Emotion Specialist", task="analyze emotional content of image based on user request", context="user asked 'how are you?'. The image shows an elderly person sitting in front of a window with 

In [16]:
from crewai import Agent, Task, Crew
from langchain_community.llms import Ollama
from langchain.tools import Tool
import base64
import requests
import os
import asyncio
import aiohttp
from concurrent.futures import ThreadPoolExecutor
import threading
from typing import Dict, Any

# === OPTIMIZATION 1: Async Multimodal Tool ===

class OptimizedMultimodalTool:
    def __init__(self, ollama_base_url="http://localhost:11434"):
        self.ollama_base_url = ollama_base_url
        self.context_data = {}
        self._lock = threading.Lock()
    
    def encode_image_to_base64_fast(self, image_path: str) -> str:
        """Optimized image encoding"""
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode('utf-8')
        except Exception as e:
            return None
    
    def single_comprehensive_analysis(self, image_path: str, user_prompt: str = "") -> Dict[str, str]:
        """Single API call for all analysis types - MUCH FASTER"""
        try:
            # Handle CrewAI argument formats
            if isinstance(image_path, dict):
                image_path = image_path.get('tool_input', image_path)
            
            image_path = str(image_path).strip().strip('"').strip("'")
            
            if not os.path.exists(image_path):
                return {"error": f"Image not found: {image_path}"}
            
            # Encode image once
            image_b64 = self.encode_image_to_base64_fast(image_path)
            if not image_b64:
                return {"error": "Failed to encode image"}
            
            # Comprehensive prompt for single analysis
            comprehensive_prompt = f"""
Analyze this image comprehensively and provide a structured response covering ALL aspects:

USER CONTEXT: {user_prompt if user_prompt else "General analysis requested"}

Please provide analysis in the following structure:

**VISUAL ANALYSIS:**
- People: (age, gender, appearance, expressions, posture)
- Environment: (setting, location, lighting, objects)
- Activities: (what's happening, interactions)
- Notable details: (text, signs, important elements)

**EMOTIONAL ANALYSIS:**
- Primary emotions visible: (happy, sad, calm, stressed, etc.)
- Body language indicators:
- Facial expression details:
- Energy level and engagement:
- Social dynamics (if multiple people):

**CONTEXTUAL INSIGHTS:**
- Situation assessment: (what's happening and why)
- Relationship to user context:
- Practical implications:
- Recommended responses or actions:

**SYNTHESIS:**
- Key takeaways relevant to user needs:
- Actionable recommendations:
- Additional considerations:

Be thorough but concise. Focus on elements most relevant to the user's context.
"""
            
            # Single API call
            response = requests.post(
                f"{self.ollama_base_url}/api/generate",
                json={
                    "model": "llava:7b",
                    "prompt": comprehensive_prompt,
                    "images": [image_b64],
                    "stream": False,
                    "options": {
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "num_predict": 800  # Limit response length for speed
                    }
                },
                timeout=120
            )
            
            if response.status_code == 200:
                result = response.json().get("response", "")
                
                # Parse structured response (simple parsing)
                sections = {
                    "visual": self._extract_section(result, "VISUAL ANALYSIS"),
                    "emotional": self._extract_section(result, "EMOTIONAL ANALYSIS"), 
                    "contextual": self._extract_section(result, "CONTEXTUAL INSIGHTS"),
                    "synthesis": self._extract_section(result, "SYNTHESIS"),
                    "full_response": result
                }
                
                return sections
            else:
                return {"error": f"API Error: {response.status_code}"}
                
        except Exception as e:
            return {"error": f"Analysis failed: {str(e)}"}
    
    def _extract_section(self, text: str, section_name: str) -> str:
        """Extract specific section from structured response"""
        try:
            start_marker = f"**{section_name}:**"
            start_idx = text.find(start_marker)
            if start_idx == -1:
                return "Section not found"
            
            start_idx += len(start_marker)
            
            # Find next section or end
            next_section = text.find("**", start_idx)
            if next_section == -1:
                return text[start_idx:].strip()
            else:
                return text[start_idx:next_section].strip()
        except:
            return "Parsing error"

# === OPTIMIZATION 2: Fast Analysis Functions ===

optimized_tool = OptimizedMultimodalTool()

def fast_comprehensive_analysis(image_path: str) -> str:
    """Single fast analysis call"""
    user_prompt = getattr(fast_comprehensive_analysis, 'user_context', '')
    result = optimized_tool.single_comprehensive_analysis(image_path, user_prompt)
    
    if "error" in result:
        return result["error"]
    
    return result.get("full_response", "Analysis completed")

def get_visual_analysis(image_path: str) -> str:
    """Extract visual section from cached analysis"""
    result = optimized_tool.single_comprehensive_analysis(image_path, 
                                                        getattr(get_visual_analysis, 'user_context', ''))
    return result.get("visual", "Visual analysis not available")

def get_emotional_analysis(image_path: str) -> str:
    """Extract emotional section from cached analysis"""
    result = optimized_tool.single_comprehensive_analysis(image_path,
                                                         getattr(get_emotional_analysis, 'user_context', ''))
    return result.get("emotional", "Emotional analysis not available")

# === OPTIMIZATION 3: Streamlined Tools ===

comprehensive_tool = Tool(
    name="ComprehensiveAnalyzer",
    description="Perform complete visual, emotional, and contextual analysis in one fast operation.",
    func=lambda x: fast_comprehensive_analysis(x)
)

# === OPTIMIZATION 4: Faster LLM Setup ===
fast_llm = Ollama(
    model="llava:7B",  # Keep fast text model
    base_url="http://localhost:11434",
    # Optimize for speed
    temperature=0.7,
    num_predict=200,  # Shorter responses
    top_p=0.9
)

# === OPTIMIZATION 5: Streamlined Single Agent ===

master_analyst = Agent(
    role="Master Multimodal Analyst",
    goal="Provide fast, comprehensive analysis combining visual, emotional, and contextual insights",
    backstory=(
        "You are an expert analyst who can quickly process complex multimodal information "
        "and provide comprehensive insights efficiently. You understand user context and "
        "provide relevant, actionable analysis without unnecessary elaboration."
    ),
    tools=[comprehensive_tool],
    verbose=True,
    llm=fast_llm
)

# === OPTIMIZATION 6: Single Task Approach ===

def create_fast_task(image_path: str, user_prompt: str):
    """Create single comprehensive task"""
    return Task(
        description=f"""
        Perform a comprehensive multimodal analysis of the image at '{image_path}'.
        
        USER CONTEXT: {user_prompt}
        
        Use the ComprehensiveAnalyzer tool to get complete analysis, then:
        1. Summarize the key findings relevant to the user's needs
        2. Provide actionable insights and recommendations  
        3. Focus on what matters most for the user's context
        4. Keep response concise but comprehensive
        
        Be efficient and direct while maintaining thoroughness.
        """,
        agent=master_analyst,
        expected_output="Comprehensive analysis with actionable insights relevant to user context"
    )

# === OPTIMIZATION 7: Fast Analysis Function ===

def fast_analyze_with_context(image_path: str, user_prompt: str):
    """Optimized fast analysis with user context"""
    
    if not os.path.exists(image_path):
        print(f"❌ Error: Image file '{image_path}' not found!")
        return
    
    # Set user context for tools
    fast_comprehensive_analysis.user_context = user_prompt
    get_visual_analysis.user_context = user_prompt
    get_emotional_analysis.user_context = user_prompt
    
    print(f"🚀 Fast Analysis Starting...")
    print(f"📁 Image: {os.path.basename(image_path)}")
    print(f"💭 Context: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
    print("=" * 60)
    
    # Create single task
    task = create_fast_task(image_path, user_prompt)
    
    # Create streamlined crew
    crew = Crew(
        agents=[master_analyst],
        tasks=[task],
        verbose=False,  # Disable verbose for speed
        process="sequential"
    )
    
    # Execute with timing
    import time
    start_time = time.time()
    
    result = crew.kickoff()
    
    end_time = time.time()
    execution_time = end_time - start_time
    
    print(f"\n⚡ FAST ANALYSIS RESULTS (completed in {execution_time:.2f}s):")
    print("=" * 60)
    print(result)
    
    return result

# === OPTIMIZATION 8: Batch Processing (if multiple images) ===

def batch_analyze_images(image_paths: list, user_prompts: list):
    """Analyze multiple images efficiently"""
    results = []
    
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = [
            executor.submit(fast_analyze_with_context, img_path, prompt)
            for img_path, prompt in zip(image_paths, user_prompts)
        ]
        
        for future in futures:
            try:
                result = future.result(timeout=180)
                results.append(result)
            except Exception as e:
                results.append(f"Error: {e}")
    
    return results

# === OPTIMIZATION 9: Usage with Performance Tips ===

def optimize_ollama_performance():
    """Tips for optimizing Ollama performance"""
    print("""
🔧 OLLAMA PERFORMANCE OPTIMIZATION TIPS:

1. Use smaller models for speed:
   ollama pull llava:7b  (instead of 13b or 34b)

2. Set environment variables for better performance:
   export OLLAMA_NUM_PARALLEL=2
   export OLLAMA_MAX_LOADED_MODELS=2

3. Increase memory allocation:
   export OLLAMA_HOST=0.0.0.0:11434
   export OLLAMA_ORIGINS=*

4. Use GPU acceleration if available:
   Make sure CUDA/ROCm is properly installed

5. Keep models loaded:
   Run: ollama run llava:7b (keeps model in memory)
""")

if __name__ == "__main__":
    
    # Performance tips
    optimize_ollama_performance()
    
    # Fast analysis example
    image_path = r"C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png"
    user_prompt = "I'm worried about my elderly parent's wellbeing. What can you tell me about their current state?"
    
    # Run fast analysis
    result = fast_analyze_with_context(image_path, user_prompt)
    
    # Save results
    with open("fast_analysis_result.txt", "w", encoding="utf-8") as f:
        f.write(f"USER PROMPT: {user_prompt}\n")
        f.write(f"IMAGE: {image_path}\n")
        f.write("=" * 50 + "\n")
        f.write(str(result))
    
    print(f"\n✅ Fast analysis complete! Results saved to 'fast_analysis_result.txt'")
    
    # Example of batch processing
    """
    # Batch example:
    image_paths = ["image1.jpg", "image2.jpg", "image3.jpg"]
    user_prompts = ["Analyze emotions", "Check safety", "Assess wellbeing"]
    batch_results = batch_analyze_images(image_paths, user_prompts)
    """

Overriding of current TracerProvider is not allowed



🔧 OLLAMA PERFORMANCE OPTIMIZATION TIPS:

1. Use smaller models for speed:
   ollama pull llava:7b  (instead of 13b or 34b)

2. Set environment variables for better performance:
   export OLLAMA_NUM_PARALLEL=2
   export OLLAMA_MAX_LOADED_MODELS=2

3. Increase memory allocation:
   export OLLAMA_HOST=0.0.0.0:11434
   export OLLAMA_ORIGINS=*

4. Use GPU acceleration if available:
   Make sure CUDA/ROCm is properly installed

5. Keep models loaded:
   Run: ollama run llava:7b (keeps model in memory)

🚀 Fast Analysis Starting...
📁 Image: elderly.png
💭 Context: I'm worried about my elderly parent's wellbeing. What can you tell me about their current state?


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3m Thought: Do I need to use a tool? No
Final Answer: To perform a comprehensive multimodal analysis of the image 'C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png' in the given user context, I will manually analyze the visual, emotional, and 

In [19]:
from crewai import Agent, Task, Crew
from langchain_community.llms import Ollama
from langchain.tools import Tool
import base64
import requests
import os
import threading
from typing import Dict, Any, List
import json
import time

class ContextualMultimodalAnalyzer:
    def __init__(self, ollama_base_url="http://localhost:11434"):
        self.ollama_base_url = ollama_base_url
        self.analysis_cache = {}
        self._lock = threading.Lock()
    
    def encode_image_to_base64(self, image_path: str) -> str:
        """Encode image to base64"""
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode('utf-8')
        except Exception as e:
            return None
    
    def contextual_visual_analysis(self, image_path: str, user_context: str) -> Dict[str, Any]:
        """Context-aware visual analysis that focuses on user's specific needs"""
        try:
            # Handle CrewAI argument formats
            if isinstance(image_path, dict):
                image_path = image_path.get('tool_input', image_path)
            
            image_path = str(image_path).strip().strip('"').strip("'")
            
            if not os.path.exists(image_path):
                return {"error": f"Image not found: {image_path}"}
            
            # Encode image
            image_b64 = self.encode_image_to_base64(image_path)
            if not image_b64:
                return {"error": "Failed to encode image"}
            
            # Create context-specific analysis prompt
            context_prompt = self._create_contextual_prompt(user_context)
            
            # API call with contextual focus
            response = requests.post(
                f"{self.ollama_base_url}/api/generate",
                json={
                    "model": "llava:7b",
                    "prompt": context_prompt,
                    "images": [image_b64],
                    "stream": False,
                    "options": {
                        "temperature": 0.3,  # Lower temp for more focused responses
                        "top_p": 0.8,
                        "num_predict": 600
                    }
                },
                timeout=120
            )
            
            if response.status_code == 200:
                analysis_result = response.json().get("response", "")
                
                # Parse and structure the contextual response
                structured_result = self._parse_contextual_response(analysis_result, user_context)
                
                return {
                    "success": True,
                    "contextual_analysis": structured_result,
                    "raw_response": analysis_result,
                    "user_context": user_context
                }
            else:
                return {"error": f"API Error: {response.status_code}"}
                
        except Exception as e:
            return {"error": f"Analysis failed: {str(e)}"}
    
    def _create_contextual_prompt(self, user_context: str) -> str:
        """Create a focused prompt based on user context"""
        
        # Analyze user context to determine focus areas
        context_lower = user_context.lower()
        
        # Define context categories and their specific analysis needs
        context_categories = {
            'health_wellbeing': ['health', 'wellbeing', 'sick', 'tired', 'medical', 'elderly', 'care'],
            'safety_security': ['safe', 'danger', 'security', 'risk', 'accident', 'emergency'],
            'emotional_state': ['emotion', 'mood', 'happy', 'sad', 'stress', 'anxiety', 'feeling'],
            'behavior_activity': ['behavior', 'activity', 'doing', 'action', 'work', 'play'],
            'relationship_social': ['relationship', 'family', 'friend', 'social', 'interaction'],
            'environment_setting': ['environment', 'place', 'location', 'setting', 'home', 'work']
        }
        
        # Determine primary focus based on user context
        detected_categories = []
        for category, keywords in context_categories.items():
            if any(keyword in context_lower for keyword in keywords):
                detected_categories.append(category)
        
        # Create targeted prompt based on detected categories
        base_prompt = f"""
USER REQUEST: {user_context}

Analyze this image specifically to address the user's concern. Focus your analysis on what the user is asking about.

ANALYSIS FRAMEWORK:
"""
        
        if 'health_wellbeing' in detected_categories:
            base_prompt += """
1. HEALTH & WELLBEING ASSESSMENT:
   - Physical appearance and posture indicators
   - Signs of fatigue, illness, or vitality
   - Mobility and physical capabilities observed
   - Environmental factors affecting health
   
"""
        
        if 'safety_security' in detected_categories:
            base_prompt += """
2. SAFETY & SECURITY EVALUATION:
   - Potential hazards or risks visible
   - Safety measures in place
   - Environmental dangers
   - Emergency considerations
   
"""
        
        if 'emotional_state' in detected_categories:
            base_prompt += """
3. EMOTIONAL & PSYCHOLOGICAL STATE:
   - Facial expressions and body language
   - Signs of stress, comfort, or distress
   - Social engagement levels
   - Emotional indicators
   
"""
        
        if 'behavior_activity' in detected_categories:
            base_prompt += """
4. BEHAVIOR & ACTIVITY ANALYSIS:
   - Current activities and engagement
   - Behavioral patterns observed
   - Interaction with environment
   - Activity appropriateness
   
"""
        
        # Add contextual reasoning section
        base_prompt += f"""
CONTEXTUAL REASONING:
- How does what you observe in the image relate to the user's specific concern: "{user_context}"?
- What evidence supports or contradicts potential concerns?
- What recommendations would you make based on this specific context?

DIRECT RESPONSE:
Provide a direct, focused answer to the user's question based on visual evidence from the image. Be specific about what you can and cannot determine from the visual information available.

Remember: The user is asking about "{user_context}" - make sure your analysis directly addresses this concern.
"""
        
        return base_prompt
    
    def _parse_contextual_response(self, response: str, user_context: str) -> Dict[str, str]:
        """Parse and structure the contextual response"""
        
        # Simple parsing to extract key sections
        sections = {
            "direct_response": "",
            "observations": "",
            "contextual_insights": "",
            "recommendations": "",
            "confidence_level": ""
        }
        
        try:
            # Look for key sections in the response
            lines = response.split('\n')
            current_section = "direct_response"
            
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                
                # Identify section headers
                if any(keyword in line.lower() for keyword in ['direct response', 'answer', 'conclusion']):
                    current_section = "direct_response"
                elif any(keyword in line.lower() for keyword in ['observation', 'visual', 'see', 'notice']):
                    current_section = "observations"
                elif any(keyword in line.lower() for keyword in ['context', 'reasoning', 'insight']):
                    current_section = "contextual_insights"
                elif any(keyword in line.lower() for keyword in ['recommend', 'suggest', 'advice']):
                    current_section = "recommendations"
                elif any(keyword in line.lower() for keyword in ['confidence', 'certain', 'unclear']):
                    current_section = "confidence_level"
                else:
                    # Add content to current section
                    if sections[current_section]:
                        sections[current_section] += " " + line
                    else:
                        sections[current_section] = line
            
            # If parsing didn't work well, put everything in direct_response
            if not any(sections.values()):
                sections["direct_response"] = response
            
        except Exception:
            sections["direct_response"] = response
        
        return sections

# Initialize the contextual analyzer
contextual_analyzer = ContextualMultimodalAnalyzer()

# === CONTEXTUAL ANALYSIS TOOLS ===

def contextual_image_analysis(input_data: str) -> str:
    """Perform context-aware image analysis"""
    try:
        # Handle different input formats
        if isinstance(input_data, dict):
            image_path = input_data.get('image_path', '')
            user_context = input_data.get('user_context', '')
        else:
            # Assume it's the image path, get context from function attribute
            image_path = str(input_data)
            user_context = getattr(contextual_image_analysis, 'user_context', 'General analysis')
        
        result = contextual_analyzer.contextual_visual_analysis(image_path, user_context)
        
        if result.get("error"):
            return f"❌ Error: {result['error']}"
        
        if result.get("success"):
            analysis = result["contextual_analysis"]
            
            # Format response focusing on user's question
            formatted_response = f"""
🎯 CONTEXTUAL ANALYSIS RESPONSE:

USER QUESTION: {user_context}

DIRECT ANSWER: {analysis.get('direct_response', 'Analysis provided below')}

KEY OBSERVATIONS: {analysis.get('observations', 'See detailed analysis')}

CONTEXTUAL INSIGHTS: {analysis.get('contextual_insights', 'Based on visual evidence')}

RECOMMENDATIONS: {analysis.get('recommendations', 'See analysis for guidance')}

CONFIDENCE LEVEL: {analysis.get('confidence_level', 'Analysis based on visible evidence')}
"""
            return formatted_response
        
        return "Analysis could not be completed"
        
    except Exception as e:
        return f"❌ Analysis error: {str(e)}"

# === OPTIMIZED TOOLS ===

contextual_tool = Tool(
    name="ContextualImageAnalyzer",
    description="Analyze images with specific focus on user's context and questions. Provides targeted responses to user concerns.",
    func=contextual_image_analysis
)

# === CONTEXTUAL AGENT ===

contextual_agent = Agent(
    role="Contextual Visual Intelligence Specialist",
    goal="Provide precise, context-aware analysis that directly addresses user questions and concerns",
    backstory=(
        "You are a specialized analyst who excels at understanding user context and providing "
        "targeted visual analysis. You focus on what the user specifically wants to know, "
        "rather than generic descriptions. You reason through visual evidence to address "
        "specific concerns, questions, or needs expressed by the user."
    ),
    tools=[contextual_tool],
    verbose=True,
    llm=Ollama(
        model="llava:7B",  # Using text model for better reasoning
        base_url="http://localhost:11434",
        temperature=0.4,
        top_p=0.8,
        num_predict=400
    )
)

# === CONTEXTUAL TASK CREATION ===

def create_contextual_task(image_path: str, user_prompt: str):
    """Create a task focused on user's specific context"""
    return Task(
        description=f"""
        The user has asked: "{user_prompt}"
        
        Use the ContextualImageAnalyzer to analyze the image at '{image_path}' with this specific context in mind.
        
        Your job is to:
        1. Focus specifically on what the user is asking about
        2. Provide a direct answer to their question based on visual evidence
        3. Explain your reasoning using what you observe in the image
        4. Address their specific concerns or interests
        5. Be honest about what you can and cannot determine from the image
        
        DO NOT provide a generic image description. Instead, provide a targeted response that directly addresses the user's question: "{user_prompt}"
        
        Structure your response to clearly answer their specific concern.
        """,
        agent=contextual_agent,
        expected_output=f"A focused response that directly addresses: '{user_prompt}' with visual evidence and reasoning"
    )

# === MAIN CONTEXTUAL ANALYSIS FUNCTION ===

def analyze_with_context(image_path: str, user_prompt: str):
    """Main function for contextual image analysis"""
    
    if not os.path.exists(image_path):
        print(f"❌ Error: Image file '{image_path}' not found!")
        return
    
    # Set context for the tool
    contextual_image_analysis.user_context = user_prompt
    
    print(f"🎯 CONTEXTUAL ANALYSIS")
    print(f"📁 Image: {os.path.basename(image_path)}")
    print(f"❓ User Question: {user_prompt}")
    print("=" * 80)
    
    # Create contextual task
    task = create_contextual_task(image_path, user_prompt)
    
    # Create crew with contextual focus
    crew = Crew(
        agents=[contextual_agent],
        tasks=[task],
        verbose=True,
        process="sequential"
    )
    
    # Execute analysis
    start_time = time.time()
    result = crew.kickoff()
    end_time = time.time()
    
    print(f"\n⚡ CONTEXTUAL ANALYSIS COMPLETE ({end_time - start_time:.2f}s)")
    print("=" * 80)
    print(result)
    
    return result

# === USAGE EXAMPLES ===

if __name__ == "__main__":
    
    # Example 1: Health concern
    image_path = r"C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png"
    user_question = "Where is France?"
    
    print("🔍 EXAMPLE 1: Health & Wellbeing Analysis")
    result1 = analyze_with_context(image_path, user_question)
    
    # Save result
    with open("contextual_analysis_result.txt", "w", encoding="utf-8") as f:
        f.write(f"USER QUESTION: {user_question}\n")
        f.write(f"IMAGE: {image_path}\n")
        f.write("=" * 50 + "\n")
        f.write(str(result1))
    
    print(f"\n✅ Analysis saved to 'contextual_analysis_result.txt'")
    
    # More examples:
    """
    # Example 2: Safety concern
    safety_question = "Is this environment safe for a child to play in?"
    result2 = analyze_with_context(image_path, safety_question)
    
    # Example 3: Emotional state
    emotion_question = "How does the person in this image seem to be feeling emotionally?"
    result3 = analyze_with_context(image_path, emotion_question)
    
    # Example 4: Activity assessment
    activity_question = "What activity is taking place and is it appropriate for this person?"
    result4 = analyze_with_context(image_path, activity_question)
    """

🔍 EXAMPLE 1: Health & Wellbeing Analysis
🎯 CONTEXTUAL ANALYSIS
📁 Image: elderly.png
❓ User Question: Where is France?


Overriding of current TracerProvider is not allowed


[DEBUG]: Working Agent: Contextual Visual Intelligence Specialist
[INFO]: Starting Task: 
        The user has asked: "Where is France?"
        
        Use the ContextualImageAnalyzer to analyze the image at 'C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png' with this specific context in mind.
        
        Your job is to:
        1. Focus specifically on what the user is asking about
        2. Provide a direct answer to their question based on visual evidence
        3. Explain your reasoning using what you observe in the image
        4. Address their specific concerns or interests
        5. Be honest about what you can and cannot determine from the image
        
        DO NOT provide a generic image description. Instead, provide a targeted response that directly addresses the user's question: "Where is France?"
        
        Structure your response to clearly answer their specific concern.
        


[1m> Entering new CrewAgentExecutor 

In [28]:
from crewai import Agent, Task, Crew
from langchain_community.llms import Ollama
from langchain.tools import Tool
import base64
import requests
import os
import threading
from typing import Dict, Any, List, Optional
import json
import time
from datetime import datetime
from PIL import Image
from MemoryBank import MemoryBank

class DualResponseContextualAnalyzer:
    def __init__(self, 
                 ollama_base_url="http://localhost:11434",
                 memory_bank: Optional[MemoryBank] = None,
                 persist_directory: str = "./contextual_memory_storage"):
        self.ollama_base_url = ollama_base_url
        self.analysis_cache = {}
        self._lock = threading.Lock()
        
        # Initialize or use provided MemoryBank
        if memory_bank is None:
            self.memory_bank = MemoryBank(
                persist_directory=persist_directory,
                forgetting_enabled=True
            )
        else:
            self.memory_bank = memory_bank
    
    def encode_image_to_base64(self, image_path: str) -> str:
        """Encode image to base64"""
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode('utf-8')
        except Exception as e:
            return None
    
    def generate_direct_answer(self, user_question: str, user_id: str = "default_user") -> str:
        """Generate a direct answer to the user's question using text model"""
        try:
            # Get memory context for personalized response
            memory_context = self.memory_bank.get_prompt_context(user_id, user_question)
            
            # Create prompt for direct answer
            direct_answer_prompt = f"""
USER QUESTION: {user_question}

USER CONTEXT (from memory):
- User Name: {memory_context['user_name']}
- Session: {memory_context['session_count']}
- User Profile: {memory_context['user_portrait']}
- Previous Interactions: {memory_context['memory_records'][:500]}...

TASK: Provide a direct, helpful answer to the user's question. Consider their background and previous interactions to personalize your response. Be concise but informative.

If this is a factual question (like "Where is France?"), provide the factual answer.
If this is a personal question, consider their history and context.
If this is a complex question, break it down clearly.

DIRECT ANSWER:"""

            # Call text model for direct answer
            response = requests.post(
                f"{self.ollama_base_url}/api/generate",
                json={
                    "model": "llama3.2:3b",  # Using text model for better factual responses
                    "prompt": direct_answer_prompt,
                    "stream": False,
                    "options": {
                        "temperature": 0.2,
                        "top_p": 0.9,
                        "num_predict": 300
                    }
                },
                timeout=60
            )
            
            if response.status_code == 200:
                direct_answer = response.json().get("response", "").strip()
                return direct_answer
            else:
                return f"I apologize, but I'm having trouble generating a response right now."
                
        except Exception as e:
            return f"Let me help you with that question, though I'm experiencing some technical difficulties: {str(e)}"
    
    def dual_contextual_analysis(self, 
                                image_path: str, 
                                user_question: str, 
                                user_id: str = "default_user") -> Dict[str, Any]:
        """Perform both direct answer and contextual visual analysis"""
        try:
            # Handle CrewAI argument formats
            if isinstance(image_path, dict):
                image_path = image_path.get('tool_input', image_path)
            
            image_path = str(image_path).strip().strip('"').strip("'")
            
            if not os.path.exists(image_path):
                return {"error": f"Image not found: {image_path}"}
            
            # STEP 1: Generate direct answer to user question
            print("🤖 Generating direct answer...")
            direct_answer = self.generate_direct_answer(user_question, user_id)
            
            # STEP 2: Get memory context for visual analysis
            memory_context = self.memory_bank.get_prompt_context(user_id, user_question)
            
            # STEP 3: Encode image for analysis
            image_b64 = self.encode_image_to_base64(image_path)
            if not image_b64:
                return {"error": "Failed to encode image"}
            
            # STEP 4: Create dual-purpose analysis prompt
            print("🖼️ Performing contextual image analysis...")
            dual_prompt = self._create_dual_analysis_prompt(user_question, direct_answer, memory_context)
            
            # STEP 5: API call for visual analysis
            response = requests.post(
                f"{self.ollama_base_url}/api/generate",
                json={
                    "model": "llava:7b",
                    "prompt": dual_prompt,
                    "images": [image_b64],
                    "stream": False,
                    "options": {
                        "temperature": 0.3,
                        "top_p": 0.8,
                        "num_predict": 1000
                    }
                },
                timeout=180
            )
            
            if response.status_code == 200:
                visual_analysis = response.json().get("response", "")
                
                # Parse the dual response
                structured_result = self._parse_dual_response(visual_analysis, user_question, direct_answer)
                
                # Store both responses in memory
                self._store_dual_analysis_in_memory(
                    user_id=user_id,
                    image_path=image_path,
                    user_question=user_question,
                    direct_answer=direct_answer,
                    visual_analysis=structured_result,
                    raw_visual_response=visual_analysis
                )
                
                return {
                    "success": True,
                    "direct_answer": direct_answer,
                    "visual_analysis": structured_result,
                    "raw_visual_response": visual_analysis,
                    "user_question": user_question,
                    "memory_context_used": True
                }
            else:
                return {
                    "success": True,
                    "direct_answer": direct_answer,
                    "visual_analysis": {"error": f"Visual analysis failed: {response.status_code}"},
                    "user_question": user_question
                }
                
        except Exception as e:
            return {"error": f"Dual analysis failed: {str(e)}"}
    
    def _create_dual_analysis_prompt(self, user_question: str, direct_answer: str, memory_context: Dict) -> str:
        """Create a prompt that handles both direct answer and visual analysis"""
        
        prompt = f"""
DUAL RESPONSE ANALYSIS - DIRECT ANSWER + VISUAL CONTEXT

USER QUESTION: {user_question}
DIRECT ANSWER PROVIDED: {direct_answer}

USER MEMORY CONTEXT:
- Current Time: {memory_context['current_datetime']}
- User: {memory_context['user_name']}
- Session: {memory_context['session_count']}
- User Profile: {memory_context['user_portrait']}
- Previous Conversations: {memory_context['memory_records']}
- Emotional History: {memory_context['emotional_image_context']}
- Important Events: {memory_context['event_summaries']}

ANALYSIS TASK:
Now that we've provided a direct answer to their question, analyze this image to provide additional context-aware insights that complement the direct answer.

Your visual analysis should:

1. VISUAL CONTEXT FOR THE QUESTION:
   - How does what you see in the image relate to their question "{user_question}"?
   - Does the image provide additional context or contradiction to the direct answer?
   - What visual elements are relevant to their inquiry?

2. MEMORY-ENHANCED OBSERVATIONS:
   - Based on their history, what aspects of this image might be particularly relevant to them?
   - How does this image connect to their previous interactions or concerns?
   - What patterns do you notice considering their background?

3. CONTEXTUAL INSIGHTS:
   - What additional information does the image provide beyond the direct answer?
   - How might the visual context change or enhance understanding of the topic?
   - What emotions, situations, or circumstances are visible that add context?

4. PERSONALIZED CONNECTIONS:
   - How might this image and question relate to their personal situation?
   - What follow-up questions or concerns might they have based on what you see?
   - How can the visual information help them better understand the direct answer?

5. INTEGRATED RECOMMENDATIONS:
   - Considering both the direct answer and visual context, what suggestions would you make?
   - How does the combination of textual answer and visual evidence guide your recommendations?

RESPONSE FORMAT:
Provide a comprehensive analysis that bridges the direct answer with visual insights, creating a complete response that addresses both their explicit question and the contextual information visible in the image.

Remember: The user already received the direct answer "{direct_answer}". Now provide visual analysis that adds depth, context, and personalized insights based on what you observe and their history.
"""
        
        return prompt
    
    def _parse_dual_response(self, visual_response: str, user_question: str, direct_answer: str) -> Dict[str, str]:
        """Parse the visual analysis response into structured sections"""
        
        sections = {
            "visual_context": "",
            "memory_observations": "",
            "contextual_insights": "",
            "personal_connections": "",
            "integrated_recommendations": "",
            "additional_notes": ""
        }
        
        try:
            lines = visual_response.split('\n')
            current_section = "visual_context"
            
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                
                # Identify section headers
                if any(keyword in line.lower() for keyword in ['visual context', 'image relation', 'visual element']):
                    current_section = "visual_context"
                elif any(keyword in line.lower() for keyword in ['memory', 'history', 'previous', 'background']):
                    current_section = "memory_observations"
                elif any(keyword in line.lower() for keyword in ['contextual insight', 'additional information', 'beyond']):
                    current_section = "contextual_insights"
                elif any(keyword in line.lower() for keyword in ['personal', 'connection', 'situation', 'individual']):
                    current_section = "personal_connections"
                elif any(keyword in line.lower() for keyword in ['recommend', 'suggest', 'integrated', 'combination']):
                    current_section = "integrated_recommendations"
                else:
                    # Add content to current section
                    if sections[current_section]:
                        sections[current_section] += " " + line
                    else:
                        sections[current_section] = line
            
            # If parsing didn't work well, put everything in visual_context
            if not any(sections.values()):
                sections["visual_context"] = visual_response
            
        except Exception:
            sections["visual_context"] = visual_response
        
        return sections
    
    def _store_dual_analysis_in_memory(self, 
                                     user_id: str, 
                                     image_path: str, 
                                     user_question: str, 
                                     direct_answer: str,
                                     visual_analysis: Dict, 
                                     raw_visual_response: str):
        """Store both direct answer and visual analysis in memory"""
        try:
            # Store the complete interaction
            full_response = f"Question: {user_question}\n"
            full_response += f"Direct Answer: {direct_answer}\n"
            full_response += f"Visual Context: {visual_analysis.get('visual_context', '')}\n"
            full_response += f"Personal Insights: {visual_analysis.get('personal_connections', '')}"
            
            self.memory_bank.add_conversation(
                user_id=user_id,
                conversation_text=full_response,
                user_input=user_question,
                bot_response=f"{direct_answer} | Visual Analysis: {visual_analysis.get('visual_context', '')}",
                metadata={
                    "analysis_type": "dual_response_contextual",
                    "has_direct_answer": True,
                    "has_visual_analysis": True,
                    "image_analyzed": True,
                    "image_path": image_path,
                    "question_type": self._classify_question_type(user_question)
                }
            )
            
            # Store emotional image if relevant
            if os.path.exists(image_path):
                try:
                    pil_image = Image.open(image_path)
                    
                    emotion_description = f"Dual analysis from {datetime.now().strftime('%Y-%m-%d %H:%M')}: "
                    emotion_description += f"User asked '{user_question}'. "
                    emotion_description += f"Direct answer: {direct_answer[:100]}... "
                    emotion_description += f"Visual context: {visual_analysis.get('visual_context', '')[:100]}..."
                    
                    self.memory_bank.add_emotional_image(
                        user_id=user_id,
                        image=pil_image,
                        emotion_description=emotion_description,
                        metadata={
                            "analysis_type": "dual_response",
                            "user_question": user_question,
                            "has_direct_answer": True
                        }
                    )
                except Exception as e:
                    print(f"Error storing emotional image: {e}")
            
        except Exception as e:
            print(f"Error storing dual analysis in memory: {e}")
    
    def _classify_question_type(self, question: str) -> str:
        """Classify the type of question asked"""
        question_lower = question.lower()
        
        if any(word in question_lower for word in ['where', 'location', 'place']):
            return "location"
        elif any(word in question_lower for word in ['what', 'define', 'explain']):
            return "factual"
        elif any(word in question_lower for word in ['how', 'why', 'when']):
            return "explanatory"
        elif any(word in question_lower for word in ['feel', 'emotion', 'mood', 'happy', 'sad']):
            return "emotional"
        elif any(word in question_lower for word in ['help', 'advice', 'suggest', 'recommend']):
            return "advisory"
        else:
            return "general"

# Initialize the dual response analyzer
def initialize_dual_analyzer_with_memory(memory_bank: Optional[MemoryBank] = None):
    """Initialize the dual response analyzer with memory integration"""
    return DualResponseContextualAnalyzer(
        ollama_base_url="http://localhost:11434",
        memory_bank=memory_bank
    )

# Global analyzer instance
dual_analyzer = None

# === DUAL RESPONSE TOOL ===

def dual_response_analysis_tool(input_data: str) -> str:
    """Perform both direct answer and contextual image analysis"""
    global dual_analyzer
    
    try:
        # Handle different input formats
        if isinstance(input_data, dict):
            image_path = input_data.get('image_path', '')
            user_question = input_data.get('user_question', '')
            user_id = input_data.get('user_id', 'default_user')
        else:
            # Get from function attributes
            image_path = str(input_data)
            user_question = getattr(dual_response_analysis_tool, 'user_question', 'General analysis')
            user_id = getattr(dual_response_analysis_tool, 'user_id', 'default_user')
        
        if dual_analyzer is None:
            return "❌ Error: Dual analyzer not initialized"
        
        result = dual_analyzer.dual_contextual_analysis(image_path, user_question, user_id)
        
        if result.get("error"):
            return f"❌ Error: {result['error']}"
        
        if result.get("success"):
            direct_answer = result["direct_answer"]
            visual_analysis = result["visual_analysis"]
            
            # Format comprehensive dual response
            formatted_response = f"""
🎯 DIRECT ANSWER:
{direct_answer}

🧠 MEMORY-ENHANCED VISUAL ANALYSIS:

USER QUESTION: {user_question}
MEMORY INTEGRATION: {'✅ Previous interactions considered' if result.get('memory_context_used') else '❌ No memory context'}

📸 VISUAL CONTEXT FOR YOUR QUESTION:
{visual_analysis.get('visual_context', 'Visual elements analyzed in relation to your question')}

🔗 MEMORY-BASED OBSERVATIONS:
{visual_analysis.get('memory_observations', 'Connections to your history and preferences identified')}

💡 CONTEXTUAL INSIGHTS:
{visual_analysis.get('contextual_insights', 'Additional context provided based on visual evidence')}

👤 PERSONAL CONNECTIONS:
{visual_analysis.get('personal_connections', 'Personalized insights based on your profile')}

🎯 INTEGRATED RECOMMENDATIONS:
{visual_analysis.get('integrated_recommendations', 'Suggestions combining your question and visual context')}

📋 ADDITIONAL NOTES:
{visual_analysis.get('additional_notes', 'Complete analysis stored in memory for future reference')}

📝 Both direct answer and visual analysis stored in memory for future conversations.
"""
            return formatted_response
        
        return "Analysis could not be completed"
        
    except Exception as e:
        return f"❌ Dual analysis error: {str(e)}"

# === DUAL RESPONSE TOOL CREATION ===

dual_response_tool = Tool(
    name="DualResponseAnalyzer",
    description="Provides both direct answers to user questions AND contextual image analysis with memory integration.",
    func=dual_response_analysis_tool
)

# === DUAL RESPONSE AGENT ===

def create_dual_response_agent():
    """Create an agent that handles both direct answers and visual analysis"""
    return Agent(
        role="Dual Response Visual Intelligence Specialist",
        goal="Provide comprehensive responses that include both direct answers to user questions and memory-enhanced contextual visual analysis",
        backstory=(
            "You are an advanced AI assistant that excels at providing complete, helpful responses. "
            "When users ask questions alongside images, you provide both a direct answer to their question "
            "AND a contextual analysis of their image that considers their personal history and context. "
            "You understand that users want their explicit questions answered while also benefiting from "
            "visual insights that enhance their understanding. You combine factual knowledge with "
            "personalized visual analysis to create comprehensive, useful responses."
        ),
        tools=[dual_response_tool],
        verbose=True,
        llm=Ollama(
            model="llava:7b",  # Using multimodal model for visual analysis
            base_url="http://localhost:11434",
            temperature=0.4,
            top_p=0.8,
            num_predict=600
        )
    )

# === DUAL RESPONSE TASK CREATION ===

def create_dual_response_task(image_path: str, user_question: str, user_id: str = "default_user"):
    """Create a task that handles both direct answers and visual analysis"""
    return Task(
        description=f"""
        The user ({user_id}) has asked: "{user_question}"
        
        Use the DualResponseAnalyzer to provide a comprehensive response that includes:
        
        1. DIRECT ANSWER: A clear, direct answer to their specific question "{user_question}"
        2. VISUAL ANALYSIS: A contextual analysis of the image at '{image_path}' that:
           - Considers their personal history and memory context
           - Relates the visual content to their question
           - Provides additional insights beyond the direct answer
           - Offers personalized recommendations
        
        Your response should be complete and helpful, addressing both their explicit question 
        and providing valuable visual insights that enhance their understanding.
        
        Both the direct answer and visual analysis will be stored in memory for future reference.
        """,
        agent=create_dual_response_agent(),
        expected_output=f"A comprehensive dual response providing both a direct answer to '{user_question}' and memory-enhanced contextual visual analysis"
    )

# === MAIN DUAL ANALYSIS FUNCTION ===

def analyze_with_dual_response(image_path: str, 
                              user_question: str, 
                              user_id: str = "default_user",
                              memory_bank: Optional[MemoryBank] = None):
    """Main function for dual response analysis (direct answer + visual analysis)"""
    
    global dual_analyzer
    
    if not os.path.exists(image_path):
        print(f"❌ Error: Image file '{image_path}' not found!")
        return
    
    # Initialize analyzer with memory if not already done
    if dual_analyzer is None:
        dual_analyzer = initialize_dual_analyzer_with_memory(memory_bank)
    
    # Set context for the tool
    dual_response_analysis_tool.user_question = user_question
    dual_response_analysis_tool.user_id = user_id
    
    print(f"🎯 DUAL RESPONSE ANALYSIS")
    print(f"👤 User: {user_id}")
    print(f"📁 Image: {os.path.basename(image_path)}")
    print(f"❓ Question: {user_question}")
    print("=" * 80)
    
    # Create dual response task
    task = create_dual_response_task(image_path, user_question, user_id)
    
    # Create crew with dual response capability
    crew = Crew(
        agents=[create_dual_response_agent()],
        tasks=[task],
        verbose=True,
        process="sequential"
    )
    
    # Execute analysis
    start_time = time.time()
    result = crew.kickoff()
    end_time = time.time()
    
    print(f"\n⚡ DUAL RESPONSE ANALYSIS COMPLETE ({end_time - start_time:.2f}s)")
    print("=" * 80)
    print(result)
    
    return result

# === USAGE EXAMPLES ===

if __name__ == "__main__":
    
    # Initialize memory bank
    memory_bank = MemoryBank(
        persist_directory="./dual_response_memory_storage",
        forgetting_enabled=True
    )
    
    # Example 1: Geographic question with image
    image_path = r"C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png"
    user_question = "Where is France?"
    user_id = "student_001"
    
    print("🌍 EXAMPLE 1: Geographic Question + Visual Context")
    result1 = analyze_with_dual_response(
        image_path=image_path,
        user_question=user_question,
        user_id=user_id,
        memory_bank=memory_bank
    )
    
    # Example 2: Emotional question with image
    user_question2 = "How does the person in this image seem to be feeling emotionally?"
    
    print("\n" + "="*80)
    print("😊 EXAMPLE 2: Emotional Question + Visual Analysis")
    result2 = analyze_with_dual_response(
        image_path=image_path,
        user_question=user_question2,
        user_id=user_id,
        memory_bank=memory_bank
    )
    
    # Save results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"dual_response_analysis_{timestamp}.txt"
    
    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"DUAL RESPONSE ANALYSIS RESULTS\n")
        f.write(f"User ID: {user_id}\n")
        f.write(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("=" * 60 + "\n\n")
        f.write("EXAMPLE 1 - Geographic Question:\n")
        f.write(f"Question: {user_question}\n")
        f.write(str(result1))
        f.write("\n\n" + "="*60 + "\n\n")
        f.write("EXAMPLE 2 - Emotional Question:\n")
        f.write(f"Question: {user_question2}\n")
        f.write(str(result2))
    
    print(f"\n✅ Dual response analysis saved to '{filename}'")
    print("\n🎯 SUMMARY: The system now provides both direct answers and contextual visual analysis!")

Overriding of current TracerProvider is not allowed


🌍 EXAMPLE 1: Geographic Question + Visual Context
🎯 DUAL RESPONSE ANALYSIS
👤 User: student_001
📁 Image: elderly.png
❓ Question: Where is France?
[DEBUG]: Working Agent: Dual Response Visual Intelligence Specialist
[INFO]: Starting Task: 
        The user (student_001) has asked: "Where is France?"
        
        Use the DualResponseAnalyzer to provide a comprehensive response that includes:
        
        1. DIRECT ANSWER: A clear, direct answer to their specific question "Where is France?"
        2. VISUAL ANALYSIS: A contextual analysis of the image at 'C:\Users\drodm\OneDrive\Documents\GitHub\Dolores-AI\Dolores-AI\uploads\elderly.png' that:
           - Considers their personal history and memory context
           - Relates the visual content to their question
           - Provides additional insights beyond the direct answer
           - Offers personalized recommendations
        
        Your response should be complete and helpful, addressing both their explicit question 


Overriding of current TracerProvider is not allowed



[1m> Finished chain.[0m
[DEBUG]: [Dual Response Visual Intelligence Specialist] Task output: France is a country located in Western Europe. It is bordered by the Atlantic Ocean, the English Channel, Belgium, Germany, Luxembourg, Switzerland, Italy, Spain, Andorra, and Monaco. The capital of France is Paris. In the image you provided, there are no visual cues that directly relate to France or its geographical location. However, I can provide additional insights based on your personal history and memory context. For example, if you have a particular interest in French culture, history, or language, I could offer recommendations for learning more about these topics. Additionally, if you have any specific memories or experiences related to France, I could help you connect those memories to the broader context of the country.



⚡ DUAL RESPONSE ANALYSIS COMPLETE (16.54s)
France is a country located in Western Europe. It is bordered by the Atlantic Ocean, the English Channel, Belgium, Ger