In [None]:
# Single Disease Analysis - Cookbook Example 02

This notebook demonstrates comprehensive analysis of a single disease using multiple WebSearcher agents for different analysis types.

## 🎯 What You'll Learn

- How to combine multiple analysis types (socioeconomic impact + research groups)
- Results synthesis and comparison techniques
- Building comprehensive disease profiles
- Error handling in multi-step workflows
- Priority assessment based on multiple criteria

## 🏥 Comprehensive Analysis Process

1. **Socioeconomic Impact Assessment** - Cost and burden analysis
2. **Research Groups Identification** - CIBERER groups analysis  
3. **Results Synthesis** - Combined insights and priority scoring
4. **Comparative Analysis** - Multiple diseases comparison

Let's build a complete disease analysis pipeline! 🚀


In [None]:
# Setup and imports for single disease analysis
import sys
import os
sys.path.insert(0, os.path.abspath('../../'))

# Initialize prompt system
import apps.research_prioritization.prompts.prompt_registry
from agents import WebSearcher
from dataclasses import dataclass
from typing import Dict, Any, List

# Client settings shared by the WebSearcher agents built later in this notebook
CLIENT_CONFIG = dict(
    reasoning=dict(effort="medium"),
    max_output_tokens=5000,
)

# Startup banner: one status line per entry, printed in order
for status_line in (
    "📊 Single Disease Analysis System Ready!",
    f"💻 Client Configuration: {CLIENT_CONFIG}",
    "🔗 Available Tools: WebSearcher for comprehensive analysis",
):
    print(status_line)


In [None]:
# Create analysis framework with helper structures
@dataclass
class DiseaseAnalysis:
    """Container for comprehensive disease analysis results.

    Attributes:
        orphacode: Disease identifier, stored as a string.
        disease_name: Human-readable disease name.
        socioeconomic_result: Result of the socioeconomic step, or None when
            that step has not produced a result. Read via its ``score``
            attribute when computing the priority score.
        groups_result: Result of the research-groups step, or None when that
            step has not produced a result. Read via its ``groups`` attribute.
        synthesis: Summary dictionary attached after the analysis, or None.
        error_log: ``"component: message"`` entries, one per recorded failure.
    """
    orphacode: str
    disease_name: str
    socioeconomic_result: Any = None
    groups_result: Any = None
    synthesis: Dict[str, Any] = None
    error_log: List[str] = None

    def __post_init__(self):
        # The None default plus this hook gives every instance its own list;
        # it also covers callers that pass error_log=None explicitly.
        if self.error_log is None:
            self.error_log = []

    def add_error(self, component: str, error: str):
        """Record a failure message for the named analysis component."""
        self.error_log.append(f"{component}: {error}")

    def is_complete(self) -> bool:
        """Return True when both the socioeconomic and groups results are set."""
        return self.socioeconomic_result is not None and self.groups_result is not None

    @staticmethod
    def _coerce_score(raw_score: Any) -> int:
        """Convert a raw score to int, falling back to 0 when it is unusable.

        Integers, floats and numeric strings (including "7.5") are accepted;
        fractional values are truncated the same way ``int()`` truncates floats.
        """
        for convert in (int, lambda value: int(float(value))):
            try:
                return convert(raw_score)
            except (TypeError, ValueError, OverflowError):
                continue
        return 0

    def get_priority_score(self) -> int:
        """Calculate a priority score in the range 0-10.

        Returns:
            0 when the analysis is incomplete. Otherwise the socioeconomic
            score, plus 2 when at least one research group was identified,
            clamped to the 0-10 range. A missing or non-numeric socioeconomic
            score counts as 0 instead of raising.
        """
        if not self.is_complete():
            return 0

        # Base score from socioeconomic impact
        base_score = self._coerce_score(getattr(self.socioeconomic_result, "score", None))

        # Boost if research groups identified
        if self.groups_result and getattr(self.groups_result, "groups", None):
            base_score += 2  # Existing research capacity boost

        # Clamp on both sides so callers can always render "<score>/10"
        return max(0, min(base_score, 10))

class DiseaseAnalyzer:
    """Comprehensive disease analysis using multiple WebSearcher agents.

    One searcher is created per analysis type ("socioeconomic_v2" and
    "groups_v2"); both receive the same client configuration.
    """

    def __init__(self, client_config: dict):
        """Store the client configuration and build both searchers from it."""
        self.client_config = client_config
        self.socio_searcher = WebSearcher("socioeconomic_v2", client_config)
        self.groups_searcher = WebSearcher("groups_v2", client_config)

    def analyze_disease(self, orphacode: str, disease_name: str) -> "DiseaseAnalysis":
        """Run complete analysis for a single disease.

        Every step (socioeconomic search, groups search, synthesis) runs in
        its own try block: a failure is appended to the analysis error log
        and printed, and the remaining steps still run.

        Args:
            orphacode: Disease identifier forwarded to both searchers.
            disease_name: Disease name forwarded to both searchers.

        Returns:
            The DiseaseAnalysis holding whichever results were obtained.
        """
        analysis = DiseaseAnalysis(orphacode, disease_name)
        template_data = {"orphacode": orphacode, "disease_name": disease_name}

        print(f"🔍 Starting comprehensive analysis for {disease_name} (Orphacode: {orphacode})")

        # Socioeconomic analysis
        try:
            print("  📊 Running socioeconomic impact analysis...")
            analysis.socioeconomic_result = self.socio_searcher.search(template_data)
            print(f"  ✅ Socioeconomic analysis complete - Score: {analysis.socioeconomic_result.score}")
        except Exception as e:
            analysis.add_error("socioeconomic", str(e))
            print(f"  ❌ Socioeconomic analysis failed: {e}")

        # Research groups analysis
        try:
            print("  👥 Running research groups analysis...")
            analysis.groups_result = self.groups_searcher.search(template_data)
            group_count = len(analysis.groups_result.groups) if analysis.groups_result.groups else 0
            print(f"  ✅ Groups analysis complete - Found {group_count} relevant groups")
        except Exception as e:
            analysis.add_error("groups", str(e))
            print(f"  ❌ Groups analysis failed: {e}")

        # Generate synthesis. Guarded like the search steps so that a
        # malformed result cannot discard the partial analysis gathered above.
        try:
            analysis.synthesis = self._synthesize_results(analysis)
            print(f"🎯 Analysis complete - Priority Score: {analysis.synthesis['priority_score']}/10")
        except Exception as e:
            analysis.add_error("synthesis", str(e))
            print(f"  ❌ Synthesis failed: {e}")

        return analysis

    def _synthesize_results(self, analysis: "DiseaseAnalysis") -> Dict[str, Any]:
        """Create synthesis of all analysis components.

        Args:
            analysis: The analysis whose results are summarised.

        Returns:
            A dictionary with the keys "completion_status" ("complete" or
            "partial"), "priority_score", "key_insights" and
            "recommendations" (both lists of strings).
        """
        synthesis = {
            "completion_status": "complete" if analysis.is_complete() else "partial",
            "priority_score": analysis.get_priority_score(),
            "key_insights": [],
            "recommendations": []
        }

        # Socioeconomic insights
        if analysis.socioeconomic_result:
            # A missing study list counts as zero studies, mirroring how an
            # empty groups list is handled in analyze_disease.
            studies = analysis.socioeconomic_result.socioeconomic_impact_studies or []
            synthesis["key_insights"].append(f"Found {len(studies)} socioeconomic studies")
            synthesis["key_insights"].append(f"Evidence level: {analysis.socioeconomic_result.evidence_level}")

        # Research groups insights
        if analysis.groups_result and analysis.groups_result.groups:
            group_count = len(analysis.groups_result.groups)
            synthesis["key_insights"].append(f"Identified {group_count} relevant CIBERER research groups")
            synthesis["recommendations"].append("Strong research infrastructure available")
        else:
            synthesis["recommendations"].append("Consider developing research partnerships")

        return synthesis

# Build the analyzer used by the example cells from the shared client settings
analyzer = DiseaseAnalyzer(client_config=CLIENT_CONFIG)
print("🏗️ Disease analyzer initialized with dual WebSearcher setup!")


In [None]:
## 🧬 Example 1: Wilson Disease - Well-Studied Rare Disease

Let's analyze Wilson disease, which should have good socioeconomic data and potentially relevant research groups.


In [None]:
# Run the full pipeline for Wilson disease, then print a summary report
wilson_analysis = analyzer.analyze_disease("905", "Wilson disease")

# Report header and headline fields
print("\n📋 COMPREHENSIVE ANALYSIS REPORT")
print("=" * 50)
print(f"🏷️  Disease: {wilson_analysis.disease_name}")
print(f"🔢 Orphacode: {wilson_analysis.orphacode}")
print(f"✅ Complete: {wilson_analysis.is_complete()}")
print(f"🎯 Priority Score: {wilson_analysis.get_priority_score()}/10")

# Synthesis sections: each heading is followed by its bullet list
wilson_synthesis = wilson_analysis.synthesis
if wilson_synthesis:
    for heading, section_key in (
        ("\n🔬 KEY INSIGHTS:", "key_insights"),
        ("\n💡 RECOMMENDATIONS:", "recommendations"),
    ):
        print(heading)
        for entry in wilson_synthesis[section_key]:
            print(f"  • {entry}")

# Failures recorded while the analysis ran, if any
wilson_errors = wilson_analysis.error_log
if wilson_errors:
    print("\n⚠️  ERRORS ENCOUNTERED:")
    for entry in wilson_errors:
        print(f"  • {entry}")


In [None]:
# Detailed results inspection (optional)
print("🔍 DETAILED SOCIOECONOMIC RESULTS:")
wilson_socio = wilson_analysis.socioeconomic_result
if wilson_socio:
    # Treat a missing study list as empty so the count below cannot fail
    wilson_studies = wilson_socio.socioeconomic_impact_studies or []
    print(f"  Score: {wilson_socio.score}")
    print(f"  Evidence: {wilson_socio.evidence_level}")
    print(f"  Studies Found: {len(wilson_studies)}")

    if wilson_studies:
        print("  📊 Study Examples:")
        # Only the first two studies are shown to keep the output short
        for position, study in enumerate(wilson_studies[:2], start=1):
            print(f"    {position}. {study.measure}: {study.cost} ({study.country}, {study.year})")
else:
    # Mirror the groups section so a failed step is visible instead of silent
    print("  No socioeconomic analysis available")

print("\n👥 DETAILED GROUPS RESULTS:")
wilson_groups_result = wilson_analysis.groups_result
if wilson_groups_result:
    wilson_groups = wilson_groups_result.groups or []
    print(f"  Groups Found: {len(wilson_groups)}")

    if wilson_groups:
        print("  🏥 Research Groups:")
        # Only the first three groups are shown to keep the output short
        for position, group in enumerate(wilson_groups[:3], start=1):
            print(f"    {position}. {group}")
else:
    print("  No groups analysis available")


In [None]:
## 🧬 Example 2: Huntington Disease - Comparative Analysis

Let's analyze a second disease with the same analyzer to demonstrate side-by-side comparative assessment (batch processing of many diseases is covered in Notebook 03).


In [None]:
# Analyze Huntington disease for comparison
huntington_analysis = analyzer.analyze_disease("399", "Huntington disease")

print("\n📊 COMPARATIVE ANALYSIS")
print("=" * 50)

diseases = [
    ("Wilson Disease", wilson_analysis),
    ("Huntington Disease", huntington_analysis)
]

print(f"{'Disease':<20} {'Priority':<10} {'Complete':<10} {'Socio Score':<12} {'Groups Found':<12}")
print("-" * 70)

for name, analysis in diseases:
    complete = "✅ Yes" if analysis.is_complete() else "❌ No"
    priority = f"{analysis.get_priority_score()}/10"

    # "N/A" marks a step that produced no result; a number means the step ran
    socio_score = "N/A"
    if analysis.socioeconomic_result:
        socio_score = str(analysis.socioeconomic_result.score)

    # Same convention as the socioeconomic column: a failed groups step shows
    # "N/A", while "0" means the step ran and found no groups
    groups_count = "N/A"
    if analysis.groups_result:
        groups_count = str(len(analysis.groups_result.groups or []))

    print(f"{name:<20} {priority:<10} {complete:<10} {socio_score:<12} {groups_count:<12}")

# Compute each score once and reuse it in the comparison and the messages
wilson_priority = wilson_analysis.get_priority_score()
huntington_priority = huntington_analysis.get_priority_score()

print("\n🎯 PRIORITIZATION INSIGHTS:")
if wilson_priority > huntington_priority:
    print(f"  Wilson Disease has higher priority ({wilson_priority}) vs Huntington ({huntington_priority})")
elif huntington_priority > wilson_priority:
    print(f"  Huntington Disease has higher priority ({huntington_priority}) vs Wilson ({wilson_priority})")
else:
    print(f"  Both diseases have equal priority scores ({wilson_priority})")

# Show any differences in analysis completion
if wilson_analysis.is_complete() != huntington_analysis.is_complete():
    print("  ⚠️  Analysis completion differs between diseases - review error logs")


In [None]:
## 📚 Key Learnings & Best Practices

### ✅ What We Accomplished

1. **Multi-Agent Analysis**: Combined socioeconomic and groups analysis
2. **Error Resilience**: Handled failures gracefully with partial results  
3. **Results Synthesis**: Created unified insights from multiple data sources
4. **Priority Scoring**: Automated prioritization based on multiple criteria
5. **Comparative Analysis**: Side-by-side disease comparison capabilities

### 🎯 Best Practices Demonstrated

- **Comprehensive Framework**: Use structured data classes for complex workflows
- **Error Handling**: Always capture and report component-level failures
- **Results Synthesis**: Combine insights from multiple analysis types
- **Priority Metrics**: Develop scoring systems for decision support
- **Comparative Analysis**: Build frameworks for multiple entity assessment

### 🚀 Next Steps

- **Notebook 03**: Learn batch processing for multiple diseases
- **Notebook 04**: Master error handling and recovery strategies
- **Notebook 05**: Explore performance optimization techniques
- **Production Use**: Apply these patterns in your research workflows

The comprehensive analysis framework provides a robust foundation for research prioritization! 🎊
