In [None]:
# Custom Workflows - Cookbook Example 06

This notebook demonstrates how to build custom analysis workflows using WebSearcher agents for specialized research scenarios and domain-specific requirements.

## 🎯 What You'll Learn

- Building specialized analysis pipelines
- Custom scoring and prioritization algorithms
- Multi-stage workflow orchestration
- Domain-specific prompt combinations
- Workflow templates for research teams
- Decision trees and conditional logic
- Results aggregation and reporting

## 🔬 Custom Workflow Benefits

1. **Specialization**: Tailored analysis for specific research domains
2. **Flexibility**: Adaptable workflows for different use cases
3. **Automation**: End-to-end analysis pipelines
4. **Consistency**: Standardized research methodologies
5. **Scalability**: Reusable templates for research teams

Let's build powerful custom research workflows! 🚀


In [None]:
# Setup for custom workflows
import sys
import os
sys.path.insert(0, os.path.abspath('../../'))

# Initialize prompt system
import apps.research_prioritization.prompts.prompt_registry
from agents import WebSearcher

# Custom workflow imports
from dataclasses import dataclass, field
from typing import Dict, List, Any, Optional, Callable, Union
from enum import Enum
import json
import time

# Settings handed to the workflow templates as their `client_config`
WORKFLOW_CONFIG = dict(
    reasoning=dict(effort="medium"),
    max_output_tokens=4000,
)

# One print call, one banner line per argument
print(
    "🔬 Custom Workflows System Ready!",
    f"💻 Configuration: {WORKFLOW_CONFIG}",
    "🚀 Ready for specialized research workflows",
    sep="\n",
)


In [None]:
# Custom workflow framework for research analysis

class WorkflowStage(Enum):
    """Named stages that a workflow step can be assigned to.

    Every ``WorkflowStep`` carries one of these members in its ``stage``
    field. ``CustomWorkflow`` groups steps that share a member and runs them
    together as one stage; the member's string ``value`` is the key under
    which that stage's score is stored in ``WorkflowResult.stage_scores``.
    """
    INITIAL_SCREENING = "initial_screening"
    DETAILED_ANALYSIS = "detailed_analysis"
    CROSS_VALIDATION = "cross_validation"
    FINAL_SCORING = "final_scoring"
    REPORTING = "reporting"

@dataclass
class WorkflowStep:
    """Declarative description of one step in a ``CustomWorkflow``.

    A step only names which searcher to run and how its outcome is treated;
    the workflow engine performs the actual execution.
    """
    # Key under which the step's result is stored in WorkflowResult.step_results;
    # other steps refer to it by this name in their `depends_on` list.
    name: str
    # Stage this step is grouped into for execution and stage scoring.
    stage: WorkflowStage
    # Alias passed to the WebSearcher constructor; the workflow keeps one
    # searcher instance per distinct alias.
    searcher_alias: str
    # When True, a failure (or unmet dependency) of this step aborts the workflow.
    required: bool = True
    # Names of steps whose results must already be present before this one runs.
    depends_on: List[str] = field(default_factory=list)
    # Relative weight of this step in its stage's weighted-average score.
    scoring_weight: float = 1.0
    # NOTE(review): not read anywhere by CustomWorkflow in this notebook, so no
    # timeout is actually enforced — confirm whether this is meant to be advisory.
    timeout_seconds: int = 30

@dataclass
class WorkflowResult:
    """Mutable record of one workflow run for a single disease.

    ``CustomWorkflow.execute`` creates an instance, fills it in while the
    stages run, and returns it to the caller.
    """
    # Identification of the disease the run was executed for.
    disease_name: str
    orphacode: str
    # Raw searcher output for every step that ran successfully, keyed by step name.
    step_results: Dict[str, Any] = field(default_factory=dict)
    # Weighted score of each finished stage, keyed by the stage's string value.
    stage_scores: Dict[str, float] = field(default_factory=dict)
    # Overall score; stays 0.0 unless the workflow runs to completion.
    final_score: float = 0.0
    # "pending" until execute() sets it to "completed" or "failed".
    workflow_status: str = "pending"
    # Wall-clock duration of the run in seconds (measured with time.time()).
    execution_time: float = 0.0
    # Human-readable messages for failed steps, unmet dependencies and
    # scoring rules that raised.
    errors: List[str] = field(default_factory=list)

class CustomWorkflow:
    """Flexible, stage-based workflow engine for research analysis.

    A workflow is an ordered collection of ``WorkflowStep`` objects. Steps are
    grouped by their ``stage``; each stage is executed as a unit and receives a
    weighted score, and the stage scores are combined (plus optional custom
    scoring rules) into a final score clamped to the 0-10 range.

    Note: ``WorkflowStep.timeout_seconds`` is not enforced by this engine.
    """

    def __init__(self, name: str, client_config: dict):
        """Create an empty workflow.

        Args:
            name: Display name used in progress messages.
            client_config: Configuration forwarded to every ``WebSearcher``
                this workflow creates.
        """
        self.name = name
        self.client_config = client_config
        self.steps: List[WorkflowStep] = []
        self.searchers: Dict[str, WebSearcher] = {}
        self.scoring_rules: List[Callable] = []

    def add_step(self, step: WorkflowStep):
        """Register a step and make sure a searcher exists for its alias.

        Searchers are created lazily and shared: two steps using the same
        ``searcher_alias`` reuse a single ``WebSearcher`` instance.
        """
        self.steps.append(step)

        # Initialize searcher if not already done
        if step.searcher_alias not in self.searchers:
            self.searchers[step.searcher_alias] = WebSearcher(
                step.searcher_alias, self.client_config
            )

    def add_scoring_rule(self, rule: Callable[[WorkflowResult], float]):
        """Register a rule whose return value is added to the final score.

        Args:
            rule: Callable receiving the ``WorkflowResult`` and returning a
                (positive or negative) score adjustment.
        """
        self.scoring_rules.append(rule)

    def execute(self, orphacode: str, disease_name: str) -> WorkflowResult:
        """Run every stage of the workflow for one disease.

        Args:
            orphacode: Disease identifier passed to each searcher.
            disease_name: Disease name passed to each searcher.

        Returns:
            The populated ``WorkflowResult``. ``workflow_status`` is
            ``"completed"`` on success and ``"failed"`` as soon as a required
            step fails or cannot run; in the failed case the remaining stages
            are skipped and ``final_score`` stays at its default.
            ``execution_time`` is recorded on every exit path.
        """
        start_time = time.time()
        result = WorkflowResult(disease_name=disease_name, orphacode=orphacode)
        template_data = {"orphacode": orphacode, "disease_name": disease_name}

        print(f"🔬 Executing {self.name} workflow for {disease_name}")

        try:
            # Execute each stage
            for stage, steps in self._group_steps_by_stage().items():
                print(f"  📊 Stage: {stage.value}")
                stage_start = time.time()

                stage_results = self._run_stage(steps, template_data, result)
                if stage_results is None:
                    # A required step failed or was blocked: abort the run.
                    result.workflow_status = "failed"
                    return result

                # Calculate stage score
                stage_score = self._calculate_stage_score(stage, stage_results)
                result.stage_scores[stage.value] = stage_score
                stage_time = time.time() - stage_start
                print(f"  🎯 Stage score: {stage_score:.2f} (took {stage_time:.1f}s)")

            # Calculate final score
            result.final_score = self._calculate_final_score(result)
            result.workflow_status = "completed"
        finally:
            # Runs for early "failed" returns too, so the duration is never
            # left at its 0.0 default.
            result.execution_time = time.time() - start_time

        print(f"🎊 Workflow completed! Final score: {result.final_score:.2f}")
        return result

    def _run_stage(
        self,
        steps: List[WorkflowStep],
        template_data: Dict[str, str],
        result: WorkflowResult,
    ) -> Optional[Dict[str, Any]]:
        """Run the steps of one stage, recording outputs and errors on ``result``.

        Returns:
            The outputs of the steps that succeeded in this stage keyed by
            step name, or ``None`` when a required step failed or could not
            run because of missing dependencies.
        """
        stage_results: Dict[str, Any] = {}

        for step in steps:
            if not self._can_execute_step(step, result.step_results):
                if step.required:
                    result.errors.append(
                        f"Required step {step.name} cannot execute (missing dependencies)"
                    )
                    return None
                # Optional step with unmet dependencies is skipped silently.
                continue

            try:
                print(f"    ⚙️  Executing: {step.name}")
                step_result = self.searchers[step.searcher_alias].search(template_data)
                result.step_results[step.name] = step_result
                stage_results[step.name] = step_result
                print(f"    ✅ Completed: {step.name}")
            except Exception as e:
                error_msg = f"Step {step.name} failed: {str(e)}"
                result.errors.append(error_msg)
                print(f"    ❌ Failed: {step.name} - {str(e)[:50]}...")

                if step.required:
                    return None

        return stage_results

    def _group_steps_by_stage(self) -> Dict[WorkflowStage, List[WorkflowStep]]:
        """Group workflow steps by stage, ordered by ``WorkflowStage`` definition.

        Within a stage, steps keep the order in which they were added. Stages
        are returned in the order the enum declares them rather than in the
        order their first step happened to be added, so a step registered for
        a later stage cannot run before the earlier-stage steps it depends on.
        Stage values that are not ``WorkflowStage`` members sort last.
        """
        grouped: Dict[WorkflowStage, List[WorkflowStep]] = {}
        for step in self.steps:
            grouped.setdefault(step.stage, []).append(step)

        stage_rank = {stage: position for position, stage in enumerate(WorkflowStage)}
        ordered = sorted(
            grouped.items(),
            key=lambda item: stage_rank.get(item[0], len(stage_rank)),
        )
        return dict(ordered)

    def _can_execute_step(self, step: WorkflowStep, completed_steps: Dict[str, Any]) -> bool:
        """Return True when every name in ``step.depends_on`` has a stored result."""
        return all(dependency in completed_steps for dependency in step.depends_on)

    def _calculate_stage_score(self, stage: WorkflowStage, stage_results: Dict[str, Any]) -> float:
        """Return the weighted average score of the results in one stage.

        Per-step score: ``result.score`` when the result has that attribute;
        otherwise twice the number of ``result.groups`` capped at 10; otherwise
        a default of 5.0. Falsy results and unknown step names are ignored.
        Returns 0.0 when nothing contributes.
        """
        if not stage_results:
            return 0.0

        total_score = 0.0
        total_weight = 0.0

        for step_name, result in stage_results.items():
            step = next((s for s in self.steps if s.name == step_name), None)
            if step and result:
                # Extract score based on result type
                if hasattr(result, 'score'):
                    step_score = float(result.score)
                elif hasattr(result, 'groups') and result.groups:
                    step_score = min(len(result.groups) * 2, 10)  # Max 10
                else:
                    step_score = 5.0  # Default score

                total_score += step_score * step.scoring_weight
                total_weight += step.scoring_weight

        return total_score / total_weight if total_weight > 0 else 0.0

    def _calculate_final_score(self, result: WorkflowResult) -> float:
        """Combine stage scores and scoring rules into the final 0-10 score.

        The base is the plain average of the stage scores. Each scoring rule's
        return value is then added; a rule that raises is recorded in
        ``result.errors`` and contributes nothing.
        """
        if not result.stage_scores:
            return 0.0

        # Start with average of stage scores
        base_score = sum(result.stage_scores.values()) / len(result.stage_scores)

        # Apply custom scoring rules
        for rule in self.scoring_rules:
            try:
                adjustment = rule(result)
                base_score += adjustment
            except Exception as e:
                result.errors.append(f"Scoring rule failed: {str(e)}")

        return min(max(base_score, 0.0), 10.0)  # Clamp to 0-10 range


# Example workflow templates
class WorkflowTemplates:
    """Pre-built workflow templates for common scenarios.

    NOTE(review): the searcher aliases used below ("socioeconomic_v1",
    "socioeconomic_v2", "groups_v1", "groups_v2") are presumably registered by
    the prompt registry imported at the top of the notebook — confirm.
    """

    @staticmethod
    def comprehensive_rare_disease_analysis(client_config: dict) -> CustomWorkflow:
        """Build the comprehensive analysis workflow for rare diseases.

        One screening step, then two detailed-analysis steps (the detailed
        socioeconomic step depends on the screening step), plus two bonus
        scoring rules.
        """
        workflow = CustomWorkflow("Comprehensive Rare Disease Analysis", client_config)

        # Initial screening stage
        workflow.add_step(WorkflowStep(
            name="socioeconomic_screening",
            stage=WorkflowStage.INITIAL_SCREENING,
            searcher_alias="socioeconomic_v1",
            scoring_weight=1.0
        ))

        # Detailed analysis stage
        workflow.add_step(WorkflowStep(
            name="detailed_socioeconomic",
            stage=WorkflowStage.DETAILED_ANALYSIS,
            searcher_alias="socioeconomic_v2",
            depends_on=["socioeconomic_screening"],
            scoring_weight=2.0
        ))

        workflow.add_step(WorkflowStep(
            name="research_groups_analysis",
            stage=WorkflowStage.DETAILED_ANALYSIS,
            searcher_alias="groups_v2",
            scoring_weight=1.5
        ))

        # Add scoring rules
        def high_impact_bonus(result: WorkflowResult) -> float:
            """Bonus of 1.0 when the detailed socioeconomic score is at least 8."""
            if "detailed_socioeconomic" in result.step_results:
                socio = result.step_results["detailed_socioeconomic"]
                # float() matches how stage scoring parses `score`; int() would
                # raise on values such as "8.5" and silently drop the bonus.
                if hasattr(socio, 'score') and float(socio.score) >= 8:
                    return 1.0
            return 0.0

        def research_capacity_bonus(result: WorkflowResult) -> float:
            """Bonus of 0.5 when at least two research groups were found."""
            if "research_groups_analysis" in result.step_results:
                groups = result.step_results["research_groups_analysis"]
                if hasattr(groups, 'groups') and groups.groups and len(groups.groups) >= 2:
                    return 0.5
            return 0.0

        workflow.add_scoring_rule(high_impact_bonus)
        workflow.add_scoring_rule(research_capacity_bonus)

        return workflow

    @staticmethod
    def quick_screening_workflow(client_config: dict) -> CustomWorkflow:
        """Build the fast screening workflow: two steps in a single stage."""
        workflow = CustomWorkflow("Quick Screening", client_config)

        workflow.add_step(WorkflowStep(
            name="fast_socioeconomic",
            stage=WorkflowStage.INITIAL_SCREENING,
            searcher_alias="socioeconomic_v1",
            scoring_weight=1.0
        ))

        workflow.add_step(WorkflowStep(
            name="groups_check",
            stage=WorkflowStage.INITIAL_SCREENING,
            searcher_alias="groups_v1",
            scoring_weight=0.5
        ))

        return workflow


print("🏗️ Custom workflow framework initialized!")
print("✅ Features: Multi-stage execution, Dependency management, Custom scoring")
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 🧪 Example: Comprehensive Rare Disease Workflow\n",
    "\n",
    "Let's demonstrate a comprehensive workflow for rare disease analysis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create and execute comprehensive workflow\n",
    "comprehensive_workflow = WorkflowTemplates.comprehensive_rare_disease_analysis(WORKFLOW_CONFIG)\n",
    "\n",
    "print(\"🔬 COMPREHENSIVE RARE DISEASE WORKFLOW\")\n",
    "print(\"=\" * 45)\n",
    "\n",
    "# Execute workflow for Wilson disease\n",
    "wilson_result = comprehensive_workflow.execute(\"905\", \"Wilson disease\")\n",
    "\n",
    "print(f\"\\n📋 WORKFLOW RESULTS SUMMARY\")\n",
    "print(\"=\" * 30)\n",
    "print(f\"🏷️  Disease: {wilson_result.disease_name}\")\n",
    "print(f\"🔢 Orphacode: {wilson_result.orphacode}\")\n",
    "print(f\"✅ Status: {wilson_result.workflow_status}\")\n",
    "print(f\"🎯 Final Score: {wilson_result.final_score:.2f}/10\")\n",
    "print(f\"⏱️  Execution Time: {wilson_result.execution_time:.1f}s\")\n",
    "\n",
    "if wilson_result.stage_scores:\n",
    "    print(f\"\\n📊 STAGE SCORES:\")\n",
    "    for stage, score in wilson_result.stage_scores.items():\n",
    "        print(f\"   {stage.replace('_', ' ').title()}: {score:.2f}/10\")\n",
    "\n",
    "if wilson_result.step_results:\n",
    "    print(f\"\\n🔍 STEP RESULTS:\")\n",
    "    for step_name, result in wilson_result.step_results.items():\n",
    "        if hasattr(result, 'score'):\n",
    "            print(f\"   {step_name}: Score {result.score}\")\n",
    "        elif hasattr(result, 'groups') and result.groups:\n",
    "            print(f\"   {step_name}: {len(result.groups)} groups found\")\n",
    "        else:\n",
    "            print(f\"   {step_name}: Completed\")\n",
    "\n",
    "if wilson_result.errors:\n",
    "    print(f\"\\n⚠️  ERRORS:\")\n",
    "    for error in wilson_result.errors:\n",
    "        print(f\"   • {error}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ⚡ Example: Quick Screening Workflow\n",
    "\n",
    "Let's also demonstrate a fast screening workflow for rapid assessment."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create and execute quick screening workflow\n",
    "quick_workflow = WorkflowTemplates.quick_screening_workflow(WORKFLOW_CONFIG)\n",
    "\n",
    "print(\"⚡ QUICK SCREENING WORKFLOW\")\n",
    "print(\"=\" * 30)\n",
    "\n",
    "# Test with multiple diseases for comparison\n",
    "test_diseases = [\n",
    "    (\"399\", \"Huntington disease\"),\n",
    "    (\"98\", \"Alpers syndrome\")\n",
    "]\n",
    "\n",
    "screening_results = []\n",
    "\n",
    "for orphacode, disease_name in test_diseases:\n",
    "    print(f\"\\n🧬 Screening {disease_name}...\")\n",
    "    result = quick_workflow.execute(orphacode, disease_name)\n",
    "    screening_results.append(result)\n",
    "    \n",
    "    print(f\"   ✅ Score: {result.final_score:.2f}/10\")\n",
    "    print(f\"   ⏱️  Time: {result.execution_time:.1f}s\")\n",
    "\n",
    "# Compare all results\n",
    "print(f\"\\n📊 SCREENING COMPARISON\")\n",
    "print(\"=\" * 25)\n",
    "\n",
    "all_results = [wilson_result] + screening_results\n",
    "all_results.sort(key=lambda x: x.final_score, reverse=True)\n",
    "\n",
    "print(f\"{'Rank':<4} {'Disease':<25} {'Score':<8} {'Status':<10} {'Time(s)':<8}\")\n",
    "print(\"-\" * 60)\n",
    "\n",
    "for i, result in enumerate(all_results, 1):\n",
    "    status = \"✅\" if result.workflow_status == \"completed\" else \"❌\"\n",
    "    print(f\"{i:<4} {result.disease_name[:23]:<25} {result.final_score:<8.2f} {status:<10} {result.execution_time:<8.1f}\")\n",
    "\n",
    "print(f\"\\n🏆 Top Priority: {all_results[0].disease_name} (Score: {all_results[0].final_score:.2f})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 📚 Key Learnings & Best Practices\n",
    "\n",
    "### ✅ What We Accomplished\n",
    "\n",
    "1. **Flexible Workflow Engine**: Multi-stage, dependency-aware execution\n",
    "2. **Custom Scoring Rules**: Domain-specific prioritization algorithms\n",
    "3. **Template System**: Reusable workflows for common scenarios\n",
    "4. **Error Resilience**: Graceful handling of step failures\n",
    "5. **Performance Tracking**: Detailed execution metrics\n",
    "6. **Comparative Analysis**: Multi-disease workflow execution\n",
    "\n",
    "### 🎯 Workflow Design Patterns\n",
    "\n",
    "- **Staged Execution**: Logical grouping of related analysis steps\n",
    "- **Dependency Management**: Steps execute only when prerequisites are met\n",
    "- **Weighted Scoring**: Different steps contribute differently to final score\n",
    "- **Custom Rules**: Domain expertise encoded in scoring algorithms\n",
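    "- **Error Isolation**: A failed optional step (`required=False`) is recorded in `errors` and the run continues; a failed required step stops the workflow with status `failed`\n",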
    "\n",
    "### 🔬 Research Applications\n",
    "\n",
    "- **Drug Development**: Multi-criteria assessment workflows\n",
    "- **Clinical Trials**: Patient population and feasibility analysis\n",
    "- **Research Prioritization**: Evidence-based funding decisions\n",
    "- **Regulatory Submissions**: Comprehensive evidence packages\n",
    "- **Academic Research**: Standardized analysis methodologies\n",
    "\n",
    "### 🚀 Next Steps\n",
    "\n",
    "- **Notebook 07**: Monitoring and debugging for production deployments\n",
    "- **Workflow Repository**: Build library of domain-specific templates\n",
    "- **Advanced Orchestration**: Conditional branching and parallel execution\n",
    "- **Integration**: Connect with research management systems\n",
    "\n",
    "The custom workflow framework enables sophisticated research analysis pipelines! 🎊"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
