In [38]:
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModel
import requests
from pydantic import BaseModel, Field
import json
from dotenv import load_dotenv
import nest_asyncio # type: ignore

In [39]:
# Setup: prepare the process environment and the event loop for the cells below

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the API key for the Anthropic model used below comes from — confirm.
load_dotenv()
# Patch asyncio so an already-running event loop can be re-entered; SelectTypesAgent.select_types()
# calls Agent.run_sync(), and a notebook kernel normally has a loop running already.
nest_asyncio.apply() # type: ignore

In [27]:
# Pydantic models for FHIR metadata

# One entry of a resource's 'searchParam' list from the CapabilityStatement.
# fetch_searchable_resources() builds these from each entry's 'name', 'type'
# and 'documentation' keys.
class SearchParameter(BaseModel):
    """FHIR search parameter definition"""
    name: str  # parameter name; also the key used when the parameters are sorted
    type: str | None  # Can sometimes be None if special type. Nullable but has no default, so it must always be passed.
    documentation: str | None = None  # optional free-text description; None when the server provides none

# Per-resource-type slice of the CapabilityStatement, as stored in
# FHIRMetadata.resource_metadata.
class ResourceMetadata(BaseModel):
    """Metadata for a single FHIR resource type"""
    # Resource type name as reported by the server (e.g. 'Patient')
    type: str
    # Value of the resource entry's 'profile' key; None when the server omits it
    profile: str | None = None
    interactions: list[str]  # ['read', 'search-type', 'create', etc.]
    # Search parameters for this type; fetch_searchable_resources() stores them sorted by name
    search_params: list[SearchParameter]

# Return type of fetch_searchable_resources(). The same object is later handed to
# get_search_parameters() and SelectTypesAgent as the cached view of the server.
class FHIRMetadata(BaseModel):
    """Complete FHIR server metadata response"""
    # Only types whose interaction list contains 'search-type' are recorded here,
    # in the order the server lists them.
    searchable_types: list[str] = Field(
        description="List of resource types that support search-type interaction"
    )
    # Populated for the searchable types only, not for every type the server lists.
    resource_metadata: dict[str, ResourceMetadata] = Field(
        description="Full metadata for each resource type, keyed by type name"
    )
    # Copied from the CapabilityStatement's 'fhirVersion' key; None if the key is absent.
    fhir_version: str | None = None
    # Base URL the metadata was fetched from (without the '/metadata' suffix).
    server_url: str

In [28]:
def _parse_capability_statement(capability_statement: object, server_url: str) -> FHIRMetadata:
    """
    Build FHIRMetadata from an already-decoded CapabilityStatement (no network access).

    Only the first entry of the 'rest' array is inspected, and only resources that
    advertise the 'search-type' interaction are kept.

    Args:
        capability_statement: Decoded JSON body of the server's /metadata response
        server_url: Base URL recorded on the returned FHIRMetadata

    Returns:
        FHIRMetadata: Searchable type names plus per-type metadata

    Raises:
        ValueError: If the document is not a JSON object or lacks rest[0].resource
    """
    if not isinstance(capability_statement, dict):
        raise ValueError("Invalid CapabilityStatement: expected a JSON object")

    rest = capability_statement.get('rest')
    if not isinstance(rest, list) or not rest:
        raise ValueError("Invalid CapabilityStatement: missing 'rest' array")

    resources = rest[0].get('resource') if isinstance(rest[0], dict) else None
    if not isinstance(resources, list) or not resources:
        raise ValueError("Invalid CapabilityStatement: missing 'resource' array in rest[0]")

    searchable_types: list[str] = []
    resource_metadata: dict[str, ResourceMetadata] = {}

    for resource in resources:
        resource_type = resource.get('type')
        if not resource_type:
            continue

        # `or []` also covers an explicit null; entries without a code are dropped
        # because ResourceMetadata.interactions only accepts strings.
        interaction_codes = [
            interaction['code']
            for interaction in resource.get('interaction') or []
            if interaction.get('code')
        ]
        if 'search-type' not in interaction_codes:
            continue

        searchable_types.append(resource_type)

        # A search parameter without a name cannot be referenced in a query (and would
        # fail SearchParameter validation), so it is skipped. Sorted by name, as in the
        # reference code this cell was ported from.
        search_params = sorted(
            (
                SearchParameter(
                    name=param['name'],
                    type=param.get('type'),
                    documentation=param.get('documentation'),
                )
                for param in resource.get('searchParam') or []
                if param.get('name')
            ),
            key=lambda p: p.name,
        )

        # R4 reports 'profile' as a canonical URL string; STU3 servers send a
        # Reference object instead, so unwrap its 'reference' value.
        profile = resource.get('profile')
        if isinstance(profile, dict):
            profile = profile.get('reference')

        resource_metadata[resource_type] = ResourceMetadata(
            type=resource_type,
            profile=profile,
            interactions=interaction_codes,
            search_params=search_params,
        )

    return FHIRMetadata(
        searchable_types=searchable_types,
        resource_metadata=resource_metadata,
        fhir_version=capability_statement.get('fhirVersion'),
        server_url=server_url,
    )


def fetch_searchable_resources(base_url: str = "https://r4.smarthealthit.org") -> FHIRMetadata:
    """
    Fetch searchable resource types from FHIR server metadata.

    Queries the server's /metadata endpoint and keeps the resources that
    advertise the 'search-type' interaction.

    Args:
        base_url: FHIR server base URL (default: SMART Health IT R4 server).
            A trailing slash is tolerated.

    Returns:
        FHIRMetadata: Pydantic model containing searchable types and full resource metadata

    Raises:
        requests.RequestException: If the network request fails; the original error is
            chained and its request/response objects are carried over
        ValueError: If response is invalid or missing required fields
    """
    # Strip trailing slashes so the request does not go to '<base>//metadata'
    server_url = base_url.rstrip('/')
    metadata_url = f"{server_url}/metadata"

    try:
        response = requests.get(metadata_url, timeout=10)
        response.raise_for_status()
        capability_statement = response.json()
    except requests.RequestException as e:
        raise requests.RequestException(
            f"Failed to fetch metadata from {metadata_url}: {e}",
            request=e.request,
            response=e.response,
        ) from e
    except ValueError as e:
        # Older requests releases raise a plain ValueError for a non-JSON body
        raise ValueError(f"Invalid JSON in response from {metadata_url}: {e}") from e

    return _parse_capability_statement(capability_statement, server_url)

In [32]:
# Smoke-test the fetch against the default server and summarise the result
metadata = fetch_searchable_resources()

print(
    f"FHIR Version: {metadata.fhir_version}",
    f"Server URL: {metadata.server_url}",
    f"\nFound {len(metadata.searchable_types)} searchable resource types:",
    sep="\n",
)
print(metadata.searchable_types[:10])  # first ten only, to keep the output short

# Drill into one well-known resource type, if the server offers it
if 'Patient' in metadata.resource_metadata:
    patient_meta = metadata.resource_metadata['Patient']
    print(
        "\nPatient resource:",
        f"  Interactions: {patient_meta.interactions}",
        f"  Number of search parameters: {len(patient_meta.search_params)}",
        f"  First 5 search parameters: {[p.name for p in patient_meta.search_params[:5]]}",
        sep="\n",
    )

FHIR Version: 4.0.0
Server URL: https://r4.smarthealthit.org

Found 146 searchable resource types:
['Account', 'ActivityDefinition', 'AdverseEvent', 'AllergyIntolerance', 'Appointment', 'AppointmentResponse', 'AuditEvent', 'Basic', 'Binary', 'BiologicallyDerivedProduct']

Patient resource:
  Interactions: ['read', 'vread', 'update', 'patch', 'delete', 'history-instance', 'history-type', 'create', 'search-type']
  Number of search parameters: 25
  First 5 search parameters: ['_id', '_language', 'active', 'address', 'address-city']


In [47]:
# Fetch available parameters

def get_search_parameters(
    resource_type: str,
    metadata: FHIRMetadata
) -> list[SearchParameter]:
    """
    Look up the search parameters recorded for one FHIR resource type.

    Args:
        resource_type: Name of the resource type to look up (e.g., 'Patient', 'Observation')
        metadata: Previously fetched FHIRMetadata holding the per-type metadata

    Returns:
        The list of SearchParameter objects stored for that resource type

    Raises:
        ValueError: If metadata has no entry for resource_type; the message
            lists up to ten of the known searchable types as a hint
    """
    known = metadata.resource_metadata
    if resource_type in known:
        return known[resource_type].search_params

    # Unknown type: show a short alphabetical sample of what is available
    preview = ', '.join(sorted(metadata.searchable_types)[:10])
    raise ValueError(
        f"Resource type '{resource_type}' not found in metadata. "
        f"Available types include: {preview}..."
    )

In [48]:
# Exercise get_search_parameters() on two known types and one unknown type

print(
    "=" * 60,
    "Testing get_search_parameters() utility function",
    "=" * 60,
    sep="\n",
)

# Test 1: Patient
print("\n1. Getting search parameters for Patient:")
patient_params = get_search_parameters("Patient", metadata)
print(
    f"   Patient has {len(patient_params)} search parameters",
    "   First 10 parameters:",
    sep="\n",
)
for param in patient_params[:10]:
    # Documentation is optional; show at most its first 50 characters
    if param.documentation:
        doc = f" - {param.documentation[:50]}..."
    else:
        doc = ""
    print(f"     - {param.name} ({param.type}){doc}")

# Test 2: Observation
print("\n2. Getting search parameters for Observation:")
obs_params = get_search_parameters("Observation", metadata)
print(
    f"   Observation has {len(obs_params)} search parameters",
    "   First 10 parameters:",
    sep="\n",
)
for param in obs_params[:10]:
    if param.documentation:
        doc = f" - {param.documentation[:50]}..."
    else:
        doc = ""
    print(f"     - {param.name} ({param.type}){doc}")

# Test 3: an unknown type must raise ValueError
print("\n3. Testing error handling with invalid type:")
try:
    invalid_params = get_search_parameters("InvalidType", metadata)
except ValueError as exc:
    print(f"   ✓ Caught expected error: {exc}")

Testing get_search_parameters() utility function

1. Getting search parameters for Patient:
   Patient has 25 search parameters
   First 10 parameters:
     - _id (token) - The ID of the resource...
     - _language (string) - The language of the resource...
     - active (token) - Whether the patient record is active...
     - address (string) - A server defined search that may match any of the ...
     - address-city (string) - A city specified in an address...
     - address-country (string) - A country specified in an address...
     - address-postalcode (string) - A postalCode specified in an address...
     - address-state (string) - A state specified in an address...
     - address-use (token) - A use code specified in an address...
     - birthdate (date) - The patient's date of birth...

2. Getting search parameters for Observation:
   Observation has 40 search parameters
   First 10 parameters:
     - _id (token) - The ID of the resource...
     - _language (string) - The lan

In [30]:
# Pydantic models for Select Types Agent output

# Element type of the list branch of SelectTypesAgent's output_type.
# NOTE(review): the class docstring and Field descriptions below presumably reach the
# model through the schema generated for the agent's output — treat them as runtime
# text and confirm before rewording.
class SelectedResourceType(BaseModel):
    """A single selected FHIR resource type with confidence and reasoning"""
    selected_type: str = Field(
        description="The selected resource type name"
    )
    # ge/le make validation reject any value outside the closed range [0.0, 1.0]
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Confidence score (0.0-1.0) for this selection"
    )
    reasoning: str = Field(
        description="Explanation of why this type was selected"
    )

# Alternative (non-list) branch of SelectTypesAgent's output_type: a single object, not
# a list element. Callers should check isinstance(result, SelectTypeError) before iterating.
# NOTE(review): as a pydantic model this is itself iterable (presumably yielding
# (field_name, value) pairs), so looping over it does not fail loudly — confirm.
class SelectTypeError(BaseModel):
    """Error result when type selection fails"""
    error: str = Field(
        description="Error message describing what went wrong"
    )
    reasoning: str = Field(
        description="Explanation of why the selection failed"
    )

In [None]:
class SelectTypesAgent:
    """
    Agent for selecting FHIR resource types from natural language queries.

    Wraps a pydantic_ai Agent whose system prompt lists the searchable resource
    types found in the supplied FHIRMetadata. The prompt is built once, at
    construction time, so a new agent is needed if the metadata changes.
    """

    def __init__(self, metadata: FHIRMetadata, model_name: str = 'claude-opus-4-5'):
        """
        Initialize the agent with FHIR server metadata.

        Args:
            metadata: FHIRMetadata object containing available searchable types
            model_name: Anthropic model identifier passed to AnthropicModel
                (defaults to the model this notebook was developed with)
        """
        self.metadata = metadata
        self.model = AnthropicModel(model_name)

        # The agent returns EITHER a list of SelectedResourceType OR one bare
        # SelectTypeError; the error is never an element of the list.
        self.agent = Agent(
            model=self.model,
            output_type=list[SelectedResourceType] | SelectTypeError,
            system_prompt=self._build_system_prompt()
        )

    def select_types(self, query: str) -> list[SelectedResourceType] | SelectTypeError:
        """
        Analyze query and return selected resource types.

        Runs the agent synchronously. Callers must check for SelectTypeError
        before iterating over the result.

        Args:
            query: Natural language query from user

        Returns:
            List of SelectedResourceType (with individual confidence/reasoning)
            or SelectTypeError if selection fails
        """
        result = self.agent.run_sync(query)
        return result.output

    def _build_system_prompt(self) -> str:
        """Build dynamic system prompt with available types from metadata"""
        types_list = "\n".join(sorted(self.metadata.searchable_types))

        # The examples at the end must agree with the rules above them: results are
        # ordered most-relevant-first, and an error is a single SelectTypeError
        # rather than a list containing one.
        return f"""You are a FHIR resource type selector. Analyze user queries and select the appropriate FHIR resource type(s).

Available searchable resource types ({len(self.metadata.searchable_types)} total):
{types_list}

Your task:
1. Analyze the user's query to understand what data they want
2. Select the most appropriate resource type(s) from the available list above
3. Return a list of SelectedResourceType objects, each with:
   - selected_type: the resource type name
   - confidence: your confidence score for this specific type (0.0-1.0)
   - reasoning: why this specific type was selected
4. Order results by relevance (most relevant first)

Confidence scoring guidelines (per type):
- 0.9-1.0: Exact type name mentioned or very clear semantic match
- 0.7-0.9: Clear semantic match with good context
- 0.5-0.7: Reasonable match but some ambiguity
- 0.3-0.5: Multiple valid options, this is one possibility
- 0.0-0.3: Very uncertain, weak match

Common mappings:
- "patients", "patient demographics", "people" → Patient
- "vital signs", "blood pressure", "lab results", "observations" → Observation
- "medications", "prescriptions", "drugs" → Medication, MedicationRequest
- "encounters", "visits" → Encounter
- "appointments", "scheduled visits" → Appointment
- "procedures", "surgeries", "operations" → Procedure
- "conditions", "diagnoses", "problems", "diseases" → Condition
- "allergies" → AllergyIntolerance
- "immunizations", "vaccinations" → Immunization

Error handling:
- If requested type doesn't exist in available list: return a SelectTypeError with error message and reasoning
- If type exists but query is ambiguous: return multiple SelectedResourceType objects, each with their own confidence
- If query is too vague: return most likely types with lower confidence scores

IMPORTANT: 
- Only select types from the available list above
- Each SelectedResourceType in your list should have its own reasoning explaining why THAT specific type matches
- For ambiguous queries, return multiple types with individual confidence scores
- If NO valid types can be found, return a single SelectTypeError

Examples:
- "Find patients" → [SelectedResourceType(selected_type="Patient", confidence=0.95, reasoning="Direct match...")]
- "Get medication data" → [
    SelectedResourceType(selected_type="MedicationRequest", confidence=0.8, reasoning="Most likely prescription orders..."),
    SelectedResourceType(selected_type="Medication", confidence=0.7, reasoning="Could be medication definitions..."),
  ]
- "Find XYZ" → SelectTypeError(error="Type 'XYZ' not found", reasoning="...")"""

In [35]:
# Build the type-selection agent from the metadata fetched earlier in the notebook
select_agent = SelectTypesAgent(metadata)

print(
    "SelectTypesAgent initialized successfully!",
    f"Agent has access to {len(metadata.searchable_types)} searchable resource types",
    sep="\n",
)

SelectTypesAgent initialized successfully!
Agent has access to 146 searchable resource types


In [None]:
# Test 1: Clear, specific query
print("=" * 60)
print("TEST 1: Clear query - 'Find all patients born after 1990'")
print("=" * 60)
results = select_agent.select_types("Find all patients born after 1990")
# Branch on the result type BEFORE iterating: SelectTypeError is a pydantic model, and
# looping over one yields (field_name, value) pairs instead of selections, which
# previously left a bogus index in front of the error line.
if isinstance(results, SelectTypeError):
    print(f"\nERROR: {results.error}")
    print(f"   Reasoning: {results.reasoning}")
else:
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Selected Type: {result.selected_type}")
        print(f"   Confidence: {result.confidence:.2f}")
        print(f"   Reasoning: {result.reasoning}")
print()

TEST 1: Clear query - 'Find all patients born after 1990'

1. Selected Type: Patient
   Confidence: 0.95
   Reasoning: The query explicitly asks for 'patients' which directly maps to the Patient FHIR resource type. The birth date filter (born after 1990) is a standard search parameter available on the Patient resource (birthdate), confirming this is the correct resource type.



In [None]:
# Test 2: Ambiguous query (multiple matching types)
print("=" * 60)
print("TEST 2: Ambiguous query - 'Get medication data'")
print("=" * 60)
results = select_agent.select_types("Get medication data")
# Branch on the result type BEFORE iterating: SelectTypeError is a pydantic model, and
# looping over one yields (field_name, value) pairs instead of selections, which
# previously left a bogus index in front of the error line.
if isinstance(results, SelectTypeError):
    print(f"\nERROR: {results.error}")
    print(f"   Reasoning: {results.reasoning}")
else:
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Selected Type: {result.selected_type}")
        print(f"   Confidence: {result.confidence:.2f}")
        print(f"   Reasoning: {result.reasoning}")
print()

TEST 2: Ambiguous query - 'Get medication data'

1. Selected Type: MedicationRequest
   Confidence: 0.80
   Reasoning: Most likely what users mean by 'medication data' - represents prescription orders and medication requests for patients, which is the most commonly queried medication-related resource

2. Selected Type: MedicationStatement
   Confidence: 0.75
   Reasoning: Records of medications being taken by a patient, including self-reported medications - another common interpretation of 'medication data'

3. Selected Type: Medication
   Confidence: 0.70
   Reasoning: The base medication resource containing drug definitions and information - could be what's needed if looking for medication catalog/reference data

4. Selected Type: MedicationAdministration
   Confidence: 0.60
   Reasoning: Records of actual medication administration events - relevant if looking for data about when medications were given to patients

5. Selected Type: MedicationDispense
   Confidence: 0.55
   Reasoning

In [None]:
# Test 3: Semantic query (blood pressure -> Observation)
print("=" * 60)
print("TEST 3: Semantic query - 'Show me blood pressure readings'")
print("=" * 60)
results = select_agent.select_types("Show me blood pressure readings")
# Branch on the result type BEFORE iterating: SelectTypeError is a pydantic model, and
# looping over one yields (field_name, value) pairs instead of selections, which
# previously left a bogus index in front of the error line.
if isinstance(results, SelectTypeError):
    print(f"\nERROR: {results.error}")
    print(f"   Reasoning: {results.reasoning}")
else:
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Selected Type: {result.selected_type}")
        print(f"   Confidence: {result.confidence:.2f}")
        print(f"   Reasoning: {result.reasoning}")
print()

TEST 3: Semantic query - 'Show me blood pressure readings'

1. Selected Type: DiagnosticReport
   Confidence: 0.95
   Reasoning: The user explicitly requested 'diagnostic reports', which directly maps to the DiagnosticReport FHIR resource type. DiagnosticReport resources contain findings and interpretations of diagnostic tests performed on patients, and they have a 'subject' reference field that links to the Patient. The filter by subject's name (Larson) would be applied through a chained search parameter on the subject reference.



In [None]:
# Test 4: Non-existent type
print("=" * 60)
print("TEST 4: Non-existent type - 'Find XYZ records'")
print("=" * 60)
results = select_agent.select_types("Find XYZ records")
# Branch on the result type BEFORE iterating: SelectTypeError is a pydantic model, and
# looping over one yields (field_name, value) pairs instead of selections, which
# previously left a bogus index in front of the error line.
if isinstance(results, SelectTypeError):
    print(f"\nERROR: {results.error}")
    print(f"   Reasoning: {results.reasoning}")
else:
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Selected Type: {result.selected_type}")
        print(f"   Confidence: {result.confidence:.2f}")
        print(f"   Reasoning: {result.reasoning}")
print()

TEST 4: Non-existent type - 'Find XYZ records'

2. ERROR: Type 'XYZ' not found in available FHIR resource types
   Reasoning: The user requested 'XYZ records', but 'XYZ' does not match any of the 146 available FHIR resource types. There is no semantic mapping or close match for 'XYZ' in the available list. The term does not correspond to any known healthcare data concept that could be mapped to a standard FHIR resource type.

