diff --git a/README.md b/README.md index 26d704d4..9ef165b1 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,30 @@ if __name__ == "__main__": uvicorn.run(app, port=8000) ``` +### Testing with Sandbox [[Docs](https://dotimplement.github.io/HealthChain/reference/utilities/sandbox)] + +```python +from healthchain.sandbox import SandboxClient + +# Test CDS Hooks service with synthetic data +client = SandboxClient( + url="http://localhost:8000/cds/cds-services/discharge-summary", + workflow="encounter-discharge" +) + +# Load from test datasets +client.load_from_registry( + "synthea-patient", + data_dir="./data/synthea", + resource_types=["Condition", "DocumentReference"], + sample_size=5 +) + +# Send requests and save results +responses = client.send_requests() +client.save_results("./output/") +``` + ## Road Map - [ ] ๐Ÿ” Data provenance and audit trails tracking diff --git a/docs/cookbook/clinical_coding.md b/docs/cookbook/clinical_coding.md index cd4bf482..46b34a05 100644 --- a/docs/cookbook/clinical_coding.md +++ b/docs/cookbook/clinical_coding.md @@ -231,6 +231,10 @@ client = SandboxClient( # Load sample CDA document client.load_from_path("./data/notereader_cda.xml") + +# Inspect CDA document before sending +# for request in client.requests: +# print(request.document[:1000]) # View first 1000 chars of CDA XML ``` ## Run the Complete Example diff --git a/docs/cookbook/discharge_summarizer.md b/docs/cookbook/discharge_summarizer.md index 1af15122..897fd010 100644 --- a/docs/cookbook/discharge_summarizer.md +++ b/docs/cookbook/discharge_summarizer.md @@ -168,6 +168,10 @@ client.load_free_text( csv_path="data/discharge_notes.csv", column_name="text" ) + +# Inspect requests before sending to verify data +# for request in client.requests: +# print(request.prefetch.get('document')) # Get DocumentReference ``` !!! tip "Learn More About Test Data Generation" diff --git a/docs/quickstart.md b/docs/quickstart.md index be40bbb1..fe722882 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -1,14 +1,18 @@ # Quickstart After [installing HealthChain](installation.md), get up to speed quickly with the core components before diving further into the [full documentation](reference/index.md)! +HealthChain has three main components: -HealthChain provides three core tools for healthcare AI integration: **Gateway** for connecting to multiple healthcare systems, **Pipelines** for FHIR-native AI workflows, and **InteropEngine** for healthcare data format conversion between FHIR, CDA, and HL7v2. +- **Gateway:** Connect to multiple healthcare systems with a single API. +- **Pipelines:** Easily build data processing pipelines for both clinical text and [FHIR](https://www.hl7.org/fhir/) data. +- **InteropEngine:** Seamlessly convert between data formats like [FHIR](https://www.hl7.org/fhir/), [HL7 CDA](https://www.hl7.org/implement/standards/product_brief.cfm?product_id=7), and [HL7v2](https://www.hl7.org/implement/standards/product_brief.cfm?product_id=185). -## Core Components -### HealthChainAPI Gateway ๐Ÿ”Œ +## Core Components ๐Ÿงฉ -The HealthChainAPI provides a unified interface for connecting your AI models to multiple healthcare systems through a single API. Handle FHIR, CDS Hooks, and SOAP/CDA protocols with OAuth2 authentication. +### Gateway ๐Ÿ”Œ + +The [**HealthChainAPI**](./reference/gateway/api.md) provides a unified interface for connecting your AI application and models to multiple healthcare systems through a single API. 
It automatically handles [FHIR API](https://www.hl7.org/fhir/http.html), [CDS Hooks](https://cds-hooks.org/), and [SOAP/CDA protocols](https://www.hl7.org/implement/standards/product_brief.cfm?product_id=7) with [OAuth2 authentication](https://oauth.net/2/).

[(Full Documentation on Gateway)](./reference/gateway/gateway.md)

@@ -41,51 +45,52 @@ app.register_gateway(fhir)

### Pipeline 🛠️

-HealthChain Pipelines provide a flexible way to build and manage processing pipelines for NLP and ML tasks that can easily integrate with electronic health record (EHR) systems.
+HealthChain [**Pipelines**](./reference/pipeline/pipeline.md) provide a flexible way to build and manage processing pipelines for NLP and ML tasks that can easily integrate with electronic health record (EHR) systems.

You can build pipelines with three different approaches:

-#### 1. Build Your Own Pipeline with Inline Functions
+#### 1. Quick Inline Functions
+
+For quick experiments, start by picking the right [**Container**](./reference/pipeline/data_container.md) when you initialize your pipeline (e.g. `Pipeline[Document]()` for clinical text).

-This is the most flexible approach, ideal for quick experiments and prototyping. Initialize a pipeline type hinted with the container type you want to process, then add components to your pipeline with the `@add_node` decorator.
+Containers make your pipeline FHIR-native by loading and transforming your data (free text, EHR resources, etc.) into structured FHIR-ready formats. Just add your processing functions with `@add_node`, compile with `.build()`, and your pipeline is ready to process FHIR data end-to-end.

-Compile the pipeline with `.build()` to use it.
+[(Full Documentation on Container)](./reference/pipeline/data_container.md)

```python
from healthchain.pipeline import Pipeline
from healthchain.io import Document
+from healthchain.fhir import create_condition

-nlp_pipeline = Pipeline[Document]()
+pipeline = Pipeline[Document]()

-@nlp_pipeline.add_node
-def tokenize(doc: Document) -> Document:
-    doc.tokens = doc.text.split()
-    return doc
+@pipeline.add_node
+def extract_diabetes(doc: Document) -> Document:
+    """Adds a FHIR Condition for diabetes if mentioned in the text."""
+    if "diabetes" in doc.text.lower():
+        condition = create_condition(
+            code="73211009",
+            display="Diabetes mellitus",
+        )
+        doc.fhir.problem_list.append(condition)

-@nlp_pipeline.add_node
-def pos_tag(doc: Document) -> Document:
-    doc.pos_tags = ["NOUN" if token[0].isupper() else "VERB" for token in doc.tokens]
     return doc

-nlp = nlp_pipeline.build()
-
-doc = Document("Patient has a fracture of the left femur.")
-doc = nlp(doc)
+pipe = pipeline.build()

-print(doc.tokens)
-print(doc.pos_tags)
+doc = Document("Patient has a history of diabetes.")
+doc = pipe(doc)

-# ['Patient', 'has', 'fracture', 'of', 'left', 'femur.']
-# ['NOUN', 'VERB', 'VERB', 'VERB', 'VERB', 'VERB']
+print(doc.fhir.problem_list)  # FHIR Condition
```

-#### 2. Build Your Own Pipeline with Components, Models, and Connectors
+#### 2. Build With Components and Adapters

-Components are stateful - they're classes instead of functions. They can be useful for grouping related processing steps together, setting configurations, or wrapping specific model loading steps.
+[**Components**](./reference/pipeline/components/components.md) are reusable, stateful classes that encapsulate specific processing logic, model loading, or configuration for your pipeline. Use them to organize complex workflows, handle model state, or integrate third-party libraries with minimal setup.
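+For instance, a component can be as simple as a class with a `__call__` method that takes and returns your container type, added with `.add_node()`. A minimal sketch (the `TokenCounter` class and its internals are illustrative, not part of the library):
+
+```python
+from healthchain.pipeline import Pipeline
+from healthchain.io import Document
+
+class TokenCounter:
+    """Toy stateful component: tracks token counts across documents."""
+
+    def __init__(self):
+        self.total_tokens = 0
+
+    def __call__(self, doc: Document) -> Document:
+        # Update internal state, then pass the container along unchanged
+        self.total_tokens += len(doc.text.split())
+        return doc
+
+pipeline = Pipeline[Document]()
+pipeline.add_node(TokenCounter())
+pipe = pipeline.build()
+```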
-HealthChain comes with a few pre-built components, but you can also easily add your own. You can find more details on the [Components](./reference/pipeline/components/components.md) and [Integrations](./reference/pipeline/integrations/integrations.md) documentation pages.
+HealthChain provides a set of ready-to-use [**NLP Integrations**](./reference/pipeline/integrations/integrations.md) for common clinical NLP and ML tasks, and you can easily implement your own.

-Add components to your pipeline with the `.add_node()` method and compile with `.build()`.
+[(Full Documentation on Components)](./reference/pipeline/components/components.md)

```python
from healthchain.pipeline import Pipeline
@@ -104,18 +109,14 @@ doc = Document("Patient presents with hypertension.")
output = pipe(doc)
```

-Let's go one step further! You can use [Adapters](./reference/pipeline/adapters/adapters.md) to work directly with [CDA](https://www.hl7.org.uk/standards/hl7-standards/cda-clinical-document-architecture/) and [FHIR](https://hl7.org/fhir/) data received from healthcare system APIs. Adapters handle format conversion while keeping your pipeline pure ML processing.
+You can process legacy healthcare data formats too. [**Adapters**](./reference/pipeline/adapters/adapters.md) convert healthcare formats like [CDA](https://www.hl7.org/implement/standards/product_brief.cfm?product_id=7) to and from your pipeline's containers: just parse, process, and format without worrying about low-level data conversion.
+
+[(Full Documentation on Adapters)](./reference/pipeline/adapters/adapters.md)

```python
-from healthchain.pipeline import Pipeline
-from healthchain.pipeline.components import SpacyNLP
from healthchain.io import CdaAdapter
from healthchain.models import CdaRequest

-pipeline = Pipeline()
-pipeline.add_node(SpacyNLP.from_model_id("en_core_sci_sm"))
-pipe = pipeline.build()
-
# Use adapter for format conversion
adapter = CdaAdapter()
cda_request = CdaRequest(document="")
@@ -128,21 +129,14 @@
output = adapter.format(processed_doc)
```

#### 3. Use Prebuilt Pipelines

-Prebuilt pipelines are pre-configured collections of Components and Models optimized for specific healthcare AI use cases. They offer the highest level of abstraction and are the easiest way to get started.
+Prebuilt pipelines are the fastest way to jump into healthcare AI with minimal setup: just load and run. Each pipeline bundles best-practice components and models for common clinical tasks (like coding or summarization) and handles all FHIR/CDA conversion for you. Easily customize or extend pipelines by adding or removing components, or swap models as needed.

-For a full list of available prebuilt pipelines and details on how to configure and customize them, see the [Pipelines](./reference/pipeline/pipeline.md) documentation page.
+[(Full Documentation on Pipelines)](./reference/pipeline/pipeline.md#prebuilt-)

```python
from healthchain.pipeline import MedicalCodingPipeline
from healthchain.models import CdaRequest

-# Load from pre-built chain
-chain = ChatPromptTemplate.from_template("Summarize: {text}") | ChatOpenAI()
-pipeline = MedicalCodingPipeline.load(chain, source="langchain")
-
-# Or load from model ID
-pipeline = MedicalCodingPipeline.from_model_id("facebook/bart-large-cnn", source="huggingface")
-
-# Or load from local model
+# Load from a local model
pipeline = MedicalCodingPipeline.from_local_model("./path/to/model", source="spacy")

@@ -152,7 +146,7 @@ output = pipeline.process_request(cda_request)

### Interoperability 🔄

-The HealthChain Interoperability module provides tools for converting between different healthcare data formats, including HL7 FHIR, HL7 CDA, and HL7v2 messages.
+The HealthChain Interoperability module provides tools for converting between different healthcare data formats, including FHIR, CDA, and HL7v2 messages.

[(Full Documentation on Interoperability Engine)](./reference/interop/interop.md)

@@ -176,34 +170,88 @@ cda_document = engine.from_fhir(fhir_resources, dest_format=FormatType.CDA)

## Utilities ⚙️

-### Sandbox Testing
+### Sandbox Client 🧪

-Test your AI applications in realistic healthcare contexts with `SandboxClient` for CDS Hooks and clinical documentation workflows.
+Use [**SandboxClient**](./reference/utilities/sandbox.md) to quickly test your app against real-world EHR scenarios like CDS Hooks or Clinical Documentation Improvement (CDI) workflows. Load test datasets, send requests to your service, and validate responses in a few lines of code.

[(Full Documentation on Sandbox)](./reference/utilities/sandbox.md)

+#### Workflows
+
+A [**workflow**](./reference/utilities/sandbox.md#workflow-protocol-compatibility) represents a specific event in an EHR system that triggers your service (e.g., `patient-view` when opening a patient chart, `encounter-discharge` when discharging a patient).
+
+Workflows determine the request structure, required FHIR resources, and validation rules. Different workflows are compatible with different protocols:
+
+| Workflow Type | Protocol | Example Workflows |
+|-------------------------------------|------------|--------------------------------------------------------|
+| **CDS Hooks** | REST | `patient-view`, `order-select`, `order-sign`, `encounter-discharge` |
+| **Clinical Documentation** | SOAP | `sign-note-inpatient`, `sign-note-outpatient` |
+
+
+#### Available Dataset Loaders
+
+[**Dataset Loaders**](./reference/utilities/sandbox.md#dataset-loaders) are shortcuts for loading common clinical test datasets from file.
Currently available: + +| Dataset Key | Description | FHIR Version | Source | Download Link | +|--------------------|---------------------------------------------|--------------|-----------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------| +| `mimic-on-fhir` | **MIMIC-IV on FHIR Demo Dataset** | R4 | [PhysioNet Project](https://physionet.org/content/mimic-iv-fhir-demo/2.1.0/) | [Download ZIP](https://physionet.org/content/mimic-iv-fhir-demo/get-zip/2.1.0/) (49.5 MB) | +| `synthea-patient` | **Synthea FHIR Patient Records** | R4 | [Synthea Downloads](https://synthea.mitre.org/downloads) | [Download ZIP](https://arc.net/l/quote/hoquexhy) (100 Sample, 36 MB) | + + +```python +from healthchain.sandbox import list_available_datasets + +# See all registered datasets with descriptions +datasets = list_available_datasets() +print(datasets) +``` + +#### Basic Usage + ```python from healthchain.sandbox import SandboxClient -# Create client with service URL and workflow +# Initialize client with your service URL and workflow client = SandboxClient( - url="http://localhost:8000/cds/cds-services/my-service", + url="http://localhost:8000/cds/encounter-discharge", workflow="encounter-discharge" ) -# Load from datasets or files +# Load test data from a registered dataset client.load_from_registry( "synthea-patient", data_dir="./data/synthea", resource_types=["Condition", "DocumentReference"], sample_size=3 ) + +# Optionally inspect before sending +client.preview_requests() # See what will be sent +client.get_status() # Check client state + +# Send requests to your service responses = client.send_requests() ``` -### FHIR Helpers +For clinical documentation workflows using SOAP/CDA: + +```python +# Use context manager for automatic result saving +with SandboxClient( + url="http://localhost:8000/notereader/ProcessDocument", + workflow="sign-note-inpatient", + protocol="soap" +) as client: + client.load_from_path("./cookbook/data/notereader_cda.xml") + responses = client.send_requests() + # Results automatically saved to ./output/ on success +``` + +### FHIR Helpers ๐Ÿ”ฅ -The `fhir` module provides a set of helper functions for working with FHIR resources. +Use `healthchain.fhir` helpers to quickly create and manipulate FHIR resources (like `Condition`, `Observation`, etc.) in your code, ensuring theyโ€™re standards-compliant with minimal boilerplate. + +[(Full Documentation on FHIR Helpers)](./reference/utilities/fhir_helpers.md) ```python from healthchain.fhir import create_condition @@ -217,37 +265,5 @@ condition = create_condition( ) ``` -[(Full Documentation on FHIR Helpers)](./reference/utilities/fhir_helpers.md) - -### Data Generator - -You can use the data generator to generate synthetic FHIR data for testing. - -The `CdsDataGenerator` generates synthetic [FHIR](https://hl7.org/fhir/) data as [Pydantic](https://docs.pydantic.dev/) models suitable for different CDS workflows. Use it standalone or with `SandboxClient.load_free_text()` to include text-based data. 
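+These helpers also compose with the pipeline containers shown earlier. A short sketch pairing `create_condition` with the `Document` container (same calls as in the Pipeline section above):
+
+```python
+from healthchain.io import Document
+from healthchain.fhir import create_condition
+
+doc = Document("Patient has a history of diabetes.")
+
+# Attach a coded condition to the document's FHIR problem list
+doc.fhir.problem_list.append(
+    create_condition(code="73211009", display="Diabetes mellitus")
+)
+
+print(doc.fhir.problem_list)
+```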
-
-[(Full Documentation on Data Generators)](./reference/utilities/data_generator.md)
-
-```python
-from healthchain.sandbox.generators import CdsDataGenerator
-from healthchain.sandbox.workflows import Workflow
-
-# Initialize data generator
-data_generator = CdsDataGenerator()
-
-# Generate FHIR resources for specific workflow
-data_generator.set_workflow(Workflow.encounter_discharge)
-data = data_generator.generate_prefetch()
-
-print(data.model_dump())
-
-# {
-#   "prefetch": {
-#     "encounter": {
-#       "resourceType": ...
-#     }
-#   }
-# }
-```
-
## Going further ✨

Check out our [Cookbook](cookbook/index.md) section for more worked examples! HealthChain is still in its early stages, so if you have any questions please feel free to reach us on [Github](https://github.com/dotimplement/HealthChain/discussions) or [Discord](https://discord.gg/UQC6uAepUz).

diff --git a/docs/reference/pipeline/integrations/integrations.md b/docs/reference/pipeline/integrations/integrations.md
index 390fe7d7..950d4376 100644
--- a/docs/reference/pipeline/integrations/integrations.md
+++ b/docs/reference/pipeline/integrations/integrations.md
@@ -1,4 +1,4 @@
-# HealthChain Integrations
+# NLP Integrations

This document provides an overview of the integration components available in the HealthChain package. These components allow you to easily incorporate popular NLP libraries into your HealthChain pipelines.

diff --git a/docs/reference/pipeline/pipeline.md b/docs/reference/pipeline/pipeline.md
index 1b7195ad..5fc99da9 100644
--- a/docs/reference/pipeline/pipeline.md
+++ b/docs/reference/pipeline/pipeline.md
@@ -1,12 +1,15 @@
# Pipeline

-HealthChain pipelines enable FHIR-native workflows that integrate directly with EHR systems. Pipelines handle the complexities of healthcare data standards like [CDA (Clinical Document Architecture)](https://www.hl7.org.uk/standards/hl7-standards/cda-clinical-document-architecture/) and [FHIR (Fast Healthcare Interoperability Resources)](https://build.fhir.org/), allowing you to focus on building AI models while maintaining production-ready interoperability.
+HealthChain pipelines help you quickly build data processing workflows that integrate seamlessly with EHR systems. They support healthcare formats like [FHIR](https://build.fhir.org/) out of the box and include built-in NLP to process free-text and structured clinical data, so you can focus on AI rather than integration work.

-You can either use prebuilt pipelines optimized for common clinical workflows, or build custom pipelines from scratch for specialized use cases.
+Choose from prebuilt pipelines tailored to standard clinical workflows, or build custom pipelines for your own applications. Both approaches give you production-ready interoperability and make it easy to adapt pipelines to any healthcare use case.

## Prebuilt 📦

-HealthChain comes with a set of prebuilt pipelines that are out-of-the-box implementations of common healthcare data processing tasks:
+HealthChain comes with a set of end-to-end pipeline implementations of common healthcare data processing tasks.
+
+These prebuilt pipelines handle FHIR conversion, validation, and EHR integration for you. They work out-of-the-box with [**Adapters**](./adapters/adapters.md) and [**Gateways**](../gateway/gateway.md), supporting CDS Hooks, NoteReader CDI, and FHIR APIs. They're a quick way to get started and a solid base for building more complex integrations.
+ | Pipeline | Container | Use Case | Description | Example Application | |----------|-----------|----------|-------------|---------------------| @@ -15,75 +18,71 @@ HealthChain comes with a set of prebuilt pipelines that are out-of-the-box imple -Prebuilt pipelines are production-ready workflows that automatically handle FHIR conversion, validation, and formatting. They integrate seamlessly with EHR systems through [adapters](./adapters/adapters.md) and [gateways](../gateway/gateway.md), supporting standards like CDS Hooks and FHIR REST APIs. - -Load your models from Hugging Face, local files, or pipeline objects: +When you load your data into a prebuilt pipeline, it receives and returns request and response data ready to send to EHR integration points: ```python from healthchain.pipeline import MedicalCodingPipeline from healthchain.models import CdaRequest -#ย Load from Hugging Face -pipeline = MedicalCodingPipeline.from_model_id( - 'blaze999/Medical-NER', task="token-classification", source="huggingface" -) -# Load from local model files -pipeline = MedicalCodingPipeline.from_local_model( - '/path/to/model', source="spacy" -) # Load from a pipeline object pipeline = MedicalCodingPipeline.load(pipeline_object) # Simple end-to-end processing cda_request = CdaRequest(document="") cda_response = pipeline.process_request(cda_request) - -# Or manual adapter control for more granular control -from healthchain.io import CdaAdapter -adapter = CdaAdapter() -doc = adapter.parse(cda_request) -doc = pipeline(doc) -# Access: doc.fhir.problem_list, doc.fhir.medication_list -response = adapter.format(doc) ``` ### Customizing Prebuilt Pipelines -To customize a prebuilt pipeline, you can use the [pipeline management methods](#pipeline-management) to add, remove, and replace components. For example, you may want to change the model being used. [TODO] +To customize a prebuilt pipeline, you can use the [pipeline management](#pipeline-management) methods to add, remove, and replace components. If you need more control and don't mind writing more code, you can subclass `BasePipeline` and implement your own pipeline logic. [(BasePipeline API Reference)](../../api/pipeline.md#healthchain.pipeline.base.BasePipeline) -## Integrations +## NLP Integrations + +HealthChain integrates directly with popular NLP libraries like spaCy, HuggingFace Transformers, and LangChain. Easily add advanced NLP models and components into your pipelines to power state-of-the-art healthcare AI workflows. + +[(Full Documentation on NLP Integrations)](./integrations/integrations.md) -HealthChain offers powerful integrations with popular NLP libraries, enhancing its capabilities and allowing you to build more sophisticated pipelines. These integrations include components for spaCy, Hugging Face Transformers, and LangChain, enabling you to leverage state-of-the-art NLP models and techniques within your HealthChain workflows. +```python +from healthchain.pipeline import MedicalCodingPipeline -Integrations are covered in detail on the [Integrations](./integrations/integrations.md) homepage. +#ย Load from Hugging Face +pipeline = MedicalCodingPipeline.from_model_id( + 'blaze999/Medical-NER', task="token-classification", source="huggingface" +) +# Load from local model files +pipeline = MedicalCodingPipeline.from_local_model( + '/path/to/model', source="spacy" +) +``` ## Freestyle ๐Ÿ•บ -To build your own pipeline, you can start with an empty pipeline and add components to it. 
Initialize your pipeline with the appropriate container type, such as `Document` or `Tabular`. This is not essential, but it allows the pipeline to enforce type safety (If you don't specify the container type, it will be inferred from the first component added.) +[**Containers**](./data_container.md) are at the core of HealthChain pipelines: they define your data type and flow through each pipeline step, just like spaCyโ€™s `Doc`. -You can see the full list of available containers at the [Container](./data_container.md) page. +Specify the container (e.g. `Document` or `Tabular`) when creating your pipeline (`Pipeline[Document]()`). Each node processes and returns the container, enabling smooth, type-safe, modular workflows and direct FHIR conversion. ```python from healthchain.pipeline import Pipeline from healthchain.io.containers import Document pipeline = Pipeline[Document]() - -# Or if you live dangerously -# pipeline = Pipeline() ``` To use a built pipeline, compile it by running `.build()`. This will return a compiled pipeline that you can run on your data. ```python +# Compile the pipeline to create a callable object pipe = pipeline.build() + +# Create a Document with your clinical text and run it through the pipeline doc = pipe(Document("Patient is diagnosed with diabetes")) -print(doc.entities) +# Print the extracted problem list items +print(doc.fhir.problem_list) ``` ### Adding Nodes @@ -181,26 +180,6 @@ pipeline.add_node(linker) [(BaseComponent API Reference)](../../api/component.md#healthchain.pipeline.components.base.BaseComponent) -### Working with Healthcare Data Formats ๐Ÿ”„ - -Adapters convert between healthcare formats (CDA, FHIR, CDS Hooks) and HealthChain's internal Document objects, enabling clean separation between ML processing and format handling. This allows your pipeline to work with any healthcare data source while maintaining FHIR-native outputs. - -```python -from healthchain.io import CdaAdapter, Document - -adapter = CdaAdapter() - -# Parse healthcare data into Document -doc = adapter.parse(cda_request) - -# Process with pure pipeline -processed_doc = pipeline(doc) - -# Convert back to healthcare format -response = adapter.format(processed_doc) -``` - -You can learn more about adapters at the [Adapters](./adapters/adapters.md) documentation page. ## Pipeline Management ๐Ÿ”จ @@ -299,3 +278,23 @@ print(pipeline.stages) # fhir_conversion: # - FHIRProblemListExtractor ``` +## Working with Healthcare Data Formats ๐Ÿ”„ + +Adapters let you easily convert between healthcare formats (CDA, FHIR, CDS Hooks) and HealthChain Documents. Keep your ML pipeline format-agnostic while always getting FHIR-ready outputs. + +[(Full Documentation on Adapters)](./adapters/adapters.md) + +```python +from healthchain.io import CdaAdapter, Document + +adapter = CdaAdapter() + +# Parse healthcare data into Document +doc = adapter.parse(cda_request) + +# Process with pure pipeline +processed_doc = pipeline(doc) + +# Convert back to healthcare format +response = adapter.format(processed_doc) +``` diff --git a/docs/reference/utilities/sandbox.md b/docs/reference/utilities/sandbox.md index a64bcaf3..3c666bb8 100644 --- a/docs/reference/utilities/sandbox.md +++ b/docs/reference/utilities/sandbox.md @@ -85,28 +85,28 @@ The client validates workflow-protocol combinations at initialization: ) ``` -## Dataset Loaders +See [Data Generator](data_generator.md) for more details on `.load_free_text()` `generate_synthetic` field. 
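+For example, a quick sketch (the service URL, CSV path, and column name are placeholders; `generate_synthetic=True` mirrors the free-text loading example later on this page):
+
+```python
+from healthchain.sandbox import SandboxClient
+
+client = SandboxClient(
+    url="http://localhost:8000/cds/cds-services/my-service",
+    workflow="encounter-discharge"
+)
+
+# Build FHIR prefetch data around each note; generate_synthetic adds
+# synthetic resources to fill out the workflow's required context
+client.load_free_text(
+    csv_path="./data/notes.csv",
+    column_name="text",
+    generate_synthetic=True
+)
+```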
-HealthChain provides two pre-configured dataset loaders for testing with common FHIR testing datasets. Use `load_from_registry()` to access these datasets. +## Dataset Registry -### Overview - -| Dataset | Type | Use Case | File Format | -|---------|------|----------|-------------| -| **MIMIC-on-FHIR** | Real de-identified | Testing with realistic clinical patterns | `.ndjson.gz` per resource type | -| **Synthea** | Synthetic | Quick demos, single patient testing | `.json` Bundle per patient | +HealthChain provides two pre-configured dataset loaders for testing common FHIR test datasets with CDS Hooks workflows. Download the datasets and use `.load_from_registry()` to load from your local directory. +### Overview -**When to use:** +| Dataset & Description | FHIR Version | Type | File Format | Source | Download Link | +|------------------------------------------------------------------------|--------------|---------------------|-----------------------------|------------------------------------------------------------------------------------|---------------------------------------------------------| +| **MIMIC-on-FHIR**: MIMIC-IV on FHIR Demo Dataset | R4 | Real de-identified | `.ndjson.gz` per resource type | [PhysioNet Project](https://physionet.org/content/mimic-iv-fhir-demo/2.1.0/) | [Download ZIP](https://physionet.org/content/mimic-iv-fhir-demo/get-zip/2.1.0/) | +| **Synthea**: Synthea FHIR Patient Records (100 Sample) | R4 | Synthetic | `.json` Bundle per patient | [Synthea Downloads](https://synthea.mitre.org/downloads) | [Download ZIP](https://arc.net/l/quote/hoquexhy) | -- **MIMIC**: Test with real-world data distributions and clinical patterns from a major hospital -- **Synthea**: Quick demos without downloading large datasets; ideal for single-patient workflows ### MIMIC-on-FHIR Loader -Real de-identified clinical data from Beth Israel Deaconess Medical Center in FHIR R4 format. +Real-world, de-identified FHIR R4 data from Beth Israel Deaconess Medical Center. Suitable for testing with real-world data distributions and clinical patterns + +!!! tip "Full Dataset" + The [MIMIC-on-FHIR demo dataset](https://physionet.org/content/mimic-iv-fhir-demo/2.1.0/) is open access and contains about 100 patients. Access to the [full dataset](https://physionet.org/content/mimic-iv-fhir/2.1/) requires PhysioNet credentialed access. -**Directory Structure:** +#### Directory Structure ``` data_dir/ @@ -117,7 +117,7 @@ data_dir/ โ””โ”€โ”€ ... (other resource types) ``` -**Usage:** +#### Usage === "Basic" ```python @@ -140,18 +140,14 @@ data_dir/ ) ``` -**Available Resource Types:** - -`MimicMedication`, `MimicCondition`, `MimicObservation`, `MimicProcedure`, `MimicEncounter`, `MimicPatient`, and more. Check your dataset's `/fhir` directory for available types. - -!!! note "Setup Requirements" - The full MIMIC-on-FHIR dataset requires credentialed PhysioNet access, but you can download the [demo dataset without credentials](https://physionet.org/content/mimic-iv-fhir-demo/2.1.0/) (100 patients). - ### Synthea Loader -Synthetic patient data generated by Synthea, containing realistic FHIR Bundles (typically 100-500 resources per patient). +Synthetic patient data generated by [Synthea](https://synthea.mitre.org), containing realistic FHIR Bundles (typically 100-500 resources per patient). Ideal for single-patient workflows that require diverse data scenarios. -**Directory Structure:** +!!! 
tip "Getting Synthea Data" + Generate synthetic patients using [Synthea](https://github.com/synthetichealth/synthea) or [download sample data](https://synthea.mitre.org/downloads) from their releases. Each patient Bundle is self-contained with all clinical history. + +#### Directory Structure ``` data_dir/ @@ -160,7 +156,7 @@ data_dir/ โ””โ”€โ”€ ... (one .json file per patient) ``` -**Usage:** +#### Usage === "First Patient (Quick Demo)" ```python @@ -188,34 +184,118 @@ data_dir/ client.load_from_registry( "synthea-patient", data_dir="./synthea_sample_data_fhir_latest", + patient_id="a969c177-a995-7b89-7b6d-885214dfa253", resource_types=["Condition", "MedicationRequest", "Observation"], sample_size=5, # 5 resources per type random_seed=42, ) ``` +### Request Inspection and Debugging -!!! tip "Getting Synthea Data" - Generate synthetic patients using [Synthea](https://github.com/synthetichealth/synthea) or [download sample data](https://synthea.mitre.org/downloads) from their releases. Each patient Bundle is self-contained with all clinical history. +Before sending requests to your service, you can inspect and verify the queued data using several debugging methods. These are particularly useful for troubleshooting data loading issues or verifying request structure. + +#### Preview Requests -### Managing Requests +Get a high-level summary of queued requests without retrieving full payloads: ```python -# Preview queued requests before sending +# Preview all queued requests +previews = client.preview_requests() + +# Preview first 3 requests only previews = client.preview_requests(limit=3) -for preview in previews: - print(f"Request {preview['index']}: {preview['type']}") +print(previews) + +# [{'index': 0, 'type': 'CdaRequest', 'protocol': 'SOAP', 'has_document': True}] +``` + +#### Get Request Data + +Access the full request data in different formats for detailed inspection: + +```python +# Access raw Pydantic models directly +for request in client.requests: + print(f"Prefetch keys: {request.prefetch.keys()}") + print(request.model_dump()) -# Get full request data for inspection +# Get as list of dictionaries (for serialization) requests_dict = client.get_request_data(format="dict") +print(requests_dict[0].keys()) # See available fields + +# Get as JSON string (for saving or logging) requests_json = client.get_request_data(format="json") -requests_raw = client.get_request_data(format="raw") +with open("debug_requests.json", "w") as f: + f.write(requests_json) +``` + +#### Check Client Status + +Get the current state of your sandbox client: + +```python +status = client.get_status() +print(status) + +# { +# "sandbox_id": "550e8400-e29b-41d4-a716-446655440000", +# "url": "http://localhost:8000/cds/cds-services/my-service", +# "protocol": "rest", +# "workflow": "encounter-discharge", +# "requests_queued": 5, +# "responses_received": 0 +# } +``` + +#### Clear and Reload + +Reset the request queue to start fresh without creating a new client: -# Clear queued requests to start fresh +```python +# Clear all queued requests client.clear_requests() -client.load_from_path("./different_data.xml") + +# Load new data +client.load_from_path("./different_data.json") + +# Verify new queue +status = client.get_status() +print(f"New queue size: {status['requests_queued']}") ``` +??? 
example "Example Debugging Workflow" + ```python + from healthchain.sandbox import SandboxClient + + client = SandboxClient( + url="http://localhost:8000/cds/cds-services/discharge-summary", + workflow="encounter-discharge" + ) + + # Load data + client.load_free_text("data/notes.csv", column_name="text") + + # Debug before sending + print("=== Client Status ===") + print(client.get_status()) + + print("\n=== Request Previews ===") + for preview in client.preview_requests(limit=2): + print(f"Request {preview['index']}: {preview['type']}") + + print("\n=== Inspecting First Request ===") + first_request = client.requests[0] + print(f"Hook: {first_request.hook}") + print(f"Context: {first_request.context}") + print(f"Prefetch keys: {first_request.prefetch.keys()}") + print(f"Example DocumentReference: {first_request.prefetch['document'].model_dump()}") + + # If everything looks good, send + responses = client.send_requests() + ``` + + ### Sending Requests ```python @@ -238,42 +318,31 @@ print(status) # } ``` -### Using Context Manager - -For automatic result saving on successful completion: - -```python -with SandboxClient( - url="http://localhost:8000/cds/cds-services/my-service", - workflow="encounter-discharge" -) as client: - client.load_free_text( - csv_path="./data/notes.csv", - column_name="text" - ) - responses = client.send_requests() - # Results automatically saved to ./output/ on successful exit -``` - ## Complete Examples === "CDS Hooks Test" ```python from healthchain.sandbox import SandboxClient - # Initialize for CDS Hooks + # Initialize client for CDS Hooks workflow client = SandboxClient( url="http://localhost:8000/cds/cds-services/sepsis-alert", workflow="patient-view" ) - # Load and send + # Load MIMIC-on-FHIR data client.load_from_registry( "mimic-on-fhir", data_dir="./data/mimic-iv-fhir", - resource_types=["MimicConditionED", "MimicObservation"], - sample_size=10, + resource_types=["MimicConditionED"], + sample_size=5 + ) + + # Optional: Inspect before sending + # client.preview_requests() + # client.get_status() + # Send requests and save results responses = client.send_requests() client.save_results("./output/") ``` @@ -282,15 +351,20 @@ with SandboxClient( ```python from healthchain.sandbox import SandboxClient - # Initialize for SOAP/CDA + # Initialize client for SOAP/CDA workflow client = SandboxClient( url="http://localhost:8000/notereader/ProcessDocument/", workflow="sign-note-inpatient", protocol="soap" ) - # Load CDA files from directory + # Load CDA documents from directory client.load_from_path("./data/cda_files/", pattern="*.xml") + + # Optional: Inspect before sending + # client.preview_requests() + + # Send requests and save results responses = client.send_requests() client.save_results("./output/") ``` @@ -299,24 +373,71 @@ with SandboxClient( ```python from healthchain.sandbox import SandboxClient - # Initialize client + # Initialize client for CDS workflow client = SandboxClient( url="http://localhost:8000/cds/cds-services/my-service", - workflow="patient-view" + workflow="encounter-discharge" ) - # Load text data + # Load and generate FHIR from clinical notes client.load_free_text( - csv_path="./data/clinical_notes.csv", - column_name="note_text", - generate_synthetic=True + csv_path="./data/discharge_notes.csv", + column_name="text", + generate_synthetic=True # Adds synthetic data ) - # Send and save + # Optional: Inspect generated data + # requests = client.get_request_data(format="dict") + # print(requests[0]['prefetch'].keys()) + + # Send requests 
responses = client.send_requests() - client.save_results("./output/") ``` +## Advanced Usage + +`SandboxClient` supports method chaining and context manager patterns for more concise code. + +### Method Chaining + +All data loading methods return `self`, enabling fluent method chaining: + +```python +from healthchain.sandbox import SandboxClient + +# Chain initialization, loading, and sending +responses = ( + SandboxClient( + url="http://localhost:8000/cds/cds-services/my-service", + workflow="encounter-discharge" + ) + .load_from_registry( + "synthea-patient", + data_dir="./data/synthea", + sample_size=5 + ) + .send_requests() +) +``` + +### Context Manager + +Use the context manager for automatic result saving on successful completion: + +```python +# Auto-save results to ./output/ on successful exit +with SandboxClient( + url="http://localhost:8000/cds/cds-services/my-service", + workflow="encounter-discharge" +) as client: + client.load_free_text( + csv_path="./data/notes.csv", + column_name="text" + ) + responses = client.send_requests() + # Results automatically saved on successful exit +``` + ## Migration Guide !!! warning "Decorator Pattern Deprecated" diff --git a/healthchain/sandbox/datasets.py b/healthchain/sandbox/datasets.py index 44ff9f4d..d86842f2 100644 --- a/healthchain/sandbox/datasets.py +++ b/healthchain/sandbox/datasets.py @@ -64,7 +64,6 @@ def load(cls, name: str, data_dir: str, **kwargs) -> Dict: ) loader = cls._datasets[name] - log.info(f"Loading dataset: {name}") return loader.load(data_dir=data_dir, **kwargs) @classmethod diff --git a/healthchain/sandbox/sandboxclient.py b/healthchain/sandbox/sandboxclient.py index db7a26e9..da121501 100644 --- a/healthchain/sandbox/sandboxclient.py +++ b/healthchain/sandbox/sandboxclient.py @@ -379,34 +379,38 @@ def preview_requests(self, limit: Optional[int] = None) -> List[Dict[str, Any]]: return previews def get_request_data( - self, format: Literal["raw", "dict", "json"] = "dict" - ) -> Union[List, str]: + self, format: Literal["dict", "json"] = "dict" + ) -> Union[List[Dict], str]: """ - Get raw request data for inspection. + Get transformed request data for inspection. - Allows direct access to request data for debugging or custom processing. + Allows access to serialized request data for debugging or custom processing. + For direct access to Pydantic models, use the `requests` attribute: + >>> for request in client.requests: + >>> print(request.model_dump()) Args: - format: Return format - "raw" for list of request objects, - "dict" for list of dictionaries, "json" for JSON string + format: Return format - "dict" for list of dictionaries, + "json" for JSON string Returns: Request data in specified format Raises: - ValueError: If format is not one of "raw", "dict", or "json" + ValueError: If format is not "dict" or "json" Examples: >>> client.load_from_path("data.xml") + >>> # Access raw Pydantic models directly + >>> for request in client.requests: + >>> print(request.model_dump(exclude_none=True)) >>> # Get as dictionaries >>> dicts = client.get_request_data("dict") >>> # Get as JSON string >>> json_str = client.get_request_data("json") >>> print(json_str) """ - if format == "raw": - return self.requests - elif format == "dict": + if format == "dict": result = [] for req in self.requests: if hasattr(req, "model_dump"): @@ -420,7 +424,8 @@ def get_request_data( return json.dumps(self.get_request_data("dict"), indent=2) else: raise ValueError( - f"Invalid format '{format}'. 
Must be 'raw', 'dict', or 'json'" + f"Invalid format '{format}'. Must be 'dict' or 'json'. " + f"For raw Pydantic models, access the 'requests' attribute directly." ) def send_requests(self) -> List[Dict]: diff --git a/tests/sandbox/test_sandbox_client.py b/tests/sandbox/test_sandbox_client.py index 5dc22fb7..d7a71614 100644 --- a/tests/sandbox/test_sandbox_client.py +++ b/tests/sandbox/test_sandbox_client.py @@ -323,7 +323,6 @@ def test_preview_requests_respects_limit(): @pytest.mark.parametrize( "format_type,check", [ - ("raw", lambda data: isinstance(data, list)), ("dict", lambda data: isinstance(data, list) and isinstance(data[0], dict)), ("json", lambda data: isinstance(data, str) and json.loads(data)), ],