<a href="https://colab.research.google.com/gist/johnnygreco/ed5fc5fa46a46887adb1ade232da359b/physician-notes-with-non-llm-data-sources.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
# Install/upgrade the Gretel Python client straight from GitHub, plus the
# `datasets` package used below to load the seed data. The %%capture cell
# magic above must stay on the first line; it silences the installer output.
%pip install -U git+https://github.com/gretelai/gretel-python-client datasets

In [2]:
from datasets import load_dataset

from gretel_client.navigator_client import Gretel

# Build the Gretel client used by every cell below.
# api_key="prompt" presumably asks for the key interactively unless cached
# credentials are found -- confirm against the client documentation.
# NOTE(review): the endpoint targets the dev API; confirm that is intended
# before sharing this notebook outside the team.
gretel = Gretel(
    api_key="prompt",
    endpoint="https://api.dev.gretel.ai",
)

Found cached Gretel credentials
Logged in as kirit.thadaka@gretel.ai ✅
Gretel client configured to use project: proj_2uY0cfM0kjiegpyEZvCHNKZYxGf


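## 🏥 Patient Notes Example

This notebook generates synthetic physician notes by combining a seed dataset of patient-reported symptoms and diagnoses with non-LLM samplers (people, UUIDs, dates) and an LLM-written notes column.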

In [3]:
# Seed the workflow with Gretel's symptom-to-diagnosis dataset: take the
# "train" split as a DataFrame and give its two text columns descriptive names.
df_seed = (
    load_dataset("gretelai/symptom_to_diagnosis")["train"]
    .to_pandas()
    .rename(
        columns={
            "output_text": "diagnosis",
            "input_text": "patient_summary",
        }
    )
)

print(f"Number of records: {len(df_seed)}")

# Show the first few seed rows as the cell output.
df_seed.head()

Number of records: 853


Unnamed: 0,diagnosis,patient_summary
0,cervical spondylosis,I've been having a lot of pain in my neck and ...
1,impetigo,I have a rash on my face that is getting worse...
2,urinary tract infection,I have been urinating blood. I sometimes feel ...
3,arthritis,I have been having trouble with my muscles and...
4,dengue,I have been feeling really sick. My body hurts...


In [4]:
# Start a new Data Designer configuration on the "apache-2.0" model suite.
aidd = gretel.data_designer.new(model_suite="apache-2.0")

# Seed rows are shuffled and used without replacement (with_replacement=False),
# so the maximum num_records is the size of the seed dataset: 853.
aidd.with_seed_dataset(df_seed, sampling_strategy="shuffle", with_replacement=False)

# Register two random person samplers, one for patients and one for doctors.
# For now, the default locale has been updated to "en_GB", since the PGM is
# not yet supported in streaming mode.
aidd.with_person_samplers(
    {
        "patient_sampler": {},
        "doctor_sampler": {},
    }
)

[09:13:08] [INFO] 🌱 Using seed dataset with file ID: file_cf29088131334a2087c4115ccb0bdf1a


In [5]:
# Patient identifier: an upper-cased, short-form UUID carrying a "PT-" prefix.
aidd.add_column(
    name="patient_id",
    type="uuid",
    params={"prefix": "PT-", "short_form": True, "uppercase": True},
)

# Patient demographics: each column is an expression that reads a single
# field from the `patient_sampler` person sampler. Added in this order.
patient_expression_columns = [
    ("first_name", "patient_sampler.first_name"),
    ("last_name", "patient_sampler.last_name"),
    ("dob", "patient_sampler.birth_date"),
    ("patient_email", "patient_sampler.email_address"),
]
for column_name, column_expr in patient_expression_columns:
    aidd.add_column(
        name=column_name,
        type="expression",
        params={"expr": column_expr},
    )

# Symptom onset is a datetime sampled between the start and end of 2024.
aidd.add_column(
    name="symptom_onset_date",
    type="datetime",
    params={"start": "2024-01-01", "end": "2024-12-31"},
)

# The visit date is a timedelta of 1 to 30 relative to the onset date
# (presumably days -- confirm the sampler's default unit).
aidd.add_column(
    name="date_of_visit",
    type="timedelta",
    params={"dt_min": 1, "dt_max": 30, "reference_column_name": "symptom_onset_date"},
)

# Physician display name built from the doctor person sampler.
# The "Dr. " title is a string literal and therefore has to be quoted inside
# the expression, just like the ' ' separator; a bare `Dr.` is not a valid
# operand for `+` and makes the expression unparseable.
aidd.add_column(
    name="physician",
    type="expression",
    params={"expr": "'Dr. ' + doctor_sampler.first_name + ' ' + doctor_sampler.last_name"},
)


# Prompt template for the LLM-generated notes. Besides the columns defined
# earlier in this cell, it can reference the seed data fields directly
# (here: diagnosis and patient_summary).
physician_notes_prompt = """\
<context>
You are a primary-care physician who just had an appointment with {{first_name}} {{last_name}},
who has been struggling with symptoms from {{diagnosis}} since {{symptom_onset_date}}.
The date of today's visit is {{date_of_visit}}.
</context>

<patient_summary_of_symptoms>
{{patient_summary}}
</patient_summary_of_symptoms>

<task>
Write careful notes about your visit with {{first_name}},
as {{physician}}.

Format the notes as a busy doctor might.
</task>
"""

aidd.add_column(name="physician_notes", prompt=physician_notes_prompt)

# Display the configured designer object as the cell output.
aidd

In [6]:
# Generate a preview from the current configuration; the cells below inspect
# the returned object, so they only work once this call succeeds.
preview = aidd.preview()

[09:13:08] [INFO] 🚀 Generating preview
Task config validation failed: task: 'generate_columns_using_samplers' step: 'generate-columns-using-samplers':
	field='generate_columns_using_samplers' error_message='Unknown error'


WorkflowValidationError: Task config validation failed

In [None]:
# Show one sample record from the preview (requires `preview` from the cell above).
preview.display_sample_record()

In [None]:
# The full preview dataset includes the seed data as columns alongside the
# generated ones. Displayed as a DataFrame via the cell's last expression.
preview.dataset.df

In [None]:
# Left commented out on purpose: uncomment to submit a 100-record generation
# run named "physician_notes" once the preview above looks right.
# workflow_run = aidd.create(
#     num_records=100,
#     workflow_run_name="physician_notes",
#     wait_for_completion=True
# )