In [None]:
import os
from pathlib import Path

import pandas as pd
import tomli
from dotenv import load_dotenv
from openai import AzureOpenAI

from discharge_docs.dashboard.helper import (
    get_data_from_patient_admission,
    get_patients_from_list_names,
    get_template_prompt,
)
from discharge_docs.processing.processing import (
    get_patient_file,
)
from discharge_docs.prompts.prompt import (
    load_prompts,
    load_template_prompt,
)
from discharge_docs.prompts.prompt_builder import PromptBuilder

%load_ext autoreload
%autoreload 2

# --- Azure OpenAI client ----------------------------------------------------
# Pull environment variables from a local .env file (if any) before reading
# the key and endpoint below.
load_dotenv()
TEMPERATURE = 0.2

# Alternative deployment: "aiva-gpt" (GPT 3.5)
deployment_name = "aiva-gpt4"
client = AzureOpenAI(
    api_version="2024-02-01",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
)

# --- Source data ------------------------------------------------------------
# All paths are resolved relative to the parent of the working directory.
processed_dir = Path.cwd().parent / "data" / "processed"
df_metavision = pd.read_parquet(processed_dir / "metavision_new_data.parquet")
df_HIX = pd.read_parquet(processed_dir / "HiX_data.parquet")

# Department -> DataFrame holding that department's records.
df_dict = {
    "NICU": df_metavision,
    "IC": df_metavision,
    "CAR": df_HIX,
    "PSY": df_HIX,
}

# --- Encounter ids used by the dashboard ------------------------------------
enc_ids_path = (
    Path.cwd().parent / "src" / "discharge_docs" / "dashboard" / "enc_ids_dashboard.toml"
)
with open(enc_ids_path, "rb") as f:
    enc_ids_dict = tomli.load(f)

# Flatten each table to its "ids" list and keep only the first 25 ids.
for key in enc_ids_dict:
    enc_ids_dict[key] = enc_ids_dict[key]["ids"][:25]
print(enc_ids_dict)

data_dict, values_list = get_patients_from_list_names(df_dict, enc_ids_dict)

# --- Prompts ----------------------------------------------------------------
user_prompt, system_prompt = load_prompts()
template_prompt_NICU = load_template_prompt("NICU")
template_prompt_IC = load_template_prompt("IC")
template_prompt_CAR = load_template_prompt("CAR")
template_prompt_PSY = load_template_prompt("PSY")
# Lower-case department key -> template; "demo" reuses the NICU template.
template_prompt_dict = dict(
    nicu=template_prompt_NICU,
    ic=template_prompt_IC,
    car=template_prompt_CAR,
    psy=template_prompt_PSY,
    demo=template_prompt_NICU,
)

# --- Previously generated documents -----------------------------------------
# Resume from an earlier run when its CSV exists; otherwise start empty.
existing_docs_path = processed_dir / "bulk_generated_docs_gpt4.csv"
try:
    bulk_generated_docs = pd.read_csv(existing_docs_path)
except FileNotFoundError:
    bulk_generated_docs = pd.DataFrame(columns=["enc_id", "name", "generated_doc"])

# Generate a discharge letter for every selected admission and checkpoint the
# results to CSV after each one, so an interrupted run can be resumed.
#
# Skip rules (kept from the original cell):
#   * existing doc containing "Error"  -> regenerate, except for "psy"
#   * existing doc without an error    -> skip, except for "ic" (regenerated)
#   * no existing doc                  -> generate
bulk_docs_path = (
    # Path.cwd().parent / "data" / "processed" / "bulk_generated_docs_gpt35.csv"
    Path.cwd().parent / "data" / "processed" / "bulk_generated_docs_gpt4.csv"
)

for selected_patient_admission in data_dict:
    # Resolve the template for EVERY admission. Previously this only happened
    # for admissions already present in the CSV, so a new admission either hit
    # a NameError or silently reused the previous admission's template.
    template_prompt, department = get_template_prompt(
        selected_patient_admission, template_prompt_dict
    )

    # Row mask for this admission; computed once and reused for the update.
    is_existing = bulk_generated_docs["name"] == selected_patient_admission
    already_generated = bool(is_existing.any())

    if already_generated:
        existing_doc = bulk_generated_docs.loc[is_existing, "generated_doc"].values[0]
        # A blank CSV cell is read back as NaN (a float). Treat any non-string
        # value as a failed generation instead of raising TypeError on `in`.
        has_error = not isinstance(existing_doc, str) or "Error" in existing_doc

        if has_error:
            print(f"Not skipping {selected_patient_admission}, because of error")
            if department == "psy":
                print(
                    f"skipped {selected_patient_admission} anyway as not wanted dept"
                )
                continue
        elif department != "ic":
            print(f"Skipping {selected_patient_admission}")
            continue
        else:
            # The old log line said "Skipping" here although the document was
            # regenerated; report what actually happens.
            print(f"Regenerating {selected_patient_admission}, because of dept ic")

    print(selected_patient_admission)
    patient_data = get_data_from_patient_admission(
        selected_patient_admission, data_dict
    )

    prompt_builder = PromptBuilder(
        temperature=TEMPERATURE, deployment_name=deployment_name, client=client
    )

    patient_file_string, _ = get_patient_file(patient_data)
    discharge_letter = prompt_builder.generate_discharge_doc(
        patient_file=patient_file_string,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        template_prompt=template_prompt,
    )
    generated_doc = discharge_letter

    if already_generated:
        # Overwrite the stored document in place; enc_id and name are kept.
        bulk_generated_docs.loc[is_existing, "generated_doc"] = generated_doc
    else:
        new_row = pd.DataFrame(
            {
                "enc_id": [patient_data["enc_id"].values[0]],
                "name": [selected_patient_admission],
                "generated_doc": [generated_doc],
            }
        )
        bulk_generated_docs = pd.concat(
            [bulk_generated_docs, new_row], ignore_index=True
        )

    # Written every iteration on purpose: each letter is checkpointed as soon
    # as it has been generated.
    bulk_generated_docs.to_csv(bulk_docs_path, index=False)

In [None]:
# for pre-release data


import os
from pathlib import Path

import pandas as pd
import tomli
from dotenv import load_dotenv
from openai import AzureOpenAI

from discharge_docs.dashboard.helper import (
    get_data_from_patient_admission,
    get_patients_from_list_names,
    get_template_prompt,
)
from discharge_docs.processing.processing import (
    get_patient_file,
)
from discharge_docs.prompts.prompt import (
    load_prompts,
    load_template_prompt,
)
from discharge_docs.prompts.prompt_builder import PromptBuilder

%load_ext autoreload
%autoreload 2

# --- Azure OpenAI client ----------------------------------------------------
# Pull environment variables from a local .env file (if any) before reading
# the key and endpoint below.
load_dotenv()
TEMPERATURE = 0.2

# Alternative deployment: "aiva-gpt" (GPT 3.5)
deployment_name = "aiva-gpt4"
client = AzureOpenAI(
    api_version="2024-02-01",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
)

# --- Source data (pre-release) ----------------------------------------------
processed_dir = Path.cwd().parent / "data" / "processed"
df_metavision = pd.read_parquet(processed_dir / "metavision_data_april_dp.parquet")
df_HIX = pd.read_parquet(processed_dir / "HiX_data.parquet")

# Department -> DataFrame holding that department's records.
df_dict = {
    "NICU": df_metavision,
    "IC": df_metavision,
    "CAR": df_HIX,
    "PSY": df_HIX,
}

# --- Encounter ids for pre-release phase 1 ----------------------------------
enc_ids_path = (
    Path.cwd().parent
    / "src"
    / "discharge_docs"
    / "dashboard"
    / "enc_ids_pre_release_phase1_1.toml"
)
with open(enc_ids_path, "rb") as f:
    enc_ids_dict = tomli.load(f)

# Flatten each table to its "ids" list.
for key in enc_ids_dict:
    enc_ids_dict[key] = enc_ids_dict[key]["ids"]

data_dict, values_list = get_patients_from_list_names(df_dict, enc_ids_dict)

# --- Prompts ----------------------------------------------------------------
user_prompt, system_prompt = load_prompts()
template_prompt_NICU = load_template_prompt("NICU")
template_prompt_IC = load_template_prompt("IC")
template_prompt_CAR = load_template_prompt("CAR")
template_prompt_PSY = load_template_prompt("PSY")
# Lower-case department key -> template; "demo" reuses the NICU template.
template_prompt_dict = dict(
    nicu=template_prompt_NICU,
    ic=template_prompt_IC,
    car=template_prompt_CAR,
    psy=template_prompt_PSY,
    demo=template_prompt_NICU,
)

# --- Previously generated documents -----------------------------------------
# Resume from an earlier run when its CSV exists; otherwise start empty.
existing_docs_path = processed_dir / "bulk_generated_docs_gpt4_PReval.csv"
try:
    bulk_generated_docs = pd.read_csv(existing_docs_path)
except FileNotFoundError:
    bulk_generated_docs = pd.DataFrame(columns=["enc_id", "name", "generated_doc"])

# Generate a discharge letter for every selected admission that has no usable
# document yet, checkpointing the results to CSV after each one so an
# interrupted run can be resumed.
bulk_docs_path = (
    Path.cwd().parent / "data" / "processed" / "bulk_generated_docs_gpt4_PReval.csv"
)

for selected_patient_admission in data_dict:
    # Resolved once per admission (the original looked it up twice for
    # admissions whose stored document contained an error).
    template_prompt, department = get_template_prompt(
        selected_patient_admission, template_prompt_dict
    )

    # Row mask for this admission; computed once and reused for the update.
    is_existing = bulk_generated_docs["name"] == selected_patient_admission
    already_generated = bool(is_existing.any())

    if already_generated:
        existing_doc = bulk_generated_docs.loc[is_existing, "generated_doc"].values[0]
        # A blank CSV cell is read back as NaN (a float). Treat any non-string
        # value as a failed generation instead of raising TypeError on `in`.
        if isinstance(existing_doc, str) and "Error" not in existing_doc:
            # A usable document already exists: nothing to do.
            print(f"Skipping {selected_patient_admission}")
            continue
        print(f"Not skipping {selected_patient_admission}, because of error")

    print(selected_patient_admission)
    patient_data = get_data_from_patient_admission(
        selected_patient_admission, data_dict
    )

    prompt_builder = PromptBuilder(
        temperature=TEMPERATURE, deployment_name=deployment_name, client=client
    )

    patient_file_string, _ = get_patient_file(patient_data)
    discharge_letter = prompt_builder.generate_discharge_doc(
        patient_file=patient_file_string,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        template_prompt=template_prompt,
    )
    generated_doc = discharge_letter

    if already_generated:
        # Overwrite the stored document in place; enc_id and name are kept.
        bulk_generated_docs.loc[is_existing, "generated_doc"] = generated_doc
    else:
        new_row = pd.DataFrame(
            {
                "enc_id": [patient_data["enc_id"].values[0]],
                "name": [selected_patient_admission],
                "generated_doc": [generated_doc],
            }
        )
        bulk_generated_docs = pd.concat(
            [bulk_generated_docs, new_row], ignore_index=True
        )

    # Written every iteration on purpose: each letter is checkpointed as soon
    # as it has been generated.
    bulk_generated_docs.to_csv(bulk_docs_path, index=False)

#### Pre-release phase 1 part 2

In [None]:
# for pre-release data


import os
from pathlib import Path

import pandas as pd
import tomli
from dotenv import load_dotenv
from openai import AzureOpenAI

from discharge_docs.dashboard.helper import (
    get_data_from_patient_admission,
    get_patients_from_list_names_pilot,
    get_template_prompt,
)
from discharge_docs.processing.processing import (
    get_patient_file,
)
from discharge_docs.prompts.prompt import (
    load_prompts,
    load_template_prompt,
)
from discharge_docs.prompts.prompt_builder import PromptBuilder

%load_ext autoreload
%autoreload 2

# --- Azure OpenAI client ----------------------------------------------------
# Pull environment variables from a local .env file (if any) before reading
# the key and endpoint below.
load_dotenv()
TEMPERATURE = 0.2

# Alternative deployment: "aiva-gpt" (GPT 3.5)
deployment_name = "aiva-gpt4"
client = AzureOpenAI(
    api_version="2024-02-01",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
)

# --- Source data (pre-release) ----------------------------------------------
processed_dir = Path.cwd().parent / "data" / "processed"
df_metavision = pd.read_parquet(processed_dir / "metavision_data_april_dp.parquet")
df_HIX = pd.read_parquet(processed_dir / "HiX_data.parquet")

# Department -> DataFrame holding that department's records.
df_dict = {
    "NICU": df_metavision,
    "IC": df_metavision,
    "CAR": df_HIX,
    "PSY": df_HIX,
}

# --- Encounter ids with their department ------------------------------------
# NOTE(review): this part writes to "..._PReval_2.csv" but still reads
# "enc_ids_pre_release_phase1_1.toml" - confirm that is the intended id file.
enc_ids_path = (
    Path.cwd().parent
    / "src"
    / "discharge_docs"
    / "dashboard"
    / "enc_ids_pre_release_phase1_1.toml"
)
with open(enc_ids_path, "rb") as f:
    enc_ids_dict = tomli.load(f)

# Pair every id with the department at the same position. strict=False means
# a length mismatch between "ids" and "department" is truncated silently.
id_dep_dict = {}
for key in enc_ids_dict:
    ids_and_departments = zip(
        enc_ids_dict[key]["ids"], enc_ids_dict[key]["department"], strict=False
    )
    id_dep_dict[key] = list(ids_and_departments)

data_dict, values_list = get_patients_from_list_names_pilot(df_dict, id_dep_dict)

# --- Prompts ----------------------------------------------------------------
user_prompt, system_prompt = load_prompts()
template_prompt_NICU = load_template_prompt("NICU")
template_prompt_IC = load_template_prompt("IC")
template_prompt_CAR = load_template_prompt("CAR")
template_prompt_PSY = load_template_prompt("PSY")
# Lower-case department key -> template; "demo" reuses the NICU template.
template_prompt_dict = dict(
    nicu=template_prompt_NICU,
    ic=template_prompt_IC,
    car=template_prompt_CAR,
    psy=template_prompt_PSY,
    demo=template_prompt_NICU,
)

# --- Previously generated documents -----------------------------------------
# Resume from an earlier run when its CSV exists; otherwise start empty.
existing_docs_path = processed_dir / "bulk_generated_docs_gpt4_PReval_2.csv"
try:
    bulk_generated_docs = pd.read_csv(existing_docs_path)
except FileNotFoundError:
    bulk_generated_docs = pd.DataFrame(columns=["enc_id", "name", "generated_doc"])

# Generate a discharge letter for every selected admission that has no usable
# document yet, checkpointing the results to CSV after each one so an
# interrupted run can be resumed.
bulk_docs_path = (
    Path.cwd().parent / "data" / "processed" / "bulk_generated_docs_gpt4_PReval_2.csv"
)

for selected_patient_admission in data_dict:
    # The template lookup uses the name without its last "_<suffix>" part
    # (presumably a tag appended by the pilot loader - verify against
    # get_patients_from_list_names_pilot). Resolved once per admission; the
    # original looked it up twice for admissions with a failed document.
    template_prompt, department = get_template_prompt(
        selected_patient_admission.rsplit("_", 1)[0], template_prompt_dict
    )

    # Row mask for this admission; computed once and reused for the update.
    is_existing = bulk_generated_docs["name"] == selected_patient_admission
    already_generated = bool(is_existing.any())

    if already_generated:
        existing_doc = bulk_generated_docs.loc[is_existing, "generated_doc"].values[0]
        # A blank CSV cell is read back as NaN (a float). Treat any non-string
        # value as a failed generation instead of raising TypeError on `in`.
        if isinstance(existing_doc, str) and "Error" not in existing_doc:
            # A usable document already exists: nothing to do.
            print(f"Skipping {selected_patient_admission}")
            continue
        print(f"Not skipping {selected_patient_admission}, because of error")

    print(selected_patient_admission)
    patient_data = get_data_from_patient_admission(
        selected_patient_admission, data_dict
    )

    prompt_builder = PromptBuilder(
        temperature=TEMPERATURE, deployment_name=deployment_name, client=client
    )

    patient_file_string, _ = get_patient_file(patient_data)
    discharge_letter = prompt_builder.generate_discharge_doc(
        patient_file=patient_file_string,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        template_prompt=template_prompt,
    )
    generated_doc = discharge_letter

    if already_generated:
        # Overwrite the stored document in place; enc_id and name are kept.
        bulk_generated_docs.loc[is_existing, "generated_doc"] = generated_doc
    else:
        new_row = pd.DataFrame(
            {
                "enc_id": [patient_data["enc_id"].values[0]],
                "name": [selected_patient_admission],
                "generated_doc": [generated_doc],
            }
        )
        bulk_generated_docs = pd.concat(
            [bulk_generated_docs, new_row], ignore_index=True
        )

    # Written every iteration on purpose: each letter is checkpointed as soon
    # as it has been generated.
    bulk_generated_docs.to_csv(bulk_docs_path, index=False)

#### Pre-release phase 1 part 3: Cardio

In [None]:
# for pre-release data
import os
from pathlib import Path

import pandas as pd
import tomli
from dotenv import load_dotenv
from openai import AzureOpenAI

from discharge_docs.dashboard.helper import (
    get_data_from_patient_admission,
    get_patients_from_list_names_pilot,
    get_template_prompt,
)
from discharge_docs.processing.processing import (
    get_patient_file,
)
from discharge_docs.prompts.prompt import (
    load_prompts,
    load_template_prompt,
)
from discharge_docs.prompts.prompt_builder import PromptBuilder

%load_ext autoreload
%autoreload 2

# --- Azure OpenAI client ----------------------------------------------------
# Pull environment variables from a local .env file (if any) before reading
# the key and endpoint below.
load_dotenv()
TEMPERATURE = 0.2

# Alternative deployment: "aiva-gpt" (GPT 3.5)
deployment_name = "aiva-gpt4"
client = AzureOpenAI(
    api_version="2024-02-01",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
)

# --- Source data (pre-release, cardiology pre-pilot extract) ----------------
processed_dir = Path.cwd().parent / "data" / "processed"
df_metavision = pd.read_parquet(processed_dir / "metavision_data_april_dp.parquet")
df_HIX = pd.read_parquet(processed_dir / "HiX_CAR_data_pre_pilot.parquet")

# Department -> DataFrame holding that department's records.
df_dict = {
    "NICU": df_metavision,
    "IC": df_metavision,
    "CAR": df_HIX,
    "PSY": df_HIX,
}

# --- Encounter ids with their department ------------------------------------
# NOTE(review): this part writes to "..._PReval_3.csv" but still reads
# "enc_ids_pre_release_phase1_1.toml" - confirm that is the intended id file.
enc_ids_path = (
    Path.cwd().parent
    / "src"
    / "discharge_docs"
    / "dashboard"
    / "enc_ids_pre_release_phase1_1.toml"
)
with open(enc_ids_path, "rb") as f:
    enc_ids_dict = tomli.load(f)

# Pair every id with the department at the same position. strict=False means
# a length mismatch between "ids" and "department" is truncated silently.
id_dep_dict = {}
for key in enc_ids_dict:
    ids_and_departments = zip(
        enc_ids_dict[key]["ids"], enc_ids_dict[key]["department"], strict=False
    )
    id_dep_dict[key] = list(ids_and_departments)

data_dict, values_list = get_patients_from_list_names_pilot(df_dict, id_dep_dict)

# --- Prompts ----------------------------------------------------------------
user_prompt, system_prompt = load_prompts()
template_prompt_NICU = load_template_prompt("NICU")
template_prompt_IC = load_template_prompt("IC")
template_prompt_CAR = load_template_prompt("CAR")
template_prompt_PSY = load_template_prompt("PSY")
# Lower-case department key -> template; "demo" reuses the NICU template.
template_prompt_dict = dict(
    nicu=template_prompt_NICU,
    ic=template_prompt_IC,
    car=template_prompt_CAR,
    psy=template_prompt_PSY,
    demo=template_prompt_NICU,
)

# --- Previously generated documents -----------------------------------------
# Resume from an earlier run when its CSV exists; otherwise start empty.
existing_docs_path = processed_dir / "bulk_generated_docs_gpt4_PReval_3.csv"
try:
    bulk_generated_docs = pd.read_csv(existing_docs_path)
except FileNotFoundError:
    bulk_generated_docs = pd.DataFrame(columns=["enc_id", "name", "generated_doc"])

# Generate a discharge letter for every selected admission that has no usable
# document yet, checkpointing the results to CSV after each one so an
# interrupted run can be resumed.
bulk_docs_path = (
    Path.cwd().parent / "data" / "processed" / "bulk_generated_docs_gpt4_PReval_3.csv"
)

for selected_patient_admission in data_dict:
    # The template lookup uses the name without its last "_<suffix>" part
    # (presumably a tag appended by the pilot loader - verify against
    # get_patients_from_list_names_pilot). Resolved once per admission; the
    # original looked it up twice for admissions with a failed document.
    template_prompt, department = get_template_prompt(
        selected_patient_admission.rsplit("_", 1)[0], template_prompt_dict
    )

    # Row mask for this admission; computed once and reused for the update.
    is_existing = bulk_generated_docs["name"] == selected_patient_admission
    already_generated = bool(is_existing.any())

    if already_generated:
        existing_doc = bulk_generated_docs.loc[is_existing, "generated_doc"].values[0]
        # A blank CSV cell is read back as NaN (a float). Treat any non-string
        # value as a failed generation instead of raising TypeError on `in`.
        if isinstance(existing_doc, str) and "Error" not in existing_doc:
            # A usable document already exists: nothing to do.
            print(f"Skipping {selected_patient_admission}")
            continue
        print(f"Not skipping {selected_patient_admission}, because of error")

    print(selected_patient_admission)
    patient_data = get_data_from_patient_admission(
        selected_patient_admission, data_dict
    )

    prompt_builder = PromptBuilder(
        temperature=TEMPERATURE, deployment_name=deployment_name, client=client
    )

    patient_file_string, _ = get_patient_file(patient_data)
    discharge_letter = prompt_builder.generate_discharge_doc(
        patient_file=patient_file_string,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        template_prompt=template_prompt,
    )
    generated_doc = discharge_letter

    if already_generated:
        # Overwrite the stored document in place; enc_id and name are kept.
        bulk_generated_docs.loc[is_existing, "generated_doc"] = generated_doc
    else:
        new_row = pd.DataFrame(
            {
                "enc_id": [patient_data["enc_id"].values[0]],
                "name": [selected_patient_admission],
                "generated_doc": [generated_doc],
            }
        )
        bulk_generated_docs = pd.concat(
            [bulk_generated_docs, new_row], ignore_index=True
        )

    # Written every iteration on purpose: each letter is checkpointed as soon
    # as it has been generated.
    bulk_generated_docs.to_csv(bulk_docs_path, index=False)

#### Pre-release phase 1 part 4: additional Cardio files

In [None]:
# for pre-release data
import os
from pathlib import Path

import pandas as pd
import tomli
from dotenv import load_dotenv
from openai import AzureOpenAI

from discharge_docs.dashboard.helper import (
    get_data_from_patient_admission,
    get_patients_from_list_names_pilot,
    get_template_prompt,
)
from discharge_docs.processing.processing import (
    get_patient_file,
)
from discharge_docs.prompts.prompt import (
    load_prompts,
    load_template_prompt,
)
from discharge_docs.prompts.prompt_builder import PromptBuilder

%load_ext autoreload
%autoreload 2

# --- Azure OpenAI client ----------------------------------------------------
# Pull environment variables from a local .env file (if any) before reading
# the key and endpoint below.
load_dotenv()
TEMPERATURE = 0.2

# Alternative deployment: "aiva-gpt" (GPT 3.5)
deployment_name = "aiva-gpt4"
client = AzureOpenAI(
    api_version="2024-02-01",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
)

# --- Source data (pre-release, cardiology pre-pilot extract) ----------------
# NOTE(review): both parquet files and the id file below are the same ones the
# "part 3" cell loads, so this cell appears to select the same admissions -
# confirm which inputs the "additional Cardio files" should come from.
processed_dir = Path.cwd().parent / "data" / "processed"
df_metavision = pd.read_parquet(processed_dir / "metavision_data_april_dp.parquet")
df_HIX = pd.read_parquet(processed_dir / "HiX_CAR_data_pre_pilot.parquet")

# Department -> DataFrame holding that department's records.
df_dict = {
    "NICU": df_metavision,
    "IC": df_metavision,
    "CAR": df_HIX,
    "PSY": df_HIX,
}

# --- Encounter ids with their department ------------------------------------
enc_ids_path = (
    Path.cwd().parent
    / "src"
    / "discharge_docs"
    / "dashboard"
    / "enc_ids_pre_release_phase1_1.toml"
)
with open(enc_ids_path, "rb") as f:
    enc_ids_dict = tomli.load(f)

# Pair every id with the department at the same position. strict=False means
# a length mismatch between "ids" and "department" is truncated silently.
id_dep_dict = {}
for key in enc_ids_dict:
    ids_and_departments = zip(
        enc_ids_dict[key]["ids"], enc_ids_dict[key]["department"], strict=False
    )
    id_dep_dict[key] = list(ids_and_departments)

data_dict, values_list = get_patients_from_list_names_pilot(df_dict, id_dep_dict)

# --- Prompts ----------------------------------------------------------------
user_prompt, system_prompt = load_prompts()
template_prompt_NICU = load_template_prompt("NICU")
template_prompt_IC = load_template_prompt("IC")
template_prompt_CAR = load_template_prompt("CAR")
template_prompt_PSY = load_template_prompt("PSY")
# Lower-case department key -> template; "demo" reuses the NICU template.
template_prompt_dict = dict(
    nicu=template_prompt_NICU,
    ic=template_prompt_IC,
    car=template_prompt_CAR,
    psy=template_prompt_PSY,
    demo=template_prompt_NICU,
)

# --- Previously generated documents -----------------------------------------
# Resume from an earlier run when its CSV exists; otherwise start empty.
existing_docs_path = processed_dir / "bulk_generated_docs_gpt4_PReval_4.csv"
try:
    bulk_generated_docs = pd.read_csv(existing_docs_path)
except FileNotFoundError:
    bulk_generated_docs = pd.DataFrame(columns=["enc_id", "name", "generated_doc"])

# Generate a discharge letter for every selected admission that has no usable
# document yet, checkpointing the results to CSV after each one so an
# interrupted run can be resumed.
bulk_docs_path = (
    Path.cwd().parent / "data" / "processed" / "bulk_generated_docs_gpt4_PReval_4.csv"
)

for selected_patient_admission in data_dict:
    # The template lookup uses the name without its last "_<suffix>" part
    # (presumably a tag appended by the pilot loader - verify against
    # get_patients_from_list_names_pilot). Resolved once per admission; the
    # original looked it up twice for admissions with a failed document.
    template_prompt, department = get_template_prompt(
        selected_patient_admission.rsplit("_", 1)[0], template_prompt_dict
    )

    # Row mask for this admission; computed once and reused for the update.
    is_existing = bulk_generated_docs["name"] == selected_patient_admission
    already_generated = bool(is_existing.any())

    if already_generated:
        existing_doc = bulk_generated_docs.loc[is_existing, "generated_doc"].values[0]
        # A blank CSV cell is read back as NaN (a float). Treat any non-string
        # value as a failed generation instead of raising TypeError on `in`.
        if isinstance(existing_doc, str) and "Error" not in existing_doc:
            # A usable document already exists: nothing to do.
            print(f"Skipping {selected_patient_admission}")
            continue
        print(f"Not skipping {selected_patient_admission}, because of error")

    print(selected_patient_admission)
    patient_data = get_data_from_patient_admission(
        selected_patient_admission, data_dict
    )

    prompt_builder = PromptBuilder(
        temperature=TEMPERATURE, deployment_name=deployment_name, client=client
    )

    patient_file_string, _ = get_patient_file(patient_data)
    discharge_letter = prompt_builder.generate_discharge_doc(
        patient_file=patient_file_string,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        template_prompt=template_prompt,
    )
    generated_doc = discharge_letter

    if already_generated:
        # Overwrite the stored document in place; enc_id and name are kept.
        bulk_generated_docs.loc[is_existing, "generated_doc"] = generated_doc
    else:
        new_row = pd.DataFrame(
            {
                "enc_id": [patient_data["enc_id"].values[0]],
                "name": [selected_patient_admission],
                "generated_doc": [generated_doc],
            }
        )
        bulk_generated_docs = pd.concat(
            [bulk_generated_docs, new_row], ignore_index=True
        )

    # Written every iteration on purpose: each letter is checkpointed as soon
    # as it has been generated.
    bulk_generated_docs.to_csv(bulk_docs_path, index=False)