# Building LLM-based applications for writing discharge summaries

In this notebook, we will build an application that writes discharge summaries.

For required packages, please run:

```bash
poetry install
```

There are a few methods explored in this notebook:
1. Stuff with human and system prompt
2. Stuff with human prompt
3. Decompose method
4. MapReduce method
5. Refine method

In [1]:
import os
import sys
from pathlib import Path

import nest_asyncio
from dotenv import load_dotenv
from IPython.display import Markdown
from langchain_openai import AzureChatOpenAI

# nest_asyncio patches asyncio so an event loop can be re-entered, which is
# needed when async code is driven from inside an already-running notebook loop.
nest_asyncio.apply()

# Load environment variables from a local .env file.
# NOTE(review): presumably this supplies the Azure OpenAI endpoint/key that
# AzureChatOpenAI picks up — confirm against the project's .env template.
load_dotenv()
llm = AzureChatOpenAI(
    deployment_name="gpt_35_16k",
    model_name="gpt-35-turbo-16k"
)

# Add notebooks parent directory to path (only once, so re-running this cell
# does not keep appending duplicate entries to sys.path).
project_folder = Path(os.getcwd()).parent
if str(project_folder) not in sys.path:
    sys.path.append(str(project_folder))

output_folder = project_folder / "data" / "output"
case_dir = project_folder / "data" / 'processed'

# The processed cases are an input: fail early with a clear message if missing.
if not case_dir.exists():
    raise FileNotFoundError(f"Cases directory not found at {case_dir}")

# The output folder is created on demand; exist_ok avoids the separate
# exists() check and makes the cell safe to re-run.
output_folder.mkdir(parents=True, exist_ok=True)

In [2]:
# Local files
from helpers.read_data import read_json_file, read_markdown_file

# Read cases

In [3]:
# Keep only the "*_original.md" cases - ignore other languages for now.
# Path.glob gives no ordering guarantee, so the list is sorted to make the
# selected case the same on every run and on every machine.
file_list = sorted(
    f for f in (case_dir / 'merged').glob('*.md')
    if f.stem.endswith('_original')
)
if not file_list:
    # Fail with an explicit message instead of an IndexError on file_list[0].
    raise FileNotFoundError(
        f"No '*_original.md' case files found in {case_dir / 'merged'}"
    )
work_with_file = file_list[0]

case = read_markdown_file(path=work_with_file)

# Print first 1000 characters of the case
Markdown(case[:1000] + '...')

# Medications, 2024-01-08
 * Norvasc (Amlodipine), PO, 5 Mg 1+0+0 times per day
 * Cozaar (Losartan), PO, 50 Mg 1+0+0 times per day
 * Fortamet (Metformin), PO, 500 Mg 2+0+2 times per day
 * Pfizerpen (Benzylpenicillin), IV, 1 G 0+0+1 times per day
 * Roxicodone (Oxicodone), PO, 5 Mg 0+0+1 times per day
 * Veetids (Phenoxymethylpenicillin), PO, 1 G 0+0+0 times per day

# Lab values, 2024-01-08, 18:26
 * B-Leukocytes 16.2 x10^9/L (3.5-8.8)
 * B-Erytrocytes 7.7 x10^12/L (3.9-5.2)
 * B-Hemoglobin 120 g/L (117-153)
 * B-EVF 0.45 U (0.35-0.46)
 * ERC(B)-MCV 92 fL (82-98)
 * ERC(B)-MCH 30 x10^9/L (27-33)
 * B-Trombocytes 178 x10^9/L (165-387)
 * P-Glucose 7.2 mmol/L (4.2-6.0)
 * P-Sodium 140 mmol/L (137-145)
 * P-Potassium 4.5 mmol/L (3.5-4.6)
 * P-CRP 279 mg/L (<3)
 * P-Creatinine 137 μmol/L (<90)

# ED admission note, 2024-01-08, 19:43, Dr A 

## Social security number
19550101–1111

## Reason for admittance
Swollen leg.

## Earlier/current diseases
Hypertension, diabetes mellitus type 2, ...

In [4]:
# Collect the "*_original.json" cases; sorted because Path.glob gives no
# ordering guarantee and the first element is used as a fallback below.
json_file_list = sorted(
    f for f in (case_dir / 'markdown').glob('*.json')
    if f.stem.endswith('_original')
)
if not json_file_list:
    # Fail with an explicit message instead of an IndexError on [0].
    raise FileNotFoundError(
        f"No '*_original.json' case files found in {case_dir / 'markdown'}"
    )

# Prefer the JSON file whose stem matches the markdown file selected earlier
# (`work_with_file`), so that `case` and `case_object` describe the same case.
# NOTE(review): assumes the .md and .json files share the same stem — confirm;
# if no match is found this falls back to the first file in sorted order.
json_file = next(
    (f for f in json_file_list if f.stem == work_with_file.stem),
    json_file_list[0],
)

# Read and parse the json file into a case object
case_object = read_json_file(json_file)
case_object

Case(id='Surgery_Case 1', language='original', lab='|labTest      |referenceInterval |unit   | value|date       |time  |\n|:------------|:-----------------|:------|-----:|:----------|:-----|\n|B-Hemoglobin |117-153           |g/L    |  96.0|2024-02-01 |10:57 |\n|B-Hemoglobin |117-153           |g/L    |  83.0|2024-02-01 |14:33 |\n|B-Hemoglobin |117-153           |g/L    | 102.0|2024-02-02 |06:37 |\n|B-Hemoglobin |117-153           |g/L    |  93.0|2024-02-03 |06:47 |\n|B-Hemoglobin |117-153           |g/L    |  91.0|2024-02-04 |06:27 |\n|B-Hemoglobin |117-153           |g/L    |  93.0|2024-02-04 |06:55 |\n|P-Glucose    |4.2-6.0           |mmol/L |   7.1|2024-02-01 |10:57 |\n|P-Glucose    |4.2-6.0           |mmol/L |   7.1|2024-02-02 |06:37 |\n|P-CRP        |<3                |mg/L   |  10.0|2024-02-02 |06:37 |', singleLab=[LabTest(lab_test='B-Hemoglobin', reference_interval='117-153', unit='g/L', value=96, date=datetime.date(2024, 2, 1), time='10:57'), LabTest(lab_test='B-Hemoglobin', r

# Basic method (Naive)

In [None]:
from basic.basic import basic_chain

# Build the basic chain, take its `single` runnable and invoke it with the
# markdown case passed under the "notes" key.
single_chain = basic_chain(llm=llm).single
out = single_chain.invoke({"notes": case})

# Render the returned text as Markdown in the notebook.
Markdown(out)

In [None]:
from basic.basic import create_multiple_basic_prompts

# Run the basic method on the markdown case.
# NOTE(review): n=2 is presumably the number of generations per prompt — confirm.
stuff_df = create_multiple_basic_prompts(
    case=case,
    n=2,
    llm=llm,
)
stuff_df

In [None]:
# Save the basic-method results as CSV in the output folder.
stuff_df.to_csv(output_folder / "stuff_df_final.csv")

# Decompose Technique

In [5]:
from decompose import combine_all_sections

# Run the decompose method on the parsed (JSON) case object.
# NOTE(review): n=2 is presumably the number of generations — confirm.
decompose_df = combine_all_sections(
    case=case_object,
    llm=llm,
    n=2,
)

In [None]:
# Save the decompose-method results as CSV in the output folder.
decompose_df.to_csv(output_folder / "decompose_df_final.csv")

# Map Reduce Prompt Techniques

In [7]:
from map_reduce import create_mapreduce_df

# Run the map-reduce method on the parsed (JSON) case object.
# NOTE(review): n=2 is presumably the number of generations — confirm.
mapreduce_sum = create_mapreduce_df(
    case=case_object,
    llm=llm,
    n=2,
)

In [8]:
# Save the map-reduce results as CSV in the output folder.
mapreduce_sum.to_csv(output_folder / "mapreduce_final.csv")

# Refine Prompt Techniques

In [11]:
from refine import create_refine_df

# Run the refine method on the parsed (JSON) case object.
# NOTE(review): n=2 is presumably the number of generations — confirm.
refine_sum = create_refine_df(
    case=case_object,
    n=2,
    llm=llm,
)

# Save the refine results as CSV in the output folder.
refine_sum.to_csv(output_folder / "refine_final.csv")