In [1]:

import json
import random

def load_subset(file_path, num_samples):
    subset = []
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        chosen_lines = random.sample(lines, num_samples)
        for line in chosen_lines:
            json_obj = json.loads(line.strip())
            subset.append(json_obj)
    return subset

def is_medical(text, medical_keywords):
    for keyword in medical_keywords:
        if keyword in text:
            return True
    return False

if __name__ == "__main__":
    file_path = '/home/IAIS/jdatta/kenlm_training/oscar_dataset.jsonl' 
    num_samples = 1000  # Number of samples you want to take
    german_medical_keywords = [
    "Gesundheit", "Krankheit", "Behandlung", "Medikament", "Therapie",
    "Diagnose", "Patient", "Arzt", "Klinik", "Operation",
    "Symptom", "Impfung", "Virus", "Infektion", "Schmerz",
    "Fieber", "Blutdruck", "Chirurgie", "Herz", "Krebs",
    "Diabetes", "Heilung", "Prävention", "Röntgen", "Allergie",
    "Atemwege", "Blut", "Gehirn", "Haut", "Knochen",
    "Leber", "Niere", "Sehvermögen", "Zahn", "Magen",
    "Ernährung", "Geburt", "Genetik", "Hygiene", "Immunsystem",
    "Infektionskrankheit", "Kardiologie", "Neurologie", "Orthopädie", "Pädiatrie",
    "Psychiatrie", "Radiologie", "Urologie", "Virologie", "Zahnmedizin",
    "Antibiotikum", "Biopsie", "Dialyse", "EKG", "Endoskopie",
    "Fraktur", "Grippe", "Hepatitis", "Impfstoff", "Katarakt",
    "Leukämie", "Mammographie", "Narkose", "Osteoporose", "Pathologie",
    "Quarantäne", "Rehabilitation", "Sterilisation", "Transplantation", "Ultraschall",
    "Vene", "Wunde", "Zyste", "Anästhesie", "Bronchitis",
    "Demenz", "Embolie", "Fettleibigkeit", "Gastroenterologie", "Hormon",
    "Infusion", "Jaundice", "Koma", "Lähmung", "Malaria",
    "Neuropathie", "Onkologie", "Pneumonie", "Rheuma", "Sepsis",
    "Tuberkulose", "Ulzeration", "Vakzin", "Arterie", "Blutzucker",
    "CT", "Dermatologie", "Epidemie", "Fistel", "Gehör",
    "Herzinfarkt", "Insemination", "Juckreiz", "Knorpel", "Lungenentzündung",
    "Metastase", "Neuralgie", "Orthodontie", "Pankreatitis", "Retina",
    "Skoliose", "Tumor", "Ulkus", "Varize", "Wirbelsäule",
    "Zerebral", "Amputation", "Bluttransfusion", "Chemotherapie", "Dysplasie",
    "Epilepsie", "Fistel", "Glaukom", "Hämorrhoiden", "Insulin",
    "Kardiomyopathie", "Lipid", "Muskeldystrophie", "Nephrologie", "Otitis",
    "Phlebitis", "Röntgenstrahl", "Stent", "Thrombose", "Uterus",
    "Vene", "Wundheilung", "Zytologie", "Anämie", "Biologie",
    "Cystoskopie", "Dyspnoe", "Endokrinologie", "Fibrose", "Gelenk",
    "Hysterektomie", "Intensivstation", "Koloskopie", "Lipom", "Myokardinfarkt"
]

    dataset_subset = load_subset(file_path, num_samples)

    for item in dataset_subset:
        text = item['text']  # Assuming the text content is under the key 'text'
        if is_medical(text, german_medical_keywords):
            print("Medical:", text[:100])  # Print first 100 characters for brevity
        else:
            print("Non-Medical:", text[:100])


In [None]:
def classify_text(text, keywords):
    return 'medical' if any(keyword in text for keyword in keywords) else 'non-medical'

for sample in dataset.take(5):
    classification = classify_text(sample['text'], german_medical_keywords)
    print(f"Text: {sample['text']}\nClassification: {classification}\n")

In [2]:
def classify_text(text, keywords):
    return 'medical' if any(keyword.lower() in text.lower() for keyword in keywords) else 'non-medical'

In [None]:
def add_classification(example):
    example['classification'] = classify_text(example['text'], german_medical_keywords)
    return example

dataset = dataset.map(add_classification)

In [1]:
from datasets import load_from_disk
ds = load_from_disk('/home/IAIS/jdatta/kenlm_training/oscar_dataset')

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import os
from openai import OpenAI

os.environ["OPENAI_API_KEY"] = ""
client = OpenAI()

response = client.chat.completions.create(
  model="gpt-3.5-turbo-0125",
  response_format={ "type": "json_object" },
  messages=[
    {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
    {"role": "user", "content": "Ergänzende Krebstherapie: Behandlung mit bioaktiven Bestandteilen aus dem Blut des Crocodylus Siamensis (Krokodilblut) aus Thailand." 
     "\nZahlreiche laufende internationale Studien- und Forschungsprojekte vieler Universitäten zeigen, dass Krokodilblut ganz spezielle Proteine (Eiweiße) enthält." 
     "Diese zeigen eine starke antioxidative Wirkung speziell auf Tumorzellen. Man kann von einem ..."
     "Antwort: \n\n Antworten Sie nur mit medizinisch oder nicht-medizinisch."
     "Erklärung: \n\n Bitte geben Sie eine kurze Erklärung, warum es sich um einen medizinischen Text handelt oder nicht." 
     "Verfassen Sie eine Antwort mit höchstens 30 Wörtern."}
  ]
)
print(response.choices[0].message.content)

{
    "Antwort": "medizinisch",
    "Erklärung": "Es handelt sich um eine Beschreibung einer Krebstherapie mit bioaktiven Bestandteilen aus Krokodilblut, die antioxidative Wirkung auf Tumorzellen zeigt."
}


In [None]:
########### Medical Text Extraction ################

import os
import json
from openai import OpenAI
from collections import OrderedDict

os.environ["OPENAI_API_KEY"] = ""
client = OpenAI()

input_file_path = '/home/IAIS/jdatta/kenlm_training/text.jsonl'
output_file_path = '/home/IAIS/jdatta/kenlm_training/output.json'

MAX_TOKENS = 10000

with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file:
    for line in input_file:
        input_data = json.loads(line)
        num_tokens = input_data.get('metadata', {}).get('num_tokens', 0)
        
        input_id = input_data['id']
        if num_tokens > MAX_TOKENS:
          output_data = {
                'id': input_id,
                'Antwort': 'token-Länge überschreitet'
            }
        else:
          input_text = input_data['text']
          detailed_prompt = f"""
Klassifizieren Sie den folgenden Text entweder als "medizinisch" oder "nicht-medizinisch" und 
geben Sie eine kurze Erklärung (nicht mehr als 30 Wörter) für Ihre Klassifizierung. 
Formatieren Sie Ihre Antwort als JSON-Objekt mit zwei Schlüsseln: "Antwort" und "Erklärung".
Text: "{input_text}"
"""
        
        response = client.chat.completions.create(
          model="gpt-3.5-turbo-0125",
          response_format={ "type": "json_object" },
          messages=[
            {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
            {'role': 'user', 'content': detailed_prompt}
            ]
          )
        chat_response = response.choices[0].message.content
        parsed_json = json.loads(chat_response)
        ordered_json = OrderedDict([('id', input_id)] + list(parsed_json.items()))
        # parsed_json['id'] = input_id
        #output_file.write(json.dumps(ordered_json, ensure_ascii=False, indent=4) + '\n')
        output_file.write(json.dumps(ordered_json, ensure_ascii=False) + '\n')
        
#print('Response saved to output.json')
print('Response saved to output.jsonl')

Response saved to output.jsonl


In [None]:
Text: "{input_text}"

In [None]:
########### Medical Text Extraction with tiktoken (Prompt-1)################

import os
import json
import tiktoken
from openai import OpenAI
from collections import OrderedDict

os.environ["OPENAI_API_KEY"] = ""
client = OpenAI()

input_file_path = '/home/IAIS/jdatta/kenlm_training/test_dataset.jsonl'
output_file_path = '/home/IAIS/jdatta/kenlm_training/output.jsonl'

MAX_TOKENS = 14000
model = "gpt-3.5-turbo-0125"

def calculate_prompt_tokens(detailed_prompt, model="gpt-3.5-turbo-0125"):
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(detailed_prompt))

with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file:
    for line in input_file:
        input_data = json.loads(line)
        input_text = input_data['text']
        input_id = input_data['id']   
        
        detailed_prompt = f"""
Du bist ein AI Assistant. Du klassifizierst Texte zu Medizin nur wenn der Text Medizinische Worte enthält.
Deine Antworten gebst du nur im JSON-Format, entweder als {{"Medizin":"True"}} oder {{"Medizin":"False"}}.
Du gibst eine kurze Erklärung (nicht mehr als 30 wörter) für deine Klassifizierung. 
Enthält der folgende Text medizinische Worte?
Text: <{input_text}>
"""
        num_tokens = calculate_prompt_tokens(detailed_prompt, model)  
         
        if num_tokens > MAX_TOKENS:
            output_data = {
                'id': input_id,
                'Antwort': 'token-Länge überschreitet',
                'Token-Anzahl': num_tokens
            }
            output_file.write(json.dumps(output_data, ensure_ascii=False) + '\n')
        else:
            response = client.chat.completions.create(
                model=model,
                response_format={"type": "json_object"},
                messages=[
                    {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                    {'role': 'user', 'content': detailed_prompt}
                ]
            )
            chat_response = response.choices[0].message.content
            parsed_json = json.loads(chat_response)
            ordered_json = OrderedDict([('id', input_id)] + list(parsed_json.items()))
            output_file.write(json.dumps(ordered_json, ensure_ascii=False) + '\n')
        
print('Response saved to output.jsonl')

Response saved to output.jsonl


In [None]:
########### Medical Text Extraction with tiktoken Prompt-2(examples from internet)################

import os
import json
import tiktoken
from openai import OpenAI
from collections import OrderedDict

os.environ["OPENAI_API_KEY"] = ""
client = OpenAI()

input_file_path = '/home/IAIS/jdatta/kenlm_training/test_dataset.jsonl'
output_file_path = '/home/IAIS/jdatta/kenlm_training/output_4.jsonl'

MAX_TOKENS = 14000
model = "gpt-3.5-turbo-0125"

def calculate_prompt_tokens(detailed_prompt, model="gpt-3.5-turbo-0125"):
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(detailed_prompt))

with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file:
    for line in input_file:
        input_data = json.loads(line)
        input_text = input_data['text']
        #input_id = input_data['id']   
        
        detailed_prompt = f"""
Du bist ein Textklassifizierungsassistent, 
der Texte in medizinische und nicht-medizinische Texte klassifiziert.
 
Beispiel für einen medizinischen Text:
Text: "Typ-2-Diabetes mellitus ist eine chronische Erkrankung, 
die den Glukosestoffwechsel betrifft. Symptome umfassen erhöhten Durst, 
häufiges Urinieren und unerklärlichen Gewichtsverlust."
 
Beispiel für einen nicht-medizinischen Text:
Text: "Die Geschichte von Rom ist faszinierend und umfasst mehrere Jahrhunderte, 
von der legendären Gründung durch Romulus und Remus bis zum Fall des Römischen Reiches."
 
Deine Aufgabe ist es, die folgenden Aktionen durchzuführen:
1 - Klassifiziere, ob der Text ein medizinischer Text ist oder nicht.
Wenn der Text medizinische Keywörter oder Fachbegriffe enthält, Gesundheitszustände, 
Behandlungen oder Praktiken im Gesundheitswesen behandelt, 
2 - Erkläre, warum der Text medizinisch oder nicht-medizinisch ist, 
in dem du den Text mit höchstens 30 Wörtern zusammenfasst
 
Gewünschtes Format:
Medizin: <True or False>
Erklärung: <zusammenfassen>
label: <medical or non-medical>
 
Text: ```{input_text}```
"""
        num_tokens = calculate_prompt_tokens(detailed_prompt, model)  
         
        if num_tokens > MAX_TOKENS:
            output_data = {
                #'id': input_id,
                'Antwort': 'token-Länge überschreitet',
                'Token-Anzahl': num_tokens
            }
            output_file.write(json.dumps(output_data, ensure_ascii=False) + '\n')
        else:
            response = client.chat.completions.create(
                model=model,
                temperature=0.7,
                response_format={"type": "json_object"},
                messages=[
                    {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                    {'role': 'user', 'content': detailed_prompt}
                ]
            )
            chat_response = response.choices[0].message.content
            parsed_json = json.loads(chat_response)
            ordered_json = OrderedDict(list(parsed_json.items()))
            #ordered_json = OrderedDict([('id', input_id)] + list(parsed_json.items()))
            output_file.write(json.dumps(ordered_json, ensure_ascii=False) + '\n')
        
print('Response saved to output_4.jsonl')

Response saved to output_4.jsonl


In [2]:
import json

def load_data(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        line_number = 0
        for line in file:
            line_number += 1
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON on line {line_number}: {line.strip()}")
                print(f"Error message: {e}")
    return data

true_data = load_data('/home/IAIS/jdatta/kenlm_training/test_dataset.jsonl')
predicted_data = load_data('/home/IAIS/jdatta/kenlm_training/output_5.jsonl')


In [6]:
import json

def load_data(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        line_number = 0
        for line in file:
            line_number += 1
            try:
                entry = json.loads(line)
                if 'label' not in entry:
                    print(f"Missing 'label' key at line {line_number}: {line.strip()}")
                else:
                    data.append(entry)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON on line {line_number}: {line.strip()}")
                print(f"Error message: {e}")
    return data

true_data = load_data('/home/IAIS/jdatta/kenlm_training/test_dataset.jsonl')

Missing 'label' key at line 15: {"text": "Anlauf- und Beratungsstelle f\u00fcr Menschen mit Drogenkonsum sowie f\u00fcr Menschen in prek\u00e4ren Lebenssituationen und Krisen.\nCafebetrieb, Mittagessen, Schutzraum, Waschm\u00f6glichkeit, Spritzentausch, Tagesstrukturierende Angebote/ Freizeitaktionen, \u00e4rztliche Ordination, HIV und HCV - Testungen, Klientenkonto, Meldeadresse, Wohnbegleitung, allgemeine sozialarbeiterische Unterst\u00fctzung und Betreuung, Online Beratung,\u2026..\nTr\u00e4ger der Einrichtung ist der Verein Do it yourself - Hilfe zur Selbsthilfe, Information und Aufkl\u00e4rung in Drogenangelegenheiten. Betroffenenvertretung im Vereinsvorstand.\nBeratung: Mo, Di, Mi, Fr von 09:00 bis 16:00Uhr und Do. von 12:00 bis 16:00Uhr Cafebetrieb: Mo, Di, Mi, Fr von 10:00 bis 14:00Uhr und Do. von 12:00 bis 16:00Uhr \u00e4rztliche Ordination: 13:00 bis 16:00Uhr\nAbo im Wert von CHF 1'300.00 f\u00fcr nur CHF 1'200.00. Das Wertabo muss im voraus bezahlt werden. (Details siehe Kun

In [6]:
import json
from sklearn.metrics import precision_score, recall_score, confusion_matrix, f1_score

def load_data(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            data.append(json.loads(line))
    return data

true_data = load_data('/home/IAIS/jdatta/kenlm_training/test_dataset.jsonl')
predicted_data = load_data('/home/IAIS/jdatta/kenlm_training/output_4.jsonl')

true_labels = [entry['label'] for entry in true_data] 
predictions = [entry['label'] for entry in predicted_data]

precision = precision_score(true_labels, predictions, pos_label='medical')
recall = recall_score(true_labels, predictions, pos_label='medical')
f1 = f1_score(true_labels, predictions, pos_label='medical')

conf_matrix = confusion_matrix(true_labels, predictions)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)

Precision: 1.00
Recall: 0.84
F1-Score: 0.91
Confusion Matrix:
[[43  8]
 [ 0 49]]


In [23]:
import pandas as pd
from tabulate import tabulate

labels = ['medical', 'non-medical']
conf_matrix_df = pd.DataFrame(conf_matrix, index=labels, columns=labels)

print(tabulate(conf_matrix_df, headers='keys', tablefmt='psql'))

conf_matrix_df.to_csv('confusion_matrix.csv', index=True)

+-------------+-----------+---------------+
|             |   medical |   non-medical |
|-------------+-----------+---------------|
| medical     |        45 |             6 |
| non-medical |         0 |            49 |
+-------------+-----------+---------------+


In [None]:
########### Medical Text Extraction with tiktoken Prompt-3(examples from Oscar)################

import os
import json
import tiktoken
from openai import OpenAI
from collections import OrderedDict

os.environ["OPENAI_API_KEY"] = ""
client = OpenAI()

input_file_path = '/home/IAIS/jdatta/kenlm_training/1000documents.jsonl'
output_file_path = '/home/IAIS/jdatta/kenlm_training/output_1.jsonl'

MAX_TOKENS = 15000
model = "gpt-3.5-turbo-0125"

def calculate_prompt_tokens(detailed_prompt, model="gpt-3.5-turbo-0125"):
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(detailed_prompt))

with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file:
    for line in input_file:
        input_data = json.loads(line)
        input_text = input_data['content']
        #input_id = input_data['id']   
        
        detailed_prompt = f"""
Du bist ein Textklassifizierungsassistent, 
der Texte in medizinische und nicht-medizinische Texte klassifiziert. 

Beispiel für einen medizinischen Text:
Text: "Ergänzende Krebstherapie: Behandlung mit bioaktiven Bestandteilen aus dem Blut des Crocodylus Siamensis (Krokodilblut) aus Thailand.
\nZahlreiche laufende internationale Studien- und Forschungsprojekte vieler Universitäten zeigen, dass Krokodilblut ganz spezielle Proteine (Eiweiße) enthält. Diese zeigen eine starke antioxidative Wirkung speziell auf Tumorzellen."

Beispiel für einen nicht-medizinischen Text:
Text: "Auch nach 47 Jahren werden Sie sich als Gast bei uns stets daheim fühlen. 
Dazu trägt unser gepflegtes Ambiente und die Herzlichkeit des ganzen\nTeams, sowie die offene Art der übrigen Besucher bei. 
Für Entspannung suchende Männer ist die „Clubsauna Amsterdam“ die allererste Adresse."

Deine Aufgabe ist es, die folgenden Aktionen durchzuführen:
1 - Klassifiziere, ob der Text ein medizinischer Text ist oder nicht.
Wenn der Text medizinische Keywörter oder Fachbegriffe enthält, Gesundheitszustände, 
Behandlungen oder Praktiken im Gesundheitswesen behandelt, 
handelt es sich um einen medizinischen Text, ansonsten ist es ein  nicht-medizinischer Text.
2 - Erkläre, warum der Text medizinisch oder nicht-medizinisch ist, 
in dem du den Text mit höchstens 30 Wörtern zusammenfasst

Gewünschtes Format:
Medizin: <True or False>
Erklärung: <zusammenfassen>
label: <medical or non-medical>
 
Text: ```{input_text}```
"""
        num_tokens = calculate_prompt_tokens(detailed_prompt, model)  
         
        if num_tokens > MAX_TOKENS:
            output_data = {
                #'id': input_id,
                'Antwort': 'token-Länge überschreitet',
                'Token-Anzahl': num_tokens
            }
            output_file.write(json.dumps(output_data, ensure_ascii=False) + '\n')
        else:
            response = client.chat.completions.create(
                model=model,
                temperature=0.7,
                response_format={"type": "json_object"},
                messages=[
                    {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                    {'role': 'user', 'content': detailed_prompt}
                ]
            )
            chat_response = response.choices[0].message.content
            parsed_json = json.loads(chat_response)
            ordered_json = OrderedDict(list(parsed_json.items()))
            #ordered_json = OrderedDict([('id', input_id)] + list(parsed_json.items()))
            output_file.write(json.dumps(ordered_json, ensure_ascii=False) + '\n')
        
print('Response saved to output_1.jsonl')

KeyboardInterrupt: 

In [None]:
########### Medical Text Extraction with tiktoken Prompt-4 ################

import os
import json
import tiktoken
from openai import OpenAI
from collections import OrderedDict

os.environ["OPENAI_API_KEY"] = ""
client = OpenAI()

input_file_path = '/home/IAIS/jdatta/kenlm_training/50_oscar_de_med.jsonl'
output_file_path = '/home/IAIS/jdatta/kenlm_training/output_4.jsonl'

MAX_TOKENS = 15000
model = "gpt-3.5-turbo-0125"

def calculate_prompt_tokens(detailed_prompt, model="gpt-3.5-turbo-0125"):
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(detailed_prompt))

with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file:
    for line in input_file:
        input_data = json.loads(line)
        input_text = input_data['text']
       # input_id = input_data['id']   
        
        detailed_prompt = f"""
Du bist ein Textklassifizierungsassistent, 
der Texte in medizinische und nicht-medizinische Texte klassifiziert. 

Deine Aufgabe ist es, die folgenden Aktionen durchzuführen:
1 - Klassifiziere, ob der Text ein medizinischer Text ist oder nicht.
Wenn der Text medizinische Keywörter oder Fachbegriffe enthält, Gesundheitszustände, 
Behandlungen oder Praktiken im Gesundheitswesen behandelt, 
handelt es sich um einen medizinischen Text, ansonsten ist es ein  nicht-medizinischer Text.
2 - Erkläre, warum der Text medizinisch oder nicht-medizinisch ist, 
in dem du den Text mit höchstens 30 Wörtern zusammenfasst

Gewünschtes Format:
Medizin: <True or False>
Erklärung: <zusammenfassen>
 
Text: ```{input_text}```
"""
        num_tokens = calculate_prompt_tokens(detailed_prompt, model)  
         
        if num_tokens > MAX_TOKENS:
            output_data = {
               # 'id': input_id,
                'Antwort': 'token-Länge überschreitet',
                'Token-Anzahl': num_tokens
            }
            output_file.write(json.dumps(output_data, ensure_ascii=False) + '\n')
        else:
            response = client.chat.completions.create(
                model=model,
                temperature=0.7,
                response_format={"type": "json_object"},
                messages=[
                    {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                    {'role': 'user', 'content': detailed_prompt}
                ]
            )
            chat_response = response.choices[0].message.content
            parsed_json = json.loads(chat_response)
            ordered_json = OrderedDict(list(parsed_json.items()))
            #ordered_json = OrderedDict([('id', input_id)] + list(parsed_json.items()))
            output_file.write(json.dumps(ordered_json, ensure_ascii=False) + '\n')
        
print('Response saved to output_4.jsonl')

Response saved to output_4.jsonl


In [6]:
import json
import tiktoken

def num_tokens_from_jsonl_file(file_path: str, encoding_name: str) -> int:
    """Returns the total number of tokens in all text strings within a JSONL document."""
    encoding = tiktoken.get_encoding(encoding_name)
    total_num_tokens = 0
    
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            json_obj = json.loads(line)
            if 'content' in json_obj and isinstance(json_obj['content'], str):
                num_tokens = len(encoding.encode(json_obj['content']))
                total_num_tokens += num_tokens
                
    return total_num_tokens

# Example usage
file_path = "/home/IAIS/jdatta/kenlm_training/1000documents.jsonl"
encoding_name = "cl100k_base"
total_tokens = num_tokens_from_jsonl_file(file_path, encoding_name)
print(f"Total tokens in the JSONL document: {total_tokens}")

Total tokens in the JSONL document: 2334685


In [2]:
import tiktoken

# Assuming you're using the tiktoken library
def calculate_static_part_tokens(static_text, model="gpt-3.5-turbo-0125"):
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(static_text))

static_text = """{"hadm_id":25487364,"radiology_text":"INDICATION:  Nausea and abdominal pain.\n\nCOMPARISON:  Chest radiograph from ___.\n\nFINDINGS:  The lungs are clear with no evidence of a consolidation, effusion,\nor pneumothorax.  Cardiomediastinal silhouette is normal.  No acute fractures\nare identified.  No free air is noted under the hemidiaphragms.\n\nIMPRESSION:  No acute cardiopulmonary process.\n", "discharge_instructions":"You came to the hospital because you had abdominal pain. The \npancreas specialists saw you and assessed you and felt that your \npain was not related to your pancreas, that you have some belly \nspasm. We treated your pain with pain medications. The ERCP \ndoctors recommended ___ some new mediations which may help \nyour pain.\n\nplease START citrucel 1 tab at night with ___ glasses of water \n(you can get this over the counter)\nplease START Alish probiotic daily (you can get this over the \ncounter)\nplease STARTLevsin .12mg q12H PRN spasm\n\nPlease follow up with your GI doctor.\n\nHappy first birthday to your son!","brief_hospital_course":"___ year old woman w/ h/o sphincter of Oddi dysfunction s/p total \nof 4 ERCPs w/ sphincterotomy presents w/ abdominal pain.\n\n#Abdominal Pain: Pt has chronic abdominal pain and has previous \ndx of sphincter of odi dysfunction requring ERCP \nsphincterotomies. She has felt relief of pain after her last \nERCP but three weeks she had pain again. Her LFTs, lipase were \nwnl and RUQ u/s showed common bile duct dilation which she has \nhad on prior imaging. Her lipase and LFTs are all wnl which is \nreassuring. ERCP was consulted and they felt this was abdominal \nspasm and not related to her pancreas because of normal labs. \nShe was kept NPO and given IVF for 24 hrs and we managed her \npain with dilaudid 2mg q4H prn\nand tylenol prn. She was also given miralax for bowel regimen \nwhile on narcotics. She was then switched to a normal diet and \npatient was able to take in PO well. She will follow up with her \npancreas doctors. It weas recommedned pt try Hyoscyamine 0.125 \nmg PO Q12H PRN for abdominal spasm.\n\n#Depression/anxiety:\n-continued escitalopram\n-continued lorazepam .5 mg BIDprn\n\n#Tobacco:\n- gave nicotine patch"}

"""
static_part_tokens = calculate_static_part_tokens(static_text)

# Assuming 10,000 documents
total_static_tokens = static_part_tokens * 1000
total_static_tokens

577000

In [2]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string('"Medizin": false, "Erklärung": "Der Text beschreibt die berufliche Laufbahn und Interessen von Achim Schröer sowie Veranstaltungsinformationen, ohne medizinische Inhalte.", "label": "non-medical"', "cl100k_base")

57

In [10]:
import json

with open('/home/IAIS/jdatta/kenlm_training/medical_texts.jsonl', 'r') as file:
    lines = file.readlines()

filtered_lines = []

for line in lines:
    data = json.loads(line)
    filtered_line = {'content': data['content']}
    filtered_lines.append(json.dumps(filtered_line))

with open('/home/IAIS/jdatta/kenlm_training/filtered_data.jsonl', 'w') as file:
    for line in filtered_lines:
        file.write(line + '\n')


In [None]:
#################### API COST #####################

import json
import os
from openai import OpenAI
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')

INPUT_PRICE_PER_1M_TOKENS = 10.00  # $0.0005 per 1K tokens
OUTPUT_PRICE_PER_1M_TOKENS = 30.00  # $0.0015 per 1K tokens

os.environ["OPENAI_API_KEY"] = ""
client = OpenAI()

input_file_path = '/home/IAIS/jdatta/kenlm_training/text.jsonl' 
output_file_path = '/home/IAIS/jdatta/kenlm_training/output2.json' 

def calculate_cost(tokens, price_per_1M_tokens):
    return tokens / 1000000 * price_per_1M_tokens

total_cost_across_calls = 0 

with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file:
    output_data = {}
    
    for line_number, line in enumerate(input_file, start=1):
        input_data = json.loads(line)
        input_text = input_data['text']
        input_id = input_data['id']
        
        detailed_prompt = f"""
        Klassifizieren Sie den folgenden Text entweder als "medizinisch" oder "nicht-medizinisch" und 
        geben Sie eine kurze Erklärung (nicht mehr als 30 Wörter) für Ihre Klassifizierung. 
        Formatieren Sie Ihre Antwort als JSON-Objekt mit zwei Schlüsseln: "Antwort" und "Erklärung".
        Text: "{input_text}"
        """
        
        prompt_tokens = len(word_tokenize(detailed_prompt))
        
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            response_format={"type": "json_object"},
            messages=[
                {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                {'role': 'user', 'content': detailed_prompt}
            ]
        )
        
        chat_response = json.loads(response.choices[0].message.content)
        output_text = chat_response['Antwort'] + " " + chat_response['Erklärung']
        output_tokens = len(word_tokenize(output_text))
        
        input_cost = calculate_cost(prompt_tokens, INPUT_PRICE_PER_1M_TOKENS)
        output_cost = calculate_cost(output_tokens, OUTPUT_PRICE_PER_1M_TOKENS)
        api_cost = input_cost + output_cost
        total_cost_across_calls += api_cost
        
        output_data[f'call - {line_number}'] = {
            'id': input_id,
            'prompt_tokens': prompt_tokens,
            'output_tokens': output_tokens,
            'api_cost': api_cost,
            **chat_response  # Merging the chat_response dictionary
        }
        
    output_data['total_cost'] = total_cost_across_calls
    
    json.dump(output_data, output_file, ensure_ascii=False, indent=4)
      
print(f'Results and total cost saved to {output_file_path}')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/IAIS/jdatta/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Results and total cost saved to /home/IAIS/jdatta/kenlm_training/output2.json


In [1]:
detailed_prompt = f"""
Du bist ein Textklassifizierungsassistent, 
der Texte in medizinische und nicht-medizinische Texte klassifiziert. 

Beispiel für einen medizinischen Text:
Text: "Ergänzende Krebstherapie: Behandlung mit bioaktiven Bestandteilen aus dem Blut des Crocodylus Siamensis (Krokodilblut) aus Thailand.
\nZahlreiche laufende internationale Studien- und Forschungsprojekte vieler Universitäten zeigen, dass Krokodilblut ganz spezielle Proteine (Eiweiße) enthält. Diese zeigen eine starke antioxidative Wirkung speziell auf Tumorzellen."

Beispiel für einen nicht-medizinischen Text:
Text: "Auch nach 47 Jahren werden Sie sich als Gast bei uns stets daheim fühlen. 
Dazu trägt unser gepflegtes Ambiente und die Herzlichkeit des ganzen\nTeams, sowie die offene Art der übrigen Besucher bei. 
Für Entspannung suchende Männer ist die „Clubsauna Amsterdam“ die allererste Adresse."

Deine Aufgabe ist es, die folgenden Aktionen durchzuführen:
1 - Klassifiziere, ob der Text ein medizinischer Text ist oder nicht.
Ein medizinischer Text ist ein schriftliches, gedrucktes oder digitales Dokument, das sich auf den Bereich der Medizin bezieht.
Dazu gehören Inhalte die Informationen über medizinische Bedingungen, Behandlungen, Forschung,
Gesundheitspolitik, medizinische Ethik, Patientenversorgung und medizinische Ausbildung. 
Medizinische Texte können von medizinischen Fachkräften, Forschern, Institutionen oder sogar
Patienten und Pflegepersonal verfasst werden und umfassen ein breites Spektrum an Formaten wie Bücher,
Forschungsartikel, Leitlinien, Fallberichte Lehrbücher und Beipackzettel
2 - Erkläre, warum der Text medizinisch oder nicht-medizinisch ist, 
in dem du den Text mit höchstens 30 Wörtern zusammenfasst

Gewünschtes Format:
Medizin: <True or False>
Erklärung: <zusammenfassen>
label: <medical or non-medical>
 
Text: ```{text}```
"""

NameError: name 'text' is not defined

In [None]:
from base64 import b64encode
from openai import OpenAI
import json

genai_username = "jdatta"
genai_password = ""

token_string = f"{genai_username}:{genai_password}"
token_bytes = b64encode(token_string.encode())

client = OpenAI(
    api_key="",
    default_headers={"Authorization": f"Basic {token_bytes.decode()}"},
    base_url=""
)

prompt_text = '''
You are a doctor with 30 years of experience in writing medical discharge letters. 
Your task is to generate synthetic discharge summaries in a MIMIC-like JSONL format, ensuring they resemble real-world clinical records.
Ensure to generate summaries for patients from diverse age groups, including children, young adults, middle-aged, and elderly patients. 
Ensure an equal distribution of male and female patients to maintain demographic balance.
Using the format and style from the example provided below, generate 50 synthetic discharge summary:

Example:
{"hadm_id":25487364,"radiology_text":"INDICATION:  Nausea and abdominal pain.\n\nCOMPARISON:  Chest radiograph from ___.\n\nFINDINGS:  The lungs are clear with no evidence of a consolidation, effusion,\nor pneumothorax.  Cardiomediastinal silhouette is normal.  No acute fractures\nare identified.  No free air is noted under the hemidiaphragms.\n\nIMPRESSION:  No acute cardiopulmonary process.\n", "discharge_instructions":"You came to the hospital because you had abdominal pain. The \npancreas specialists saw you and assessed you and felt that your \npain was not related to your pancreas, that you have some belly \nspasm. We treated your pain with pain medications. The ERCP \ndoctors recommended ___ some new mediations which may help \nyour pain.\n\nplease START citrucel 1 tab at night with ___ glasses of water \n(you can get this over the counter)\nplease START Alish probiotic daily (you can get this over the \ncounter)\nplease STARTLevsin .12mg q12H PRN spasm\n\nPlease follow up with your GI doctor.\n\nHappy first birthday to your son!","brief_hospital_course":"___ year old woman w/ h/o sphincter of Oddi dysfunction s/p total \nof 4 ERCPs w/ sphincterotomy presents w/ abdominal pain.\n\n#Abdominal Pain: Pt has chronic abdominal pain and has previous \ndx of sphincter of odi dysfunction requring ERCP \nsphincterotomies. She has felt relief of pain after her last \nERCP but three weeks she had pain again. Her LFTs, lipase were \nwnl and RUQ u/s showed common bile duct dilation which she has \nhad on prior imaging. Her lipase and LFTs are all wnl which is \nreassuring. ERCP was consulted and they felt this was abdominal \nspasm and not related to her pancreas because of normal labs. \nShe was kept NPO and given IVF for 24 hrs and we managed her \npain with dilaudid 2mg q4H prn\nand tylenol prn. She was also given miralax for bowel regimen \nwhile on narcotics. She was then switched to a normal diet and \npatient was able to take in PO well. She will follow up with her \npancreas doctors. It weas recommedned pt try Hyoscyamine 0.125 \nmg PO Q12H PRN for abdominal spasm.\n\n#Depression/anxiety:\n-continued escitalopram\n-continued lorazepam .5 mg BIDprn\n\n#Tobacco:\n- gave nicotine patch"}
Your output should be in JSONL format.
'''

response = client.chat.completions.create(
    model="Llama-3.3-70B-Instruct",
    messages = [{"role": "system", "content": "You are a helpful assistant designed to output JSON."}, 
                {"role": "user", "content": prompt_text}],
    #prompt=prompt_text,
    seed=11,
    max_tokens=2048,
    temperature=0.7,
    top_p=0.6,
    extra_headers={
        "X-Request-ID": "rating-00001",
    },
    extra_body={},
    stream=False
)

#print(completion.choices[0].text)

#response_text = completion.choices[0].text
chat_response = response.choices[0].message.content

lines = chat_response.strip().split("\n")

output_file_path = "/home/IAIS/jdatta/mimic_syntheticData/english/synthetic_discharge_summaries.jsonl"

with open(output_file_path, "w", encoding="utf-8") as f:
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            parsed_json = json.loads(line)
            f.write(json.dumps(parsed_json, ensure_ascii=False) + "\n")
        except json.JSONDecodeError:
            print(f"Skipping invalid JSON line: {line}")



InternalServerError: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
<hr><center>nginx</center>
</body>
</html>

In [None]:
from base64 import b64encode
from openai import OpenAI
import json

genai_username = "jdatta"
genai_password = ""

token_string = f"{genai_username}:{genai_password}"
token_bytes = b64encode(token_string.encode())

client = OpenAI(
    api_key="",
    default_headers={"Authorization": f"Basic {token_bytes.decode()}"},
    base_url=""
)

prompt_text = '''
You are a doctor with 30 years of experience in writing medical discharge letters. 
Your task is to generate synthetic discharge summaries in a MIMIC-like JSONL format, ensuring they resemble real-world clinical records.
Ensure to generate summaries for patients from diverse age groups, including children, young adults, middle-aged, and elderly patients. 
Ensure an equal distribution of male and female patients to maintain demographic balance.
Using the exact format and style from the example provided below, generate a synthetic discharge summary:

Example:
{"hadm_id":20000254,"radiology_text":"EXAMINATION:\nChest:  Frontal and lateral views\n\nINDICATION:  History: ___ with hx pancreatitis now with burning epigastric\npain radiating to back  // please assess for etiologies of abdominal pain\n\nTECHNIQUE:  Chest:  Frontal and Lateral\n\nCOMPARISON:  None.\n\nFINDINGS: \n\nThe lungs are clear without focal consolidation.  No pleural effusion or\npneumothorax is seen. The cardiac and mediastinal silhouettes are\nunremarkable.  No evidence of free air is seen beneath the diaphragm.\n\nIMPRESSION: \n\nNo acute cardiopulmonary process.\n\\BEGIN NEW REPORT:___\nINDICATION:  Evaluate for obstruction in a ___ woman with abdominal\npain and constipation.\n\nTECHNIQUE:  Frontal supine and upright abdominal radiographs were obtained.\n\nCOMPARISON:  CT abdomen/pelvis from ___.\n\nFINDINGS: \n\nThere are no abnormally dilated loops of large or small bowel.  A large fecal\nburden is noted.\nThere is no free intraperitoneal air.\nOsseous structures are unremarkable.\nThere are no unexplained soft tissue calcifications or radiopaque foreign\nbodies.  Surgical clips in the right upper quadrant are compatible with prior\ncholecystectomy.\n\nIMPRESSION: \n\nConstipation without obstruction.\n","discharge_instructions":"You were admitted to the hospital due to nausea, vomiting, \nabdominal pain, and difficulty defecating. As you know, there \nremains some uncertainty regarding the cause of your symptoms. \nWe have not changed the current outpatient work-up plans, \nincluding colonoscopy, anorectal manometry, and gastric emptying \nstudy. While you were here we treated you with high doses of \nlaxatives for constipation. While this did cause diarrhea, it \nalso appears to have helped somewhat with you pain, which may \nargue that the constipation is playing a role in your abdominal \npain. We would recommend continuing to adjust your medications \nfor constipation as needed to acheieve ___ bowel movements per \nday with a soft/loose consistency. Senna and bisacodyl seemed to \nbe the medications that you tolerated best.\n\nGastroenterology has also recommended the following treatment \nfor your hemorrhoids and rectal fissure:\n\n 1. 2 tsp mineral oil daily or colace stool softner\n 2. 0.2 % nifedipine cream +  lidocaine 3%.  Needs to be made at \n___ or compounding pharmacy (___- \n___- ___ \n___, ___)\n 3. ___ baths in very warm water BID\n 4. +/- 1% topical hydrocortisone \n\nThey have also recommended the option of starting amitriptyline \nwith your psychiatrist, which is an antidepressant that can also \nhelp with abdominal pain.\n\nWe recommend minimizing tramadol as this can worsen \nconstiaption. Tylenol may also be effective for your pain. Avoid \nNSAIDs such as ibuprofen or naproxen.\n\nWe held your blood pressure medication, amlodipine, since it has \nthe potential to worsen constipation. Your blood pressures were \nin a good range while you were here. You should re-address this \nissue with your primary doctor.","brief_hospital_course":"___ year old woman with history of pancreatitis of unknown \netiology in ___, HTN, GERD, who presents with \nprogressive abdominal pain and difficulty with defecation. She \nhas been followed by gastroenterology in the outpatient setting \nand had multiple studies planned for work-up given the uncertain \ncause of her symptoms, including anorectal manometry, gastric \nemptying study, and colonoscopy. When evaluated by the \ngastroenterology team, it was noted that she had large external \nhemorrhoids and there was concern for fissure. Therefore it was \npostulated that painful defecation could have led to worsening \nconstipation, which could be driving much of her abdominal pain. \nWith this in mind, her bowel regimen was uptitrated \naggressively, which was successful in causing numerous large \nloose bowel movements. While the patient endorsed some \nimprovement in her abdominal pain, she also found it very \ndistressing to have that degree of diarrhea (of note she has \nexperienced considerable anxiety surrounding her symptoms and \nwas frequently tearful during the admission). During the \nadmission she was also offered topical treatments for rectal \npain, but declined them, as she did not feel that hemorrhoids or \nfissure were the cause of her symptoms. \n\nGiven the continued uncertainty surrounding her symptoms, she \nwill still plan for the outpatient diagnostic work-up as \ndetailed above. In the mean time she plans to titrate the bowel \nmedications to acheieve ___ loose/soft BMs daily. She was also \nprescribed topical treatments for her hemorrhoids and fissure, \nwhich she will consider. \n\nGI also recommended considering amitriptyline for functional \nabdominal pain, which she will discuss with her mental health \nprovider. \n\nNotably her amlodipine was held in case this was worsening her \nconstipation. Her BPs were wnl in house but if they are elevated \nin the future then another class of agent may be considered. \n\nPatient seen and examined on day of discharge. She was stable \nand safe to return home. She will arrange for PCP ___. \n\n======================================================".}
Your output should be in JSONL format. Write different discharge letter every time I run the code.
Important: Do not use pretty-printing. Each JSON object should be in a single line.
'''

response = client.chat.completions.create(
    model="Llama-3.1-70B-Instruct",
    messages = [{"role": "system", "content": "You are a helpful assistant designed to output JSON."}, 
                {"role": "user", "content": prompt_text}],
    #prompt=prompt_text,
    max_tokens=3000,
    temperature=0.7,
    top_p=0.6,
    extra_headers={
        "X-Request-ID": "rating-00001",
    },
    extra_body={},
    stream=False
)

#print(completion.choices[0].text)

#response_text = completion.choices[0].text
chat_response = response.choices[0].message.content

lines = chat_response.strip().split("\n")

output_file_path = "/home/IAIS/jdatta/mimic_syntheticData/english/synthetic_discharge_summaries_1.jsonl"

with open(output_file_path, "w", encoding="utf-8") as f:
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            parsed_json = json.loads(line)
            f.write(json.dumps(parsed_json, ensure_ascii=False) + "\n")
        except json.JSONDecodeError:
            print(f"Skipping invalid JSON line: {line}")



In [None]:
from base64 import b64encode
from openai import OpenAI
import json
import time  # optional: for spacing out requests

genai_username = "jdatta"
genai_password = ""

token_string = f"{genai_username}:{genai_password}"
token_bytes = b64encode(token_string.encode())

client = OpenAI(
    api_key="",
    default_headers={"Authorization": f"Basic {token_bytes.decode()}"},
    base_url=""
)

base_prompt = '''
You are a doctor with 30 years of experience in writing medical discharge letters. 
Your task is to generate synthetic discharge summaries in a MIMIC-like JSONL format, ensuring they resemble real-world clinical records.
Ensure to generate summaries for patients from diverse age groups, including children, young adults, middle-aged, and elderly patients. 
Ensure an equal distribution of male and female patients to maintain demographic balance.
Using the exact format and style from the example provided below, generate a synthetic discharge summary:

Example:
{"hadm_id":20000750,"radiology_text":"INDICATION:  Postoperative views of the cervical spine\n\nCOMPARISON:  Prior from ___\n\nFINDINGS: \n\nLateral views of the cervical spine provided.  There is both anterior and\nposterior spinal fusion which appears to involve C3, C4, C5 anteriorly with\nanterior plate, vertebral body screws and disc spacers spanning the segment. \nPosteriorly fusion rods and screws are seen extending from C3, C4, C5, C6, C7\nthrough T1.  The alignment from C1 through C4 is preserved.  Inferior to this\nlevel, lima cannot be reliably assessed.\n\nIMPRESSION: \n\nAs above.\n","discharge_instructions":"Weigh yourself every morning, call MD if weight goes up more \nthan 3 lbs.\n\n \n\nPosterior Cervical Fusion\n\nYou have undergone the following operation: Posterior Cervical \nDecompression and Fusion\n\nImmediately after the operation:\n\n                Activity:You should not lift anything greater \nthan 10 lbs for 2 weeks.You will be more comfortable if you do \nnot sit in a car or chair for more than~45 minutes without \ngetting up and walking around.\n\n                Rehabilitation/ Physical ___ times a \nday you should go for a walk for ___ minutes as part of your \nrecovery.You can walk as much as you can tolerate.Limit any kind \nof lifting. \n\n                Cervical Collar / Neck Brace:You need to wear \nthe brace at all times until your follow-up appointment which \nshould be in 2 weeks.You may remove the collar to take a \nshower.Limit your motion of your neck while the collar is \noff.Place the collar back on your neck immediately after the \nshower.\n\n                Wound Care:Remove the dressing in 2 days.If the \nincision is draining cover it with a new sterile dressing.If it \nis dry then you can leave the incision open to the air.Once the \nincision is completely dry (usually ___ days after the \noperation) you may take a shower.Do not soak the incision in a \nbath or pool.If the incision starts draining at anytime after \nsurgery,do not get the incision wet.Call the office at that \ntime.If you have an incision on your hip please follow the same \ninstructions in terms of wound care.\n\n                You should resume taking your normal home \nmedications\n\n                You have also been given Additional Medications \nto control your pain.Please allow 72 hours for refill of \nnarcotic prescriptions, so please plan ahead.You can either have \nthem mailed to your home or pick them up at the clinic located \nin ___ office.We are not allowed to call in narcotic \nprescriptions (oxycontin,oxycodone,percocet) to the pharmacy.In \naddition,we are only allowed to write for pain medications for \n90 days from the date of surgery.\n\n                Follow up:\n\n___                Please Call the office and make an \nappointment for 2 weeks after the day of your operation if this \nhas not been done already.\n\n___                At the 2-week visit we will check your \nincision,take baseline x rays and answer any questions.\n\n \n\nPlease call the office if you have a fever>101.5 degrees \nFahrenheit,drainage from your wound,or have any questions.\nPhysical Therapy:\nAmbulate as tolerated- soft collar for comfort\nTreatments Frequency:\nincision clean and dry\nStaples in place","brief_hospital_course":"Patient was admitted to Orthopedic Spine Service on ___ for \nfurther management.  He was preoped for possible I&D.  \nCOnsequently, on ___ he underwent the above stated \nprocedure.  Please review dictated operative report for details. \nPatient was extubated without incident and was transferred to \nPACU then floor in stable condition.  \n\n \n\nDuring the patient's course ___ were used for \npostoperative DVT prophylaxis.  Intravenous antibiotics were \ncontinued for 24hrs postop per standard protocol. Initial postop \npain was controlled with oral and IV pain medication. Diet was \nadvanced as tolerated. Hospital course was otherwise \nunremarkable.  He was started on Keflex for 10 days.  His hand \nweakness remained unchanged. \n\n \n\nNow, Day of Discharge, patient is afebrile, VSS, and neuro \nintact with improvement of radiculopathy. Patient tolerated a \ngood oral diet and pain was controlled on oral pain medications. \nPatient ambulated independently. Patient's wound is clean, dry \nand intact. Patient noted improvement in radicular pain. Patient \nis set for discharge to home in stable condition."}
Your output should be in JSONL format. Write different discharge letter every time I run the code.
Important: Do not use pretty-printing. Each JSON object should be in a single line.
'''

for i in range(1, 101):  # 1 to 100
    prompt_text = base_prompt + f"\n\n[SESSION_RUN: {i}]"

    response = client.chat.completions.create(
        model="Llama-3.1-70B-Instruct",
        messages=[
            {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
            {"role": "user", "content": prompt_text}
        ],
        max_tokens=3000,
        temperature=0.7,
        top_p=0.6,
        extra_headers={"X-Request-ID": f"discharge-{i:05}"},
        stream=False
    )

    chat_response = response.choices[0].message.content
    lines = chat_response.strip().split("\n")

    output_file_path = f"/home/IAIS/jdatta/mimic_syntheticData/english/synthetic_discharge_summaries_{i}.jsonl"

    with open(output_file_path, "w", encoding="utf-8") as f:
        for line in lines:
            line = line.strip()
            if not line:
                continue
            try:
                parsed_json = json.loads(line)
                f.write(json.dumps(parsed_json, ensure_ascii=False) + "\n")
            except json.JSONDecodeError:
                print(f"[Run {i}] Skipping invalid JSON line: {line}")

    print(f"[✓] Generated: {output_file_path}")
    # Optional: Avoid hitting API limits too quickly
    # time.sleep(1)


[✓] Generated: /home/IAIS/jdatta/mimic_syntheticData/english/synthetic_discharge_summaries_1.jsonl
[Run 2] Skipping invalid JSON line: {"hadm_id":20000751,"radiology_text":"INDICATION:  Chest X-ray to evaluate for pneumonia
[Run 2] Skipping invalid JSON line: COMPARISON:  Prior from 2022
[Run 2] Skipping invalid JSON line: FINDINGS:
[Run 2] Skipping invalid JSON line: The lungs are clear of infiltrates or effusions. The cardiac silhouette is normal. The mediastinum is normal.
[Run 2] Skipping invalid JSON line: IMPRESSION:
[Run 2] Skipping invalid JSON line: No evidence of pneumonia.","discharge_instructions":"Take your medications as directed by your doctor.
[Run 2] Skipping invalid JSON line: Pneumonia
[Run 2] Skipping invalid JSON line: You have been treated for pneumonia. To help you recover:
[Run 2] Skipping invalid JSON line:                 Activity:You should rest and avoid strenuous activities for 1-2 weeks.
[Run 2] Skipping invalid JSON line:                 Rehabilitation/

InternalServerError: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
<hr><center>nginx</center>
</body>
</html>

In [None]:
from base64 import b64encode
from openai import OpenAI
import json
import time  # optional: for spacing out requests

genai_username = "jdatta"
genai_password = ""

token_string = f"{genai_username}:{genai_password}"
token_bytes = b64encode(token_string.encode())

client = OpenAI(
    api_key="",
    default_headers={"Authorization": f"Basic {token_bytes.decode()}"},
    base_url=""
)

base_prompt = '''
You are a doctor with 30 years of experience in writing medical discharge letters. 
Your task is to generate synthetic discharge summaries in a MIMIC-like JSONL format, ensuring they resemble real-world clinical records.
Ensure to generate summaries for patients from diverse age groups, including children, young adults, middle-aged, and elderly patients. 
Ensure an equal distribution of male and female patients to maintain demographic balance.
Following the format from the example provided below, generate a synthetic discharge summary:

Example:
{"hadm_id": 21070242, "radiology_text": "INDICATION:  This year old female with fevers and 50.\n\nTECHNIQUE:  Chest PA and lateral\n\nCOMPARISON:  None available.\n\nFINDINGS: \n\nPA and lateral chest radiograph demonstrates clear lungs bilaterally. No focal\nconsolidation convincing for pneumonia is identified. Cardiomediastinal and\nhilar contours are within normal limits. There is no pleural effusion or\npneumothorax. Osseous structures are without an acute abnormality.\n\nIMPRESSION: \n\nNo focal opacity convincing for pneumonia.\n\\BEGIN NEW REPORT:___\nEXAMINATION:  LIVER OR GALLBLADDER US (SINGLE ORGAN)\n\nINDICATION:  ___ with elevated LFTs and fevers\n\nTECHNIQUE:  Grey scale and color Doppler ultrasound images of the abdomen were\nobtained.\n\nCOMPARISON:  None available.\n\nFINDINGS: \n\nLIVER: The hepatic parenchyma appears within normal limits.The contour of the\nliver is smooth. There is no focal liver mass.  Main portal vein is patent\nwith hepatopetal flow. There is no ascites.\n\nBILE DUCTS: There is no intrahepatic biliary dilation. The CBD measures 2 mm.\n\nGALLBLADDER: There is no evidence of stones or gallbladder wall thickening.\nUnremarkable appearance of the liver, no biliary ductal dilatation.\n\nPANCREAS: Imaged portion of the pancreas appears within normal limits, without\nmasses or pancreatic ductal dilation, with portions of the pancreatic tail\nobscured by overlying bowel gas.\n\nSPLEEN:  The spleen appears heterogeneously echogenic which may reflect\nchronic changes in this patient with sickle cell disease. The spleen measures\n7.5 cm in dimension.\n\n\nIMPRESSION: \n\n1.  No acute intra-abdominal process.\n2.  Heterogeneously echogenic spleen may reflect chronic changes in setting of\nsickle cell disease and prior infarcts.\n\nNOTIFICATION:  Updated wet read paged to Dr. ___.\n", "discharge_instructions": "You presented to the hospital with post-op fever.  Initially \nthere was concern for infection, however, all your infectious \nwork-up was NEGATIVE and the Infectious Disease doctors ___ \nthat ___ was unlikely.  After discussion with the Blood \nBank and the Hematology consult team, your fever was felt to be \nlikely due to a delayed tranfusion reaction to blood you \nreceived during your recent hip replacement surgery.  Because of \nthe transfusion reaction, you had worsening anemia / sickle \ncrisis, which responded well to supportive treatment.  We \nrecommend that you take an increased dose of folic acid till \nfollo-wup with your PCP or ___.  We also recommend that \nyou have your family members have their blood screened to be \npotential donors for you in the future.  You can contact the \n___ Cross at ___.\n.\nPlease take your medications as directed.\n.\nPlease see your physicians as listed.", "brief_hospital_course": "___ yo F with sickle cell disease who presents with 3 weeks of \nfevers and chills s/p R hip replacement, initially thought to be \ninfectious in nature, ultimately felt to be due to delayed \ntransfusion reaction, further complicated by mild sickle cell \ncrisis.  \n\n# Fever, likely delayed transfusion reaction\nInitially given pt's fever and leukocytosis, there was high \nconcern for underlying infection.  Given elevated LFT's, RUQ US \nwas obtained in the ED, however, no evidence of cholangitis.  \nPatient also had clear CXR without evidence of PNA.  Her right \nhip replacement wound did not appear infected and she had \nexcellent mobility and ROM, making post-op wound infection or \nseptic prosthetic joint highly unlikely.  Given h/o of murmur, \nshe underwent TTE, but there was no obvious vegetation.  She was \ninitially placed on IV Ceftriaxone but after being seen by the \nInfectious Disease consult, this was stopped and she remained \nafebrile for more than 72 hours after stopping abx.  Given \nelevated LFT's, tick-borne illness was considered, but parasite \nsmear and Anaplasma serologies all NEGATIVE.  HIV Ab and VL were \nNEGATIVE, as were CMV VL, HCV VL and HSV Ab's.  Her CMV IgG was \npositive, but CMV IgM was NEGATIVE.  Flu swab was NEGATIVE and \nall blood cultures drawn showed NGTD.  Overall infection was \nfelt to be unlikely.  The most likely etiology of her fever and \nleukocytosis was a delayed transfusion reaction, given that she \nhad received 2 units PRBC pre-operatively and she has history of \nmany allo-antibodies.  She was seen by Hematology Consult and \nBlood Bank also reviewed her blood sample, and it was felt that \nher overall picture was highly consistent with delayed \ntransfusion reaction.  As noted above, she defervesced without \nfurther treatment or intervention and remained clinically stable \nand asymptomatic.  \n\n# Sickle Cell Disease, acute crisis\nPatient initially presented with baseline Hct, however, her Hct \nquickly dropped to approximately 19, which is below her \nbaseline.  Of note, she also had elevated LFT's, including T. \nbili, and elevated LDH, low haptoglobin and high retic #, \noverall picture consistent with a sickle crisis.  The sickle \ncrisis was likely triggered by her delayed transfusion reaction. \n She also had some bony pain in her knees and ankle.  She did \nnot have cough or chest pain.  She was placed on high-dose folic \nacid and O2 nasal cannula.  She initially complained of some \nfatigue during her ___ sessions, but her symptoms eventually \nresolved as her Hct stabilized above 20.  Hematology had \ninitially recommended transfusing PRBC's for goal Hct >20, \nhowever, due to presence of many alloantibodies, suitable PRBC's \ncould not be located.  As such patient did not receive any \nPRBC's during this hospitalization.  She is being discharged on \nhigh-dose folic acid and will continue until f/u with \nHematology.  Both Blood Bank and Hematology recommended that the \npatient have her closest family members screened by ___ \nCross for potential matching blood donor.  Patient will have her \nchildren tested to be donors (typically would ask for closely \nlikely match with pt's siblings, however, all of pt's siblings \nare only half-siblings).  Lastly, pt felt that supplemental O2 \nwas helpful in overcoming her sickle crisis, as such, inpatient \nHematology will coordinate with outpatient Hematology to try to \nobtain home O2 to be used PRN.\n\n# s/p R hip replacement\nPatient's hip showed excellent ROM, and as noted above, low \nconcern for septic joint.  She was able to ambulate and was seen \nby the ___ consult service to continue ___ during her \nhospitalization.  She was placed on heparin SQ for DVT \nprophylaxis.  She will continue with home ___ on discharge and \nwill f/u with her outpatient orthopedist as previously \nscheduled.  Per patient, her orthopedist did not recommend more \nthan 2 weeks of post-op DVT prophylaxis and since her surgery \nwas approximately 1 month ago, she is being discharged WITHOUT \nany further DVT prophylaxis.\n."}
Your output should be in JSONL format.
Important: Do not use pretty-printing. Each JSON object should be in a single line.
'''

for i in range(1, 101):  # 1 to 100
    prompt_text = base_prompt + f"\n\n[SESSION_RUN: {i}]"

    response = client.chat.completions.create(
        model="Llama-3.3-70B-Instruct",
        messages=[
            {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
            {"role": "user", "content": prompt_text}
        ],
        max_tokens=3000,
        temperature=0.7,
        top_p=0.6,
        extra_headers={"X-Request-ID": f"discharge-{i:05}"},
        stream=False
    )

    chat_response = response.choices[0].message.content
    lines = chat_response.strip().split("\n")

    output_file_path = f"/home/IAIS/jdatta/mimic_syntheticData/english/discharge_summaries_{i}.jsonl"

    with open(output_file_path, "w", encoding="utf-8") as f:
        for line in lines:
            line = line.strip()
            if not line:
                continue
            try:
                parsed_json = json.loads(line)
                f.write(json.dumps(parsed_json, ensure_ascii=False) + "\n")
            except json.JSONDecodeError:
                print(f"[Run {i}] Skipping invalid JSON line: {line}")

    print(f"[✓] Generated: {output_file_path}")
    # Optional: Avoid hitting API limits too quickly
    # time.sleep(1)


InternalServerError: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
<hr><center>nginx</center>
</body>
</html>

In [None]:
1.{"hadm_id":25487364,"radiology_text":"INDICATION:  Nausea and abdominal pain.\n\nCOMPARISON:  Chest radiograph from ___.\n\nFINDINGS:  The lungs are clear with no evidence of a consolidation, effusion,\nor pneumothorax.  Cardiomediastinal silhouette is normal.  No acute fractures\nare identified.  No free air is noted under the hemidiaphragms.\n\nIMPRESSION:  No acute cardiopulmonary process.\n", "discharge_instructions":"You came to the hospital because you had abdominal pain. The \npancreas specialists saw you and assessed you and felt that your \npain was not related to your pancreas, that you have some belly \nspasm. We treated your pain with pain medications. The ERCP \ndoctors recommended ___ some new mediations which may help \nyour pain.\n\nplease START citrucel 1 tab at night with ___ glasses of water \n(you can get this over the counter)\nplease START Alish probiotic daily (you can get this over the \ncounter)\nplease STARTLevsin .12mg q12H PRN spasm\n\nPlease follow up with your GI doctor.\n\nHappy first birthday to your son!","brief_hospital_course":"___ year old woman w/ h/o sphincter of Oddi dysfunction s/p total \nof 4 ERCPs w/ sphincterotomy presents w/ abdominal pain.\n\n#Abdominal Pain: Pt has chronic abdominal pain and has previous \ndx of sphincter of odi dysfunction requring ERCP \nsphincterotomies. She has felt relief of pain after her last \nERCP but three weeks she had pain again. Her LFTs, lipase were \nwnl and RUQ u/s showed common bile duct dilation which she has \nhad on prior imaging. Her lipase and LFTs are all wnl which is \nreassuring. ERCP was consulted and they felt this was abdominal \nspasm and not related to her pancreas because of normal labs. \nShe was kept NPO and given IVF for 24 hrs and we managed her \npain with dilaudid 2mg q4H prn\nand tylenol prn. She was also given miralax for bowel regimen \nwhile on narcotics. She was then switched to a normal diet and \npatient was able to take in PO well. She will follow up with her \npancreas doctors. It weas recommedned pt try Hyoscyamine 0.125 \nmg PO Q12H PRN for abdominal spasm.\n\n#Depression/anxiety:\n-continued escitalopram\n-continued lorazepam .5 mg BIDprn\n\n#Tobacco:\n- gave nicotine patch"}

2.{"hadm_id":20000019,"radiology_text":"EXAM:  Renal ultrasound.\n\nCLINICAL INFORMATION:  Flank pain and dysuria.\n\nCOMPARISON:  None.\n\nFINDINGS:  The right kidney measures 11.2 cm in length and the left kidney\nmeasures 10.4 cm in length.  No hydronephrosis is seen bilaterally.  There is\nno sonographically evident renal stone, solid mass, or findings to suggest\nperinephric abscess.  There is a small 0.8 cm simple-appearing cyst arising\nfrom the lateral upper pole of the left kidney.  The bladder is unremarkable\nand thin-walled.  Incidental note is made of multiple gallstones within a\ncollapsed gallbladder.  The common bile duct measures 0.6 cm in diameter and\nthere is no evidence of intrahepatic biliary dilatation.\n\nIMPRESSION:\n1.  No hydronephrosis or sonographic evidence of renal abscess.  No\nsonographically evident renal stone.\n2.  Small 8 mm simple-appearing left renal cyst.\n3.  Incidental note made of cholelithiasis within a collapsed gallbladder\nwithout evidence of intra- or extra-hepatic biliary dilatation.\n","discharge_instructions":"It was a pleasure taking part in your care at ___ \n___.\n\n___ were admitted for an infection in your blood and in your \nkidneys, after recently being diagnosed and treated for a \nurinary tract infection. Your kidney and blood infection were \ntreated with IV antibiotics. ___ also had temporary kidney \ninjury resulting from dehydration, which improved with \nintravenous fluids. An ultrasound was preformed to look at your \nkidneys, and everything looked normal. \n\n___ did not have a fever on the day of discharge. PLease buy a \nthermometer for home. ___ should take your temperature several \ntimes a day for the next few days. If your temperature is \ngreater than 102 degrees, ___ should return to the ED.\n\n___ are discharged on ciprofloxacin 500mg twice a day for 14 \ndays, to be completed on ___.\n\n___ should follow up with your PCP, ___ week\n\n___ should follow up your kidney doctor on ___ as planned.","brief_hospital_course":"___ was admitted on ___ for fevers, flank pain, \nnausea/vomiting and headache. She had been admitted ___ for \nurinary tract infection and discharged on nitrofurantoin. She \nrepresented on ___, found to be febrile, with urinalysis \nconsistent with infection, and was started on IV ceftriaxone. \nRenal ultrasound was preformed, showing only a 8mm simple cyst. \nSubsequent blood cultures showed GNR. \n.\nACUTE ISSUES: \n.\n# Pyelonephritis and Sepsis: Fever, dysuria, flank pain. \ntreating initially w/ iv ceftriaxone. Renal u/s w/o e/o abscess \nor stone as nidus. Patient was started on IV ceftriaxone, and \ntransitioned to PO cipro\nplan ___: Likely in setting of volume depletion given insensible \nlosses (fever), vomiting, poor PO intake, as evidence by \nelevated lactate and creatinine.  \n- Cr 1.3 on admission --> .9 on discharged, resolved with IV \nfluids\n.\nAnemia: requires outpatient evaluation.  Iron studies and lysis \nlabs above. \n."}

3. {"hadm_id":20001446,"radiology_text":"HISTORY:  Pain in the right ankle.\n\nCOMPARISON:  None available.\n\nTECHNIQUE:  Right ankle radiograph, three views.\n\nFINDINGS:  The soft tissues of the ankle are mildly prominent without\nfracture, dislocation or significant degenerative change.  The ankle mortise\nis well preserved and is congruent with the talus.  There is no periarticular\nerosion or focal lytic or sclerotic lesion.  There is a small plantar\ncalcaneal enthesophyte.  There is no soft tissue calcification or radiopaque\nforeign body.\n\nIMPRESSION:  Small plantar calcaneal enthesophyte.  Prominent soft tissues\nwithout underlying fracture or dislocation.\n\n\\BEGIN NEW REPORT:___\nINDICATION:  History of pain and swelling of the right ankle and leg, rule out\nDVT.\n\nCOMPARISONS:  None.\n\nFINDINGS:  Grayscale and color Doppler ultrasound was performed of the\nbilateral common femoral, superficial femoral, popliteal, posterior tibial and\nperoneal veins.  There is non-occlusive thrombus in the right popliteal vein. \nAdditionally, there is occlusive thrombus in one of the peroneal veins on the\nleft.  The remainder of the lower extremity veins demonstrate normal flow,\naugmentation and compressibility.\n\nIMPRESSION:\n1.  Partially occlusive thrombus in the right popliteal vein.\n2.  Occlusive thrombus in one of the left peroneal veins.\n","discharge_instructions":"Mr. ___,\n\n  You were admitted to ___ for leg swelling. You were found to \nhave blood clots in your legs. You will need blood thinning \nmedications to treat these blood clots. It will be important to \nfollow closely with your primary care doctor while you are still \nnew to these medications.","brief_hospital_course":"Mr. ___ is a ___ year-old man with HLD, HTN, GERD, CRF who \npresents with non provoked bilateral ___ DVT.  ",__ is a ___ year-old man with HLD, HTN, GERD, CRF who \npresents with non provoked bilateral ___ DVT.  \n \nACTIVE ISSUES:\n# Bilateral DVT's: Patient has no known risk factors. He was \nstarted on Lovenox and Coumadin with plan to follow-up with PCP \n___ ___ for INR check and bridging guidance. Aspirin and \nplavix being given for history of amaurosis was discontiued as \nCoumadin therapy was initiated. Ongoing evaluation for possible \nmalignancy should continue."}

4. {"hadm_id":20003946,"radiology_text":"HISTORY:  Nausea, recent urinary tract infection.\n\nTECHNIQUE:  PA and lateral views of the chest.\n\nCOMPARISON:  ___.\n\nFINDINGS:\n\nHeart size is mildly enlarged but unchanged.  The aorta is tortuous and\ndiffusely calcified.  Mediastinal and hilar contours are unchanged.  There is\nno pulmonary vascular congestion.  Linear opacities within the right mid lung\nfield likely reflect areas of scarring.  Lungs remain hyperinflated with\nflattening of the diaphragms suggestive of COPD.  No new focal consolidation,\npleural effusion or pneumothorax is present.  There are multilevel\ndegenerative changes in the thoracic spine.  Oral contrast is seen within\nbowel loops in the left upper quadrant of the abdomen.\n\nIMPRESSION:\n\nNo acute cardiopulmonary abnormality.\n\n","discharge_instructions":"You were admitted with nausea, abdominal pain and low sodium. \nYour HCTZ was stopped and your sodium improved. You were seen by \npsychiatry. You were started on a low-dose antidepressant.","brief_hospital_course":"___ F w/GERD, chronic constipation presents with nausea, ___ \nand hyponatremia.\n\nHyponatremia: Improved with holding hctz. This should continue \nto be held as this is the patient's second hospitalization for \nhyponatremia due to this medication.\n\nDepression: Seen by psychiatry. Started on a low dose of \nmirtazepine. \n\nNausea and episodic abdominal pain: Remained stable. Barium \nstudy revealed no evidence of stricture. Pt was able to tolerate \na regular diet while hospitalized. She has undergone an \nextensive GI workup with Dr. ___ at ___ including \na recent small bowel enteroscopy which was normal. She should \ncontinue to follow with him.  \n\nCode status: DNR, DNI, HCP is son. Confirmed with patient and \nson.", Brief Hospital Course:\n___ F w/GERD, chronic constipation presents with nausea, ___ \nand hyponatremia.\n\nHyponatremia: Improved with holding hctz. This should continue \nto be held as this is the patient's second hospitalization for \nhyponatremia due to this medication.\n\nDepression: Seen by psychiatry. Started on a low dose of \nmirtazepine. \n\nNausea and episodic abdominal pain: Remained stable. Barium \nstudy revealed no evidence of stricture. Pt was able to tolerate \na regular diet while hospitalized. She has undergone an \nextensive GI workup with Dr. ___ at ___ including \na recent small bowel enteroscopy which was normal. She should \ncontinue to follow with him.  \n\nCode status: DNR, DNI, HCP is son. Confirmed with patient and \nson.\n \nMedications on Admission:\nThe Preadmission Medication list is accurate and complete.\n1. Amlodipine 10 mg PO HS \n2. Bisacodyl 5 mg PO DAILY:PRN constipation \n3. Dipyridamole-Aspirin 1 CAP PO BID \n4. Docusate Sodium 100 mg PO BID \n5. Metoclopramide 5 mg PO BID:PRN nausea \n6. Metoprolol Succinate XL 100 mg PO DAILY \n7. Omeprazole 20 mg PO BID \n8. Polyethylene Glycol 17 g PO DAILY \n9. Zolpidem Tartrate 2.5 mg PO HS:PRN insomnia \n10. HydrALAzine 50 mg PO TID \n11. Vitamin D 800 UNIT PO DAILY \n12. lisinopril-hydrochlorothiazide ___ mg oral BID \n13. Sucralfate 1 gm PO BID \n\n \nDischarge Medications:\n1. Metoclopramide 5 mg PO BID:PRN nausea \n2. Mirtazapine 15 mg PO HS \n3. Vitamin D 800 UNIT PO DAILY \n4. Sucralfate 1 gm PO BID \n5. Polyethylene Glycol 17 g PO DAILY \n6. Omeprazole 20 mg PO BID \n7. Metoprolol Succinate XL 100 mg PO DAILY \n8. HydrALAzine 50 mg PO TID \n9. Docusate Sodium 100 mg PO BID \n10. Dipyridamole-Aspirin 1 CAP PO BID \n11. Bisacodyl 5 mg PO DAILY:PRN constipation \n12. Amlodipine 10 mg PO HS \n\n \nDischarge Disposition:\nExtended Care\n \nFacility:\n___\n \nDischarge Diagnosis:\nHyponatremia\nDepression\n \nDischarge Condition:\nMental Status: Clear and coherent.\nLevel of Consciousness: Alert and interactive.\nActivity Status: Ambulatory - Independent.\n \nDischarge Instructions:\nYou were admitted with nausea, abdominal pain and low sodium. \nYour HCTZ was stopped and your sodium improved. You were seen by \npsychiatry. You were started on a low-dose antidepressant.\n \nFollowup Instructions:\n___\n".}

5. {"hadm_id":20004038,"radiology_text":"EXAMINATION:\nChest:  Frontal and lateral views\n\nINDICATION:  History: ___ with history of ovarian cancer, fever, positive\nblood culture  // Pneumonia? Mass?\n\nTECHNIQUE:  Chest:  Frontal and Lateral\n\nCOMPARISON:  No prior chest radiograph available for comparison.  Reference\nmade to chest CT from ___\n\nFINDINGS: \n\nThe lungs are clear without focal consolidation.  No pleural effusion or\npneumothorax is seen.  The aorta is tortuous.  The cardiac silhouette is\nmildly to moderately enlarged.  Chronic irregularity of the posterior right\nseventh rib with better assessed on prior CTs; possibly representing fibrous\ndysplasia, chronic fracture not excluded radiographically.\n\nIMPRESSION: \n\nNo acute cardiopulmonary process.\n","brief_hospital_course":"___ w/ HTN, asthma, Afib on warfarin, remote DVT, and met high \ngrade serous ovarian cancer s/p TAH-BSO and chemo, now on \nmaintenance olaparib (follows with Dr ___ s/p ureteral \nstent exchange 1 week ago for chronic hydronephrosis, who p/w \nF/C, sinus pain, and positive GNR BSI from an OP blood culture. \n \n1. E.coli blood stream infection:\nSource is presumed acute Sinusitis vs Urological source given \nrecent stent exchange.\nShe was started on levaquin outpatient when her culture came \nback positive but did not take any doses as she was referred to \nthe ER. \nShe was hemodynamically stable with no fevers, abnormal blood \nwork and so was continued on PO levaquin pending blood cultures.\nHer blood cultures have been negative to date. Her blood culture \nfrom ___ medical records show 1 out of 2 bottles positive for \nE.coli, sensitive to levaquin. The other bottle was with no \ngrowth. Possible contamination however given her \nimmunocompromised state and symptoms of infectious outpatient, \nshe was continued with levaquin for total of 10 days. \nHer olaparib was held but resumed on discharge after discussion \nwith Dr ___. \n\n2. She was found to have an elevated INR > 4 so her coumadin was \ninitially held and resumed after it was maintained within goal. \nNo signs of bleeding. She is to have an INR check in 2 days for \nclose monitoring. Her last INR was 2. She was given 3.75mg dose \nday of discharge instead of her 2.5mg.\n\nPatient has her GYN/ONC appointment on ___ and her HEME/ONC \nappointment is ___. \nWill monitor for inpatient finalized blood culture results and \nif any changes, patient will be contacted.", Discharge Instructions:\nYou were admitted with fever E.coli infection in your \nbloodstream and have improved with oral antibiotics, Levaquin. \nPlease complete the antibiotic course for another 5 days and \nfollow up with your PCP next week.\n\nYou may take your Olaparib tonight, this has been discussed with \nyour oncology team.\n\nYou may continue to take your coumadin at regular dosing with an \nINR check in 2 days as the Levaquin may interfere with the INR \nand your INR number was high when you were admitted. Your last \ncoumadin was 3.75mg (1.5 tablets) on ___\n \nFollowup Instructions:\n___\n".}

6. {"hadm_id":20006989,"radiology_text":"EXAMINATION:  CT ABDOMEN AND PELVIS WITH CONTRAST\n\nINDICATION:  ___ year old man with ___, pancreatic adeno s/p\nWhipple, DVT/PE on apixaban, recurrent bacteremia p/w fever + rigors.//\nevidence for intra-abdominal source of infection?\n\nTECHNIQUE:   Single phase split bolus contrast: MDCT axial images were\nacquired through the abdomen and pelvis following intravenous contrast\nadministration with split bolus technique.\nOral contrast was administered.\nCoronal and sagittal reformations were performed and reviewed on PACS.\n\nDOSE:  Acquisition sequence:\n   1) Spiral Acquisition 8.1 s, 52.4 cm; CTDIvol = 7.0 mGy (Body) DLP = 359.7\nmGy-cm.1\n Total DLP (Body) = 360 mGy-cm.\n\nCOMPARISON:  CT abdomen and pelvis ___\n\nFINDINGS: \n\nLOWER CHEST: There are small bilateral pleural effusions with associated\natelectasis, unchanged from ___.\n\nABDOMEN:\n\nHEPATOBILIARY: The liver is unchanged in morphology with multiple vascular\nmalformations noted throughout the liver unchanged in configuration compared\nto ___.  Mild intrahepatic biliary duct dilation, most pronounced in the\nleft hepatic lobe is also grossly unchanged given differences in imaging\ntechnique.  There is new air within the periphery of segment II of the liver\nfelt to be in continuity with mildly dilated ducts in this location (series 5,\nimage 10).  There is also mild periportal edema.  The portal vein is patent. \nThere is small volume ascites.\n\nPANCREAS: Patient is status post Whipple.  Note that this study is not\noptimized for evaluation for pancreatic lesions but no obvious masses seen. \nThere is unchanged soft tissue adjacent to the surgical bed.  There is no\npancreatic duct dilation.\n\nSPLEEN: Spleen is enlarged measuring 13.0 cm.  No focal splenic lesion is\nseen.\n\nADRENALS: The right and left adrenal glands are normal in size and shape.\n\nURINARY: The kidneys are symmetric in size.  There are multiple bilateral\nrenal cysts with the largest measuring 3.2 x 2.5 cm in the interpolar left\nkidney.  There other subcentimeter hypodensities which are too small to\ncharacterize but statistically likely represent additional cysts.  There is no\nhydronephrosis.\n\nGASTROINTESTINAL: There is unchanged appearance of multiple gastric polypoid\nlesions since prior CT, please correlate with recent upper endoscopy from ___.  Largest confluent polypoid mass is located in the gastric antrum. \nThere is no evidence of gastric outlet obstruction.  There is no bowel\nobstruction.  There is no evidence of small or large bowel wall thickening\n\nPELVIS: The urinary bladder and distal ureters are unremarkable.  There is no\nfree fluid in the pelvis.\n\nREPRODUCTIVE ORGANS: The prostate is unremarkable.\n\nLYMPH NODES: Mildly enlarged retroperitoneal and mesenteric lymph nodes are\nunchanged.  There is no pelvic sidewall or inguinal adenopathy.\n\nVASCULAR: There is no abdominal aortic aneurysm.  Celiac trunk demonstrates\ndiffuse enlargement as do the hepatic arterial vessels.  There is a single\nrenal artery bilaterally.  An IVC filter is in place with some retraction of\nthe more distal IVC, likely from prior thrombus.\n\nBONES: There is no evidence of worrisome osseous lesions or acute fracture.\n\nSOFT TISSUES: There are collateral vessels noted in the left anterior\nabdominal wall.\n\nIMPRESSION:\n\n\n1. New peripheral foci of air within in segment II of the liver.  Although\ngiven peripheral location this is somewhat atypical, this air is felt to be in\ncontinuity with mildly dilated intrahepatic bile ducts and is therefore likely\npneumobilia rather than portal venous gas.  This appears similar to prior\nstudy from ___.\n2. Otherwise unchanged morphology of the liver with multiple vascular\nmalformations in keeping with known OWR syndrome.\n3. Multiple enhancing gastric polyp/masses, correlate with prior upper\nendoscopies.\n4. Unremarkable appearance of small and large bowel.\n5. Unchanged postsurgical appearance after Whipple and partial colectomy.\n","brief_hospital_course":"___ with ___ polyposis/HHT cross-over \nsyndrome, duodenal adenoCA s/p Whipple ___ c/b chronic TPN \nrequirement, DVT/PE (s/p IVC, on apixaban), multiple recent \nadmissions for bacteremia ___ for S.___ w/chest port \nremoval and ___ for Klebsiella, source unclear) p/w fevers and \nrigors of unclear etiology.\n\n# Fevers/rigors:\n# Recurrent bacteremia:\nPt with hx of recurrent bacteremia (S.lug___ in ___, \nthought due to line infection and s/p Nafcillin and chest port \nremoval and then Klebsiella ___ of unclear source, treated \nwith ethanol locks and CTX. Now presenting with fevers/rigors \nconcerning for recurrent bacteremia. No clear localizing source \nby symptoms or exam. BCx on ___, and ___ -- along \nwith UCx -- were all negative at the time of discharge. CT A/P \nwith no clear source; stable pneumobilia likely secondary to \nprior Whipple. TTE without vegetations (obtained for new murmur \non exam). Afebrile since admission. Initially got Vanc/Cefepime \nin ED on ___, switched to CTX ___. Antibiotics were \nstopped on ___ and he was observed for 24h without recurrence \nof his symptoms. He is discharged off antibiotics and will f/u \nwith his PCP and hematologist on ___. \n\nOf note, patient has been getting intrmittent fevers of unclear \nsource on and off for a year with several PCP visits and \nadmissions for fever and hematemesis ___ and ___ with \nno clear source of fever identified.  He is s/p whipple for \nampullary carcinoma which was completely resected; he also has \nchronic cytopenias for which he is followed by hemeonc as an \noutpatient. Hypogammaglobulinemia recently ruled out by Ig \neletrophoresis.. Currently no evidence of active malignancy. \nAlso no associated clinical features suggesting a \nconnective-tissue disease. His occasional fevers could be caused \nby intermittent bacterial translocation from the gut iso \nunderlying polyposis. He will be followed up for this in the out \npatient setting by ID. \n\n# Chronic moderate malnutrition:\n# Protein-losing enteropathy s/p hemicholectomy:\nTPN was initially held on admission, resumed on the night of \n___. He was discharged to continue his home TPN \nformulation.\n\n# Chronic anemia:\nThought secondary to chronic GI blood loss, followed by \nhematology (Dr. ___ and on monthly IV iron infusions, next \ndue ___.\n\n# Leukopenia:\n# Thrombocytopenia:\nChronic and generally stable since at least ___. Haematologist \ndid not think there is concern for blood malignancy. likely \nsecondary to marrow suppression in setting of infection given \nsignificant fluctuation over last few years. Has f/u apt \nw/outpatient hematologist next week.\n\n# ? pleural thickening: per subtle residual opacity along\nthe  periphery of the left lower lung on CXR. Will require \nrepeat CXR for interval change in outpatient setting. \n\n# HFpEF: Appears euvolemic currently. Continued on home Lasix \n40mg.\n\n# DVT: continue apixaban\n# Depression/anxiety: continue citalopram\n# Seizure disorder, brain AVM: continue keppra\n# Chronic pain: continue oxycodone\n# Asthma: continue albuterol PRN\n# h/o adenocarcinoma of duodenum/ampulla and high grade \ndysplasia of pancreatic tail s/p Whipple; stable\n\n** TRANSITIONAL **\n\n[]f/u pending BCx ___ , UCx\n[] IV iron scheduled ___\n[] will f/u as outpatient with ID to consider further workup if \nfevers recur. \n[] PCP to repeat CXR in 4 weeks for interval change given ? \npleural thickening. \n[] f/u with hematology \n[] resume previous TPN orders",Discharge Instructions:\nDear Mr. ___,\n\nYou were admitted to the hospital with fevers/shakes. Blood \ncultures did not grow a culprit organism. Echocardiogram of your \nheart showed no evidence of valvular disease, and CT scan of \nyour belly showed no infection there either. You were initially \ntreated with antibiotics, which were discontinued in the absence \nof clear evidence for infection. You did well with a period of \nobservation and are being discharge home on your prior \nmedication regimen\n\nPlease be vigilant for recurrent fevers and notify your doctors \nabout ___ immediately. \n\nWith best wishes,\n___ Medicine\n \nFollowup Instructions:\n___\n".}

7. {"hadm_id":20007442,"radiology_text":"HISTORY:  ___ man, status post L5-S1 fusion.  Now with low back pain. \nAssess for postoperative changes.  Rule out abscess or infectious process.\n\nCOMPARISON:  Preoperative lumbar MR on ___.\n\nTECHNIQUE:  Multiplanar, multisequence T1- and T2-weighted images were\nacquired through the cervical, thoracic and lumbar spine before and after\nadministration of IV gadolinium contrast.\n\nFINDINGS:\n\nCERVICAL SPINE:  The craniocervical junction is normal.  The vertebral body\nheight and disc height are preserved.  There is a mild C3-C4 disc bulge,\nmildly effacing the anterior thecal sac, but without significant spinal canal\nstenosis.  There are otherwise no significant degenerative changes elsewhere. \nThe cervical spinal cord is normal in morphology and signal intensity.\n\nTHORACIC SPINE:  The vertebral body height and disc height are preserved. \nThere is normal spinal alignment.  There are no significant degenerative\nchanges.  The thoracic spinal cord is normal in morphology and signal\nintensity.\n\nLUMBAR SPINE:  The patient is status post L5-S1 fusion with a disc spacer. \nThe vertebral body height is preserved.  The conus medullaris terminates at\nL1-L2 level.\n\nAt L1-L2 and L2-L3, There are no significant degenerative changes.\n\nAt L3-L4, there is a small posterior disc bulge with facet arthropathy,\nresulting in left greater than right mild-to-moderate neural foraminal\nnarrowing.  There is also mild subarticular zone narrowing, resulting in mild\ncrowding of the traversing nerve roots.\n\nAt L4-L5, there is a small posterior disc bulge, with facet arthropathy,\nresulting in left greater than right mild-to-moderate neural foraminal\nnarrowing.  There is also mild subarticular recess narrowing, resulting in\nmild crowding of the traversing nerve roots.\n\nAt L5-S1, there is fusion hardware with a disc spacer.  There is no\nsignificant spinal canal stenosis.  Susceptibility artifacts from the fusion\nhardware significantly limit assessment of the neural foramina.  There are\nexpected postoperative changes in the posterior paraspinal soft tissues. \nThere is no abnormal enhancement to suggest infectious process.\n\nThere is no abnormal fluid collection.\n\nIMPRESSION:\n\n1.  Expected postoperative appearance of L5-S1 fusion without evidence of\ninfectious process.  Specifically, no evidence of discitis, osteomyelitis or\nepidural abscess.\n\n2.  Multilevel degenerative changes as described above.\n\\BEGIN NEW REPORT:___\nHISTORY:  Fever and elevated CRP; no evidence of spinal infection on MRI after\nlumbar surgery in ___.  Evaluate for infiltrate.\n\nTECHNIQUE:  AP and lateral chest.\n\nCOMPARISON:  Chest radiograph:  ___.\n\nFINDINGS:\n\nLung volumes are low but unchanged since previous exam.  The lungs are clear\nwithout focal opacities to suggest pneumonia.  Mild cardiomegaly is unchanged.\nMediastinal silhouette and hilar contours are normal.  There is no pleural\neffusion or pneumothorax.\n\nIMPRESSION:\n\n1. Mild cardiomegaly is unchanged since previous exam.  \n\n2. There are no new focal airspace opacities to suggest pneumonia.\n\n","discharge_instructions":"You were admitted for fever workup.\nYour post-surgical incision looks well healed and uninfected.\nImaging of your lumbar spine does not suggest active infection.\nYou will need further workup of the fevers.\n\n-Activity: You should not lift anything greater than 10 lbs for \n2 weeks. You will be more comfortable if you do not sit or stand \nmore than ~45 minutes without getting up and walking around.\n\n-Rehabilitation/ Physical Therapy: \no2-3 times a day you should go for a walk for ___ minutes as \npart of your recovery.  You can walk as much as you can \ntolerate. \noLimit any kind of lifting.\n\n-Diet: Eat a normal healthy diet. You may have some \nconstipation after surgery.  You have been given medication to \nhelp with this issue.\n\n-Wound Care: monitor incisions for any acute changes in \nappearance such as redness or drainage.\n\n-You should resume taking your normal home medications.\n\n-You have also been given Additional Medications to control \nyour pain.  Please allow 72 hours for refill of narcotic \nprescriptions, so please plan ahead.  You can either have them \nmailed to your home or pick them up at the clinic located on \n___ 2.  We are not allowed to call in or fax narcotic \nprescriptions (oxycontin, oxycodone, percocet) to your pharmacy. \n In addition, we are only allowed to write for pain medications \nfor 90 days from the date of surgery.\n\n-Follow up:\noPlease Call the office and make an appointment for within 2 \nweeks.  \n\nPlease call the office if you have a fever>101.5 degrees \nFahrenheit and/or drainage from your wound.","brief_hospital_course":"Patient was admitted to the ___ Spine Surgery Service.  \nIncision clean dry and intact, no obvious evidence of infection \nat MRI or CT at L5S1 implants.  Infectious disease consulted for \nhelp with fever work up.  UA negative, CXR negative, Urine Cx \nand Blood Cx pending.  TEDs/pnemoboots were used for \npostoperative DVT prophylaxis.  Antibiotics were not initiated \nas there was no obvious source. Diet was regular.  The patient \nwas transitioned to oral pain medication when tolerating PO \ndiet.  Hospital course was otherwise unremarkable.  On the day \nof discharge the patient was afebrile with stable vital signs, \ncomfortable on oral pain control and tolerating a regular diet ".}

8. {"hadm_id":20007599,"radiology_text":"EXAMINATION:  SHOULDER ___ VIEWS NON TRAUMA LEFT\n\nINDICATION:  History: ___ with left shoulder pain and swelling// osteomyelitis\n\nTECHNIQUE:  Left shoulder 4 views\n\nCOMPARISON:  ___\n\nFINDINGS: \n\nNo acute fracture or dislocation is seen.  The left acromioclavicular joint is\nintact.  No cortical destruction is seen.  On the external rotation view,\nthere is a an ovoid 3.0 x 1.0 cm lucency projecting over the superficial soft\ntissue lateral to the proximal humeral shaft, which may represent soft tissue\ngas, possibly from recent surgery.  The left upper outer hemithorax is grossly\nunremarkable.\n\nIMPRESSION: \n\nNo acute fracture or dislocation of the left shoulder.  No cortical\ndestruction seen.\n\nOn the external rotation view, there is a an ovoid 3.0 x 1.0 cm lucency\nprojecting over the superficial soft tissue lateral to the proximal humeral\nshaft, which may represent soft tissue gas, possibly from recent surgery.\n\\BEGIN NEW REPORT:___\nEXAMINATION:  UNILAT UP EXT VEINS US LEFT\n\nINDICATION:  History: ___ with left upper extremity swelling after surgery//\nAbscess or fluid collection\n\nTECHNIQUE:  Grey scale and Doppler evaluation was performed on the left upper\nextremity veins.\n\nCOMPARISON:  None.\n\nFINDINGS: \n\nThere is normal flow with respiratory variation in the left subclavian vein.\nThe left internal jugular and axillary veins are patent, show normal color\nflow and compressibility. The left brachial, basilic, and cephalic veins are\npatent, compressible and show normal color flow and augmentation.\n\nSubcutaneous edema of the left upper extremity is noted.  No drainable fluid\ncollection is seen.\n\nIMPRESSION: \n\nNo evidence of deep vein thrombosis in the left upper extremity.\n\nSubcutaneous edema.\n","discharge_instructions":"Take Bactrim as previously prescribed, take the Clindamycin as \nprescribed - notify us if you are having any diarrhea or GI \nupset\nOkay to shower\nActivity as instructed by Dr. ___ incision for cellulitis\nLeave incision open to air\nReturn to ED if persistent or worsening infection\nFollow up with Dr. ___ as written\n___ Therapy:\nYou will begin outpatient ___ after your first post-operative \nappointment with Dr. ___. \nTreatments Frequency:\nClean incision sites daily and pat dry. Monitor for redness or \nsigns of infection.","brief_hospital_course":"The patient was admitted to the orthopedic surgery service for \nworsening left shoulder cellulitis. \nHD#0: Patient received IV antibiotics.\nHD#1: IV vancomycin dose was increased due to a slightly low \ntrough. Cellulitis improved quickly and patient labs were WNL.\nHD#2: Patient's pain well controlled. Erythema resolved. \nDischarged home on oral Bactrim and clindamycin with f/u \nscheduled. Benadryl given for pruritis prn. \n\nPain was well controlled. The patient was voiding independently \nprior to discharge.  Labs were checked throughout the hospital \ncourse and repleted accordingly. At the time of discharge the \npatient was tolerating a regular diet and feeling well.  The \npatient was afebrile with stable vital signs.  The patient's \nhematocrit was acceptable and pain was adequately controlled on \nan oral regimen. The operative extremity was neurovascularly \nintact and the dressing was intact. \n \nMr. ___ is discharged to home in stable condition".}

9. {"hadm_id":20007881,"radiology_text":"EXAM:  Abdominal series, chest frontal view and supine upright views of the\nabdomen.\n\nCLINICAL INFORMATION:  Abdominal pain and distention.\n\nCOMPARISON:  None.\n\nFINDINGS:\n\nCHEST:  The lungs are clear without focal consolidation.  No pleural effusion\nor pneumothorax is seen.  The cardiac and mediastinal silhouettes are\nunremarkable.\n\nABDOMEN:  Supine and upright views of the abdomen were obtained.  There is a\nnonobstructive bowel gas pattern.  Moderate amount of stool is seen throughout\nthe colon.  No large air-fluid levels are seen.\n\nIMPRESSION:  No evidence of bowel obstruction or free air.  Clear lungs.\n\\BEGIN NEW REPORT:___\nEXAM:  Abdomen lateral decubitus views.\n\nCLINICAL INFORMATION:  Abdominal pain.\n\nCOMPARISON:  None.\n\nFINDINGS:  Left lateral decubitus views of the abdomen were obtained.  No\nevidence of free air is seen.  There are no large air-fluid levels.\n\\BEGIN NEW REPORT:___\nHISTORY:  History of ulcerative colitis with right lower quadrant pain of\nunclear etiology.  Rule out appendicitis/abscess.\n\nTECHNIQUE:  MDCT contiguous imaging was performed through the abdomen and\npelvis following the administration of oral and intravenous contrast.  Coronal\nand sagittal reformations were provided.\n\nCOMPARISON:  CT ___.  \n\nFINDINGS:\nThere is bibasilar atelectasis with trace pleural effusions.  No suspicious\npulmonary lesion.  No pericardial effusion.\n\nCT ABDOMEN:  The liver enhances homogeneously.  Multiple hypodensities are\nagain noted throughout the liver (series 2: 11, 15), these are too small to\ncharacterize but likely represent simple cysts or biliary hamartomas.  No\nintra or extrahepatic biliary dilatation.  The gallbladder is unremarkable. \nThe spleen is within normal limits.\n\nThe pancreas enhances homogeneously, no pancreatic duct dilatation or focal\npancreatic lesion.  No peripancreatic fluid collection.\n\nNormal right adrenal gland.  The 1 cm hypodense lesion within the left adrenal\ngland is unchanged and has been incompletely characterized on the current\nstudy.  The kidneys enhance and excrete contrast symmetrically without\nhydronephrosis.  No suspicious renal lesion.\n\nThe appendix is distended measuring up to 13 mm in maximal diameter, there is\nwall thickening and enhancement with adjacent mesenteric fat stranding\ncompatible with appendicitis (series 2, 44 to 50). There is mild associated\nthickening of the adjacent cecum.  No perforation or abscess.  \n\nNo bowel obstruction.  The remainder of the small and large bowel are within\nnormal limits.  No significant upper abdominal or retroperitoneal\nlymphadenopathy.  The abdominal aorta is of normal caliber without aneurysmal\ndilatation.  Mild calcified atherosclerosis is noted.  No free air.\n\nCT PELVIS:  The bladder, distal ureters, uterus and rectum are within normal\nlimits.  There is a trace amount of free fluid within the pelvis.  No pelvic\nlymphadenopathy.\n\nIMPRESSION:\n\n1. Features consistent with acute appendicitis with a dilated fluid-filled\nappendix with adjacent fat stranding.  There is mild thickening of the\nadjacent cecum.  No evidence of perforation or abscess.\n\n2. 1 cm nodule within the right adrenal gland, incompletely characterized on\nthe current study.  Correlation with any previous imaging is advised in the\nfirst instance.  If this is not available, dedicated adrenal CT or MRI could\nbe performed for additional characterization.\n\n3. Multiple hepatic hypodensities, too small to characterize, likely cysts or\nbiliary hamartomas.\n\nThis result was discussed with Dr. ___ ___ by telephone at 6.15pm on\n___. \n\n","discharge_instructions":"___ were admitted to the hospital with lower abdominal pain.  \n___ underwent a cat scan of the abdomen and ___ wee found to \nhave appendicitis. ___ were started on antibiotics.  ___ are \npreparing for discharge home but will need to have your appendix \nremoved at some later time.  ___ are preparing for discharge \nhome with the following instructions: \n\nPlease call your doctor or return to the emergency room if ___ \nhave any of the following:\n\n* ___ have a recurrence of your abdominal pain\n \n\n * ___ experience new chest pain, pressure, squeezing or \n tightness.\n \n* New or worsening cough or wheezing.\n \n* If ___ are vomiting and cannot keep in fluids or your \n medications.\n \n* ___ are getting dehydrated due to continued vomiting, \n diarrhea or other reasons. Signs of dehydration include dry \n mouth, rapid heartbeat or feeling dizzy or faint when standing.\n \n* ___ see blood or dark/black material when ___ vomit or have a \n bowel movement.\n \n\n* ___ have shaking chills, or a fever greater than 101.5 (F) \n degrees or 38(C) degrees.\n \n* Any serious change in your symptoms, or any new symptoms that \n concern ___. \n \n* Please resume all regular home medications and take any new \nmeds \n as ordered.","brief_hospital_course":"The patient was re-admitted to the hospital with lower abdominal \npain.  Upon admission, she was made NPO, given intravenous \nfluids, and underwent imaging. An x-ray of the abdomen was done \nwhich showed no evidence of bowel obstruction or free air.  To \nfurther identify the etiology of her pain, the patient underwent \na cat scan of the abdomen which showed a dilated fluid-filled \nappendix with adjacent fat stranding suggestive of appendicitis. \nThere was no evidence of perforation or abscess. The patient was \nstarted on a 2 week course of ciprofloxacin and flagyl and \nunderwent serial abdominal examinations.  Because of her history \nof ulcerative colitis, the GI service was consulted who \nsupported current management with antibiotics as well as an \ninterval appendectomy.  The patient's vital signs have been \nstable and she has been afebrile.  She was tolerating a regular \ndiet.  She was  discharged home on HD # 5 in stable condition \nwith instructions to complete the antibiotic course.  An \nappointment for follow-up was made with the acute care service \nand with her primary care provider.\n\nOf note:  report of cat scan : 1 cm nodule within the right \nadrenal gland seen, recommendation made for adrenal cat scan or \nMRI.  Patient informed of these findings and copy of report \ngiven to patient.  Follow-up with primary care provider \n___".}

10. {"hadm_id":21067346,"radiology_text":"HISTORY:  MVC and question of pulmonary contusion on chest radiographs, now\nrequiring assessment for rib fractures and pulmonary contusion.  \n\nTECHNIQUE:  MDCT imaging of the chest without intravenous contrast was\nperformed.  Multiplanar reformats were prepared and reviewed.  \n\nCOMPARISON:  Comparison is made with OSH CT abdomen from earlier the same day,\n___.  \n\nFINDINGS:  Peripheral ground glass opacities and consolidation consistent with\npulmonary contusions are seen in the posterior right lower lobe and the right\nmiddle lobe, similar to prior exam.  Less extensive ground glass seen in the\nright upper lobe as well.  The lungs are otherwise clear.  There is a small\nright pneumothorax adjacent to pulmonary contusions laterally.  No\npathologically enlarged axillary, mediastinal, or hilar lymph nodes are\nidentified.  There is no pleural or pericardial effusion.  The heart,\npericardium, and great vessels are within normal limits.  The visualized\nthyroid gland is unremarkable.\n\nThis study is not tailored for subdiaphragmatic evaluation, but the visualized\nintra-abdominal organs are unremarkable.  \n\nBONE WINDOWS:  No acute fracture or dislocation.  No focal lytic or sclerotic\nosseous lesion suspicious for infection or malignancy is seen.  Mild upper\nthoracic levoscoliosis noted. \n\nIMPRESSION:\n\n1.  Posterior right lower lobe and right middle lobe pulmonary contusions. \n\n2.  Small right pneumothorax adjacent to pulmonary contusions laterally. \n\n3.  No acute fracture or dislocation.  \n\nUpdated findings from wet read communicated to Dr. ___ at 9:22 p.m. on\n___ by phone.\n\n\n\\BEGIN NEW REPORT:___\nPA AND LATERAL CHEST OF ___\n\nCOMPARISON:  Chest CT of ___.\n\nFINDINGS:\n\nThe small right lateral pneumothorax on chest CT of one day earlier is not\napparent on the radiograph, but may be below the resolution of detection. \nConsolidation involving portions of the right middle and right lower lobe have\nslightly progressed in the interval, and likely represent contusion in the\nsetting of recent trauma, although aspiration is an additional consideration. \nLungs are otherwise clear, and there are no substantial pleural effusions.\n", "discharge_instructions":"You were admitted to the Acute Care Surgery service after being \nin a motor vehicle collision.  You suffered a skull base \nfracture and a pulmonary contusion on the right.  You were seen \nby Neurosurgery who fitted an Aspen collar for you.  You are now \nready to complete your recovery at home.  Please follow the \ninstructions below:\n\n-You are advised to wear the Aspen neck collar until follow up \nin the ___ clinic in 8 weeks.  Please call ___ \nto schedule this appointment and a repeat CT scan of your \nc-spine.  \n\n-You are being given a prescription for narcotic pain \nmedication.  Please do not drink alcohol or drive while taking \nthis medication.\n\n-You may resume a regular diet.\n\n-You may remove the neck collar to shower.\n\n-If you experience severe headaches, neck pain, fever>101, or \nanything else that concerns you, please call the ___ \nclinic or go to the closest emergency department.","brief_hospital_course":"The patient is a ___ year old male who was admitted to the Acute \nCare Surgery service after a MVC.  He was a restrained passenger \nin a vehicle that hit a tree.  He was transferred from an OSH \nwith right sided occipital condyle fracture and a right \npulmonary contusion.  When he arrived at ___, he was \nthoroughly examined and had a CT abdomen/pelvis which revealed a \nsmall PTX on the right. He was also seen by Neurosurgery who \nfitted him with an Aspen collar. He was admitted for \nobservation.  He had a PA/lat CXR in the morning of HD1 which \ndid not show a PTX on the right.  Neurosurgery recommended \nwearing the Aspen collar until follow up in their clinic in 8 \nweeks.  He was also seen by ___ who recommended another night in \nthe hospital due to dizziness and unsteadiness with walking.  On \nHD2, he worked with ___ again, who cleared him for discharge to \nhome.  At the time of discharge, he was ambulating \nindependently, and his vital signs were stable.  He was given a \nprescription for oxycodone and instructions to follow up in the \nNeurosurgery clinc".}

11. {"hadm_id":21068123,"radiology_text":"EXAMINATION:  DX HAND AND WRIST\n\nINDICATION:  History: ___ with right wrist pain and TTP. unable to move\nfingers*** WARNING *** Multiple patients with same last name!  // eval for\nfracture/dislocation       eval for fracture/dislocation                      \neval for fracture/dislocation\n\nTECHNIQUE:  Right hand, three views, and right wrist, three views.\n\nCOMPARISON:  Right wrist radiograph dated ___.\n\nFINDINGS: \n\nThe patient is status post resection of the entire proximal carpal row,\nplacement of screws transfixing the distal radius to the distal carpal row and\nplacement of bone graft material.  Overall the appearance is unchanged.  There\nis no evidence of acute fracture.  Mild degenerative changes are seen at the\nfirst MCP joint and the CMC joints.\n\nIMPRESSION: \n\nNo significant interval change.  No acute fracture.\n\nNOTIFICATION:  The findings were discussed by Dr. ___ with Dr.\n___ in personon ___ at 5:36 ___, 2 minutes after discovery of the\nfindings.\n\\BEGIN NEW REPORT:___\nEXAMINATION:  CHEST (PORTABLE AP)\n\nINDICATION:  ___ year old man with Desat s/p stellate ganglion block  // Desat\ns/p stellate ganglion block      Desat s/p stellate ganglion block\n\nIMPRESSION: \n\nIn comparison with the study of ___, there is little change and\nno evidence of acute cardiopulmonary disease.  Cardiac silhouette is within\nnormal limits and there is no evidence of vascular congestion, pleural\neffusion, or acute focal pneumonia.\n", "discharge_instructions":"You were admitted for observation and treatment of acute right \nwrist pain.  Follow these discharge instructions:\n.\n - If your right wrist begins to worsen after discharge home \nwith an acute increase in pain, please call Dr. ___ office \nto report this.\n .\n Medications: \n * Resume your regular medications unless instructed otherwise. \n * You may take your prescribed pain medication for moderate to \nsevere pain. You may switch to Tylenol or Extra Strength Tylenol \nfor mild pain as directed on the packaging. \n * Take Colace, 100 mg by mouth 2 times per day, while taking \nthe prescription pain medication to prevent constipation. You \nmay use a different over-the-counter stool softener if you wish. \n\n * Do not drive or operate heavy machinery while taking any \nnarcotic pain medication. You may have constipation when taking \nnarcotic pain medications (oxycodone, percocet, vicodin, \nhydrocodone, dilaudid, etc.); you should continue drinking \nfluids, you may take stool softeners, and should eat foods that \nare high in fiber.\n .\n Return to the ER if:\n * If you are vomiting and cannot keep in fluids or your \nmedications.\n * If you have shaking chills, fever greater than 101.5 (F) \n degrees or 38 (C) degrees, increased redness, swelling or \n discharge from incision, chest pain, shortness of breath, or \nanything else that is troubling you.","brief_hospital_course":"The patient was admitted to the plastic surgery service on \n___ for observation and treatment of severe right hand pain. \n The patient tolerated the procedure well. \n .\n Neuro: A chronic pain service consult was obtained and \nrecommendations were followed including increasing home \nneurontin dose, adding toradol, and adding opioid pain \nmedication.  Patient reported some relief but also had periods \nof \"shooting, shock like pain\" radiating from a focal point of \nradial side of wrist (radial neuropathy).  Patient became \nincreasingly anxious with episodes of pain and was given Ativan \nprn.  Patient reported good effect with Ativan both for anxiety \nand assisting with pain control.  In further review with patient \nand discussing symptoms, it was agreed that patient would \nundergo a repeat stellate ganglion injection of hydrocortisone \nby the Chronic Pain Service.  This was completed prior to \ndischarge home and patient will follow up with Dr. ___ CPS \nto discuss outcome of procedure and effect on radial neuropathy.\n .\n CV: The patient was stable from a cardiovascular standpoint; \nvital signs were routinely monitored.\n .\n Pulmonary: The patient was stable from a pulmonary standpoint; \nvital signs were routinely monitored.\n .\n GI/GU: The patient tolerated a regular diet. He was also \nstarted on a bowel regimen to encourage bowel movement. Voiding \nspontaneously.\n .\n At the time of discharge on hospital day#4, the patient was \ndoing well, afebrile with stable vital signs, tolerating a \nregular diet, ambulating, voiding without assistance, and pain \nwas controlled".}

12. {"hadm_id": 21069761, "radiology_text": "EXAMINATION:  CHEST (PA AND LAT)\n\nINDICATION:  History: ___ with h/o CHF, 1 wk URI sx, increased cough// eval\nfor pneumonia, pulm edema\n\nTECHNIQUE:  Frontal and lateral view radiographs of the chest.\n\nCOMPARISON:  Chest radiographs ___.\n\nFINDINGS: \n\nThe lungs are hyperinflated with flattening of the diaphragm, which may\nrepresent chronic emphysematous changes.  There is no focal consolidation,\npleural effusion or pneumothorax.  The cardiac silhouette remains borderline\nenlarged.  There is no pulmonary edema.  There is diffuse idiopathic skeletal\nhyperostosis of the thoracic spine.  Degenerative changes are seen in both\nshoulders.\n\nIMPRESSION: \n\nNo pneumonia or acute cardiopulmonary process.\n", "discharge_instructions": "Dear Mr. ___,\n\nIt was a pleasure taking care of you at ___ \n___. \n\nWHY WAS I ADMITTED TO THE HOSPITAL?\n- You were admitted to the hospital because you were having \nworsening cough and pain with urination\n\nWHAT HAPPENED WHILE I WAS IN THE HOSPITAL?\n- You received medications to remove the extra fluid in your \nbody and were started on new medications to help your heart\n- You were given antibiotics for your urinary tract infection\n\nWHAT SHOULD I DO WHEN I GO HOME?\n- You should continue to take your medications as prescribed. \n- You should attend the appointments listed below. \n- Weigh yourself every morning, call your cardiologist if your \nweight goes up more than 3 lbs.  \n- Seek medical attention if you have new or concerning symptoms \nor you develop swelling in your legs, abdominal distention, or \nshortness of breath at night.  \n- Your discharge weight: 190 lbs. You should use this as your \nbaseline after you leave the hospital.  \n\nWe wish you the ___!\nYour ___ Care Team", "brief_hospital_course": "TRANSITIONAL ISSUES:\n====================\nDISCHARGE WEIGHT: 190 lbs\nDISCHARGE CR: 3.0\nDISCHARGE DIURETIC: Torsemide 20mg\n\n[ ] Recommend repeat Chem-10 within 1 week to ensure resolution \n___\n[ ] Recommend repeat CBC within in 1 week to ensure Hgb \nstability. Ensure patient is up to date with age-appropriate \nmalignancy screening\n[ ] Follow-up blood pressure, consider uptitrating hydralazine \nas outpatient\n[ ] Lisinopril and spironolactone deferred given poor outpatient \nlaboratory follow-up. Consider starting if able to make reliable \nfollow-up.\n[ ] Atorvastatin dose increased to 40mg but patient \nintermittently refused to take 40mg stating that he took 20mg at \nhome. Not amenable to increasing dose to 80mg. Re-address and \nconsider high intensity statin therapy if amenable.\n[ ] Ensure up to date with all preventative vaccinations (annual \nflu, Tdap, PPSV 23, PCV 13)\n[ ] Recommend code status discussion and completion of MOLST"}

13. {"hadm_id": 21069864, "radiology_text": "INDICATION:  A ___ male struck by car.\n\nCOMPARISONS:  None.\n\nPORTABLE AP VIEW OF THE CHEST:  Fine detail is obscured by overlying trauma\nboard.  The lung volumes are low.  Accounting for technique, the heart size is\nnormal.  There is no pleural effusion or pneumothorax.  No focal airspace\nconsolidation is seen to suggest pneumonia.  \n\nSINGLE FRONTAL VIEW OF THE PELVIS:  Fine detail is obscured by overlying\ntrauma board.  The bony pelvis is intact.  There is no fracture or dislocation\nseen.\n\nIMPRESSION:  No evidence of acute process.\n\nThese findings were discussed with the ACS intern at 2100 on ___ in\nperson.\n\n\\BEGIN NEW REPORT:___\nINDICATION:  ___ male struck by car, now with foot pain.\n\nCOMPARISONS:  None.\n\nTHREE VIEWS OF THE RIGHT FOOT:  There is a comminuted fracture of the proximal\nportion of the first metatarsal although with a predominant complete\nhorizontal component.  There is moderate lateral displacement of the distal\nfragment.  Mild medial subluxation of the first tarsometatarsal joint is also\nnoted.  Soft tissue swelling overlying the fracture is appreciated.\n\nTHREE VIEWS OF THE LEFT FOOT.  There is a comminuted fracture involving the\ndistal portion of the proximal phalanx and the proximal portion of the distal\nphalanx.  There is mild to moderate valgus angulation at the interphalangeal\njoint owing to impaction along the lateral side of the middle phalanx\nfracture.  The interphalangeal joint is narrowed and distorted by subluxation\nof comminuted, mildly distracted fragments of the distal phalanx fracture. \nSoft tissue swelling at the fracture site is noted.\n\nIMPRESSION:\n\n1.  Comminuted fracture of the right first proximal metatarsal with slight\nmedial subluxation at the first tarsometatarsal joint.\n\n2.  Comminuted fractures of the left first proximal and distal phalanges with\nnarrowing of the interphalangeal joint space and subluxation associated with\nmild distraction and impaction of comminuted distal phalanx fragments; there\nis also valgus angulation at the interphalangeal joint associated with\nimpaction of the lateral side of the proximal phalanx fracture site.\n\nThese findings were discussed with the ACS intern at 2100 on ___ in\nperson.\n\\BEGIN NEW REPORT:___\nINDICATION:  ___ male struck by car.\n\nCOMPARISONS:  None.\n\nTHREE VIEWS OF THE RIGHT ELBOW.  There is no fracture or dislocation of the\nelbow.  There is a non-displaced fracture of the right radial styloid with the\nfracture line likely extending into the joint.\n\nTHREE VIEWS OF THE LEFT ELBOW:  There is no fracture or dislocation of the\nleft elbow, or of joint effusion.  The radius and ulna are unremarkable.\n\nIMPRESSION:  Non-displaced transverse fracture of the distal radial styloid\nwith the minimally displaced fracture line extending to the radiocarpal joint\nspace.\n\nThese findings were discussed with the ACS intern at 2100 on ___ in\nperson.\n\\BEGIN NEW REPORT:___\nSTUDY:  Right wrist, ___.\n\nCLINICAL HISTORY:  ___ man, status post trauma, status post reduction.\n\nFINDINGS:  Comparison is made to prior radiographs from ___.\n\nThere is again seen a fracture involving the radial styloid.  Fine bony detail\nis somewhat limited due to overlying cast material.  The rest of the bony\nstructures are grossly intact.\n\\BEGIN NEW REPORT:___\nSTUDY:  Right foot, ___.\n\nCLINICAL HISTORY:  Patient with trauma.  Status post reduction.\n\nFINDINGS:  Comparison is made to the prior radiographs from ___\n\nThree views of the right foot again demonstrate a fracture involving the base\nof the first metatarsal.  There is medial displacement of the first ray in\nrelation to the first TMT joint.  A prominent lateral butterfly fragment at\nthe base is seen.  Fine bony detail is very limited due to the overlying\nsplint material.\n\\BEGIN NEW REPORT:___\nHISTORY:  ORIF.\n\nFINDINGS:  Images from the operating suite show placement of a metallic pin\nacross the interphalangeal joint of the great toe.  Further information can be\ngathered from the operative report.\n", "discharge_instructions": "******SIGNS OF INFECTION**********\nPlease return to the emergency department or notify MD if you \nshould experience severe pain, increased swelling, decreased \nsensation, difficulty with movement; fevers >101.5, chills, \nredness or drainage at the incision site; chest pain, shortness \nof breath or any other concerns.\n-Wound Care: You can get the wound wet/take a shower starting \nfrom 3 days post-op. No baths or swimming for at least 4 weeks. \nAny stitches or staples that need to be removed will be taken \nout at your 2-week follow up appointment. No dressing is needed \nif wound continued to be non-draining.\n\n******WEIGHT-BEARING*******\nRight arm weight bearing as tolerated through the elbow but nonn \nweight bearing at wrist(cast), Right leg touch down weight \nbearing through the heel (splint); Left leg weight bearing as \ntolerated through the heel (boot)\n\n******MEDICATIONS***********\n- Resume your pre-hospital medications.\n- You have been given medication for your pain control. Please \ndo not operate heavy machinery or drink alcohol when taking this \nmedication. As your pain improves please decrease the amount of \npain medication. This medication can cause constipation, so you \nshould drink ___ glasses of water daily and take a stool \nsoftener (colace) to prevent this side effect.\n-Medication refills cannot be written after 12 noon on ___.\n\n*****ANTICOAGULATION******\n- Take Lovenox for DVT prophylaxis for 2 weeks post-operatively.", "brief_hospital_course": "The patient was admitted to the Orthopaedic Trauma Service for \nrepair fractures of R ___ MT,L great toe,R distal radius. The \npatient was taken to the OR and underwent an uncomplicated ORIF \nR ___ MT,CRPP L great toe,casting R DRF. The patient tolerated \nthe procedure without complications and was transferred to the \nPACU in stable condition.  Please see operative report for \ndetails. Post operatively pain was controlled with a PCA with a \ntransition to PO pain meds once tolerating POs.  The patient \ntolerated diet advancement without difficulty and made steady \nprogress with ___.\n\nWeight bearing status: RUE WBAT through the elbow but NWB \nwrist(cast), RLE TDWB through the heel (splint), LLE WBAT \nthrough the heel (heal boot).\n\nThe patient received ___ antibiotics as well as \nlovenox for DVT prophylaxis.  The incision was clean, dry, and \nintact without evidence of erythema or drainage; and the \nextremity was NVI distally throughout.  The patient was \ndischarged in stable condition with written instructions \nconcerning precautionary instructions and the appropriate \nfollow-up care.  The patient will be continued on chemical DVT \nprophylaxis for 4 weeks post-operatively.  All questions were \nanswered prior to discharge and the patient expressed readiness \nfor discharge."}

14. {"hadm_id": 21070242, "radiology_text": "INDICATION:  This year old female with fevers and 50.\n\nTECHNIQUE:  Chest PA and lateral\n\nCOMPARISON:  None available.\n\nFINDINGS: \n\nPA and lateral chest radiograph demonstrates clear lungs bilaterally. No focal\nconsolidation convincing for pneumonia is identified. Cardiomediastinal and\nhilar contours are within normal limits. There is no pleural effusion or\npneumothorax. Osseous structures are without an acute abnormality.\n\nIMPRESSION: \n\nNo focal opacity convincing for pneumonia.\n\\BEGIN NEW REPORT:___\nEXAMINATION:  LIVER OR GALLBLADDER US (SINGLE ORGAN)\n\nINDICATION:  ___ with elevated LFTs and fevers\n\nTECHNIQUE:  Grey scale and color Doppler ultrasound images of the abdomen were\nobtained.\n\nCOMPARISON:  None available.\n\nFINDINGS: \n\nLIVER: The hepatic parenchyma appears within normal limits.The contour of the\nliver is smooth. There is no focal liver mass.  Main portal vein is patent\nwith hepatopetal flow. There is no ascites.\n\nBILE DUCTS: There is no intrahepatic biliary dilation. The CBD measures 2 mm.\n\nGALLBLADDER: There is no evidence of stones or gallbladder wall thickening.\nUnremarkable appearance of the liver, no biliary ductal dilatation.\n\nPANCREAS: Imaged portion of the pancreas appears within normal limits, without\nmasses or pancreatic ductal dilation, with portions of the pancreatic tail\nobscured by overlying bowel gas.\n\nSPLEEN:  The spleen appears heterogeneously echogenic which may reflect\nchronic changes in this patient with sickle cell disease. The spleen measures\n7.5 cm in dimension.\n\n\nIMPRESSION: \n\n1.  No acute intra-abdominal process.\n2.  Heterogeneously echogenic spleen may reflect chronic changes in setting of\nsickle cell disease and prior infarcts.\n\nNOTIFICATION:  Updated wet read paged to Dr. ___.\n", "discharge_instructions": "You presented to the hospital with post-op fever.  Initially \nthere was concern for infection, however, all your infectious \nwork-up was NEGATIVE and the Infectious Disease doctors ___ \nthat ___ was unlikely.  After discussion with the Blood \nBank and the Hematology consult team, your fever was felt to be \nlikely due to a delayed tranfusion reaction to blood you \nreceived during your recent hip replacement surgery.  Because of \nthe transfusion reaction, you had worsening anemia / sickle \ncrisis, which responded well to supportive treatment.  We \nrecommend that you take an increased dose of folic acid till \nfollo-wup with your PCP or ___.  We also recommend that \nyou have your family members have their blood screened to be \npotential donors for you in the future.  You can contact the \n___ Cross at ___.\n.\nPlease take your medications as directed.\n.\nPlease see your physicians as listed.", "brief_hospital_course": "___ yo F with sickle cell disease who presents with 3 weeks of \nfevers and chills s/p R hip replacement, initially thought to be \ninfectious in nature, ultimately felt to be due to delayed \ntransfusion reaction, further complicated by mild sickle cell \ncrisis.  \n\n# Fever, likely delayed transfusion reaction\nInitially given pt's fever and leukocytosis, there was high \nconcern for underlying infection.  Given elevated LFT's, RUQ US \nwas obtained in the ED, however, no evidence of cholangitis.  \nPatient also had clear CXR without evidence of PNA.  Her right \nhip replacement wound did not appear infected and she had \nexcellent mobility and ROM, making post-op wound infection or \nseptic prosthetic joint highly unlikely.  Given h/o of murmur, \nshe underwent TTE, but there was no obvious vegetation.  She was \ninitially placed on IV Ceftriaxone but after being seen by the \nInfectious Disease consult, this was stopped and she remained \nafebrile for more than 72 hours after stopping abx.  Given \nelevated LFT's, tick-borne illness was considered, but parasite \nsmear and Anaplasma serologies all NEGATIVE.  HIV Ab and VL were \nNEGATIVE, as were CMV VL, HCV VL and HSV Ab's.  Her CMV IgG was \npositive, but CMV IgM was NEGATIVE.  Flu swab was NEGATIVE and \nall blood cultures drawn showed NGTD.  Overall infection was \nfelt to be unlikely.  The most likely etiology of her fever and \nleukocytosis was a delayed transfusion reaction, given that she \nhad received 2 units PRBC pre-operatively and she has history of \nmany allo-antibodies.  She was seen by Hematology Consult and \nBlood Bank also reviewed her blood sample, and it was felt that \nher overall picture was highly consistent with delayed \ntransfusion reaction.  As noted above, she defervesced without \nfurther treatment or intervention and remained clinically stable \nand asymptomatic.  \n\n# Sickle Cell Disease, acute crisis\nPatient initially presented with baseline Hct, however, her Hct \nquickly dropped to approximately 19, which is below her \nbaseline.  Of note, she also had elevated LFT's, including T. \nbili, and elevated LDH, low haptoglobin and high retic #, \noverall picture consistent with a sickle crisis.  The sickle \ncrisis was likely triggered by her delayed transfusion reaction. \n She also had some bony pain in her knees and ankle.  She did \nnot have cough or chest pain.  She was placed on high-dose folic \nacid and O2 nasal cannula.  She initially complained of some \nfatigue during her ___ sessions, but her symptoms eventually \nresolved as her Hct stabilized above 20.  Hematology had \ninitially recommended transfusing PRBC's for goal Hct >20, \nhowever, due to presence of many alloantibodies, suitable PRBC's \ncould not be located.  As such patient did not receive any \nPRBC's during this hospitalization.  She is being discharged on \nhigh-dose folic acid and will continue until f/u with \nHematology.  Both Blood Bank and Hematology recommended that the \npatient have her closest family members screened by ___ \nCross for potential matching blood donor.  Patient will have her \nchildren tested to be donors (typically would ask for closely \nlikely match with pt's siblings, however, all of pt's siblings \nare only half-siblings).  Lastly, pt felt that supplemental O2 \nwas helpful in overcoming her sickle crisis, as such, inpatient \nHematology will coordinate with outpatient Hematology to try to \nobtain home O2 to be used PRN.\n\n# s/p R hip replacement\nPatient's hip showed excellent ROM, and as noted above, low \nconcern for septic joint.  She was able to ambulate and was seen \nby the ___ consult service to continue ___ during her \nhospitalization.  She was placed on heparin SQ for DVT \nprophylaxis.  She will continue with home ___ on discharge and \nwill f/u with her outpatient orthopedist as previously \nscheduled.  Per patient, her orthopedist did not recommend more \nthan 2 weeks of post-op DVT prophylaxis and since her surgery \nwas approximately 1 month ago, she is being discharged WITHOUT \nany further DVT prophylaxis.\n."}



1.0

In [5]:
import json
from collections import OrderedDict

def filter_json_objects(jsonl_lines):
    keys_to_keep = ["hadm_id", "radiology_text", "discharge_instructions", "brief_hospital_course"]
    filtered_output = []

    for line in jsonl_lines:
        line = line.strip()
        if not line:
            continue
        try:
            data = json.loads(line)
            filtered_data = OrderedDict((key, data[key]) for key in keys_to_keep if key in data)
            filtered_output.append(filtered_data)
        except json.JSONDecodeError as e:
            print("Skipping line due to JSON error:", e)

    return filtered_output

path = '/home/IAIS/jdatta/mimic_syntheticData/data.jsonl'

with open(path, "r", encoding="utf-8") as f:
    jsonl_lines = f.readlines()

filtered = filter_json_objects(jsonl_lines)

# Print first entry as a single JSONL line with correct key order
print(json.dumps(filtered[0], ensure_ascii=False))

{"hadm_id": 21070242, "radiology_text": "INDICATION:  This year old female with fevers and 50.\n\nTECHNIQUE:  Chest PA and lateral\n\nCOMPARISON:  None available.\n\nFINDINGS: \n\nPA and lateral chest radiograph demonstrates clear lungs bilaterally. No focal\nconsolidation convincing for pneumonia is identified. Cardiomediastinal and\nhilar contours are within normal limits. There is no pleural effusion or\npneumothorax. Osseous structures are without an acute abnormality.\n\nIMPRESSION: \n\nNo focal opacity convincing for pneumonia.\n\\BEGIN NEW REPORT:___\nEXAMINATION:  LIVER OR GALLBLADDER US (SINGLE ORGAN)\n\nINDICATION:  ___ with elevated LFTs and fevers\n\nTECHNIQUE:  Grey scale and color Doppler ultrasound images of the abdomen were\nobtained.\n\nCOMPARISON:  None available.\n\nFINDINGS: \n\nLIVER: The hepatic parenchyma appears within normal limits.The contour of the\nliver is smooth. There is no focal liver mass.  Main portal vein is patent\nwith hepatopetal flow. There is no 