#Data (no need to rerun)

In [1]:
# just upload zip file

Saving mimic-iii-clinical-database-demo-1.4.zip to mimic-iii-clinical-database-demo-1.4.zip


In [2]:
# Extract and Load All Tables
import pandas as pd
import os
import json
from zipfile import ZipFile

# Extract uploaded ZIP
zip_filename = list(uploaded.keys())[0]
extract_path = '/content/mimiciii_demo'
with ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Locate actual CSV path
nested_dir = os.path.join(extract_path, 'mimic-iii-clinical-database-demo-1.4')

# Load relevant CSVs
csv_files = {
    'PATIENTS': pd.read_csv(f"{nested_dir}/PATIENTS.csv"),
    'ADMISSIONS': pd.read_csv(f"{nested_dir}/ADMISSIONS.csv"),
    'ICUSTAYS': pd.read_csv(f"{nested_dir}/ICUSTAYS.csv"),
    'CHARTEVENTS': pd.read_csv(f"{nested_dir}/CHARTEVENTS.csv", low_memory=False),
    'LABEVENTS': pd.read_csv(f"{nested_dir}/LABEVENTS.csv"),
    'INPUTEVENTS': pd.read_csv(f"{nested_dir}/INPUTEVENTS_CV.csv", low_memory=False),
    'OUTPUTEVENTS': pd.read_csv(f"{nested_dir}/OUTPUTEVENTS.csv"),
    'PRESCRIPTIONS': pd.read_csv(f"{nested_dir}/PRESCRIPTIONS.csv"),
    'NOTEEVENTS': pd.read_csv(f"{nested_dir}/NOTEEVENTS.csv"),
}

In [3]:
# Build Patient-wise JSON
# Use lowercase 'subject_id' for consistency
subject_col = 'subject_id'
patient_jsons = {}

for subject_id in csv_files['PATIENTS'][subject_col].unique():
    patient_record = {}
    for table_name, df in csv_files.items():
        if subject_col in df.columns:
            filtered = df[df[subject_col] == subject_id]
            filtered = filtered.drop(columns=[col for col in filtered.columns if 'row_id' in col], errors='ignore')
            if not filtered.empty:
                patient_record[table_name] = filtered.to_dict(orient='records')
    patient_jsons[str(subject_id)] = patient_record

### note this json contains all info (not what I used for training)

In [4]:
# Save and Download JSON
json_path = '/content/mimiciii_full_patients.json'
with open(json_path, 'w') as f:
    json.dump(patient_jsons, f, indent=2)

# Download it
files.download(json_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [5]:
# Preview what json file looks like (print first patient and print first 1–2 entries per table)
json_path = '/content/mimiciii_full_patients.json'
with open(json_path, 'r') as f:
    patient_data = json.load(f)

first_patient_id = list(patient_data.keys())[0]
first_patient = patient_data[first_patient_id]

print(f"📌 Patient ID: {first_patient_id}")
print("📁 Tables included:", list(first_patient.keys()))

for table, rows in first_patient.items():
    print(f"\n📄 {table} ({len(rows)} records):")
    for i, row in enumerate(rows[:2]):
        print(f"Record {i+1}:")
        print(json.dumps(row, indent=2))

📌 Patient ID: 10006
📁 Tables included: ['PATIENTS', 'ADMISSIONS', 'ICUSTAYS', 'CHARTEVENTS', 'LABEVENTS', 'INPUTEVENTS', 'OUTPUTEVENTS', 'PRESCRIPTIONS']

📄 PATIENTS (1 records):
Record 1:
{
  "subject_id": 10006,
  "gender": "F",
  "dob": "2094-03-05 00:00:00",
  "dod": "2165-08-12 00:00:00",
  "dod_hosp": "2165-08-12 00:00:00",
  "dod_ssn": "2165-08-12 00:00:00",
  "expire_flag": 1
}

📄 ADMISSIONS (1 records):
Record 1:
{
  "subject_id": 10006,
  "hadm_id": 142345,
  "admittime": "2164-10-23 21:09:00",
  "dischtime": "2164-11-01 17:15:00",
  "deathtime": NaN,
  "admission_type": "EMERGENCY",
  "admission_location": "EMERGENCY ROOM ADMIT",
  "discharge_location": "HOME HEALTH CARE",
  "insurance": "Medicare",
  "language": NaN,
  "religion": "CATHOLIC",
  "marital_status": "SEPARATED",
  "ethnicity": "BLACK/AFRICAN AMERICAN",
  "edregtime": "2164-10-23 16:43:00",
  "edouttime": "2164-10-23 23:00:00",
  "diagnosis": "SEPSIS",
  "hospital_expire_flag": 0,
  "has_chartevents_data": 1
}



### For now, use data of just 3 patient overall personal/administrative info to train (left out stuff like hundreds of lab test result entries, prescriptions, etc. which is too big for this training)

In [13]:
json_path = '/content/mimiciii_full_patients.json'

# Load JSON
with open(json_path, 'r') as f:
    all_data = json.load(f)

# Filter: Keep only PATIENTS, ADMISSIONS, ICUSTAYS
filtered_data = {
    pid: {k: v for k, v in tables.items() if k in ['PATIENTS', 'ADMISSIONS', 'ICUSTAYS']}
    for pid, tables in all_data.items()
}

# Save new JSON
filtered_json_path = '/content/mimic_small.json'
with open(filtered_json_path, 'w') as f:
    json.dump(filtered_data, f, indent=2)

# Download
files.download(filtered_json_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### I used gpt-4o to convert mimic_small from .json to .jsonl
### (see prompt definition below after the training for example of the "instruction" and "input" fields I used)

#Model Training

In [1]:
!pip install -q peft transformers accelerate bitsandbytes datasets

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m117.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m95.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

In [3]:
from huggingface_hub import login
login("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")  # ← paste your token here just once

### Used Llama-3-8b-Instruct as base model (compartivly small, can just use lora to finetune)

In [4]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    token=True
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [5]:
from peft import LoraConfig, get_peft_model, TaskType

peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848


In [6]:
import os
os.environ["WANDB_DISABLED"] = "true"

# Using hugging face dataset (need to use numpy 1.xx or there will be error)

In [37]:
#!pip install numpy==1.24.4 --quiet

In [28]:
from datasets import Dataset
from transformers import Trainer, TrainingArguments, default_data_collator
from torch.utils.data import Dataset as TorchDataset
import torch
import json

# Read and fix the JSONL
with open("/content/mimic_all_fixed.jsonl", "r", encoding="utf-8") as f:
    fixed_data = []
    for line in f:
        entry = json.loads(line)
        # If output is already a dict, we will convert it to a string for training
        if isinstance(entry["output"], dict):
            entry["output"] = json.dumps(entry["output"], ensure_ascii=False)
        fixed_data.append(entry)

In [29]:
# Convert to HuggingFace Dataset
hf_dataset = Dataset.from_list(fixed_data)

In [30]:
hf_dataset[0]  # Preview

{'instruction': 'You are a synthetic patient data generator. Your task is to generate virtual ICU patient data.\nIMPORTANT: return only a valid JSON object, with no preamble, no python code.',
 'input': '',
 'output': '"{\\"10006\\": {\\"PATIENTS\\": [{\\"subject_id\\": 10006, \\"gender\\": \\"F\\", \\"dob\\": \\"2094-03-05 00:00:00\\", \\"dod\\": \\"2165-08-12 00:00:00\\", \\"dod_hosp\\": \\"2165-08-12 00:00:00\\", \\"dod_ssn\\": \\"2165-08-12 00:00:00\\", \\"expire_flag\\": 1}], \\"ADMISSIONS\\": [{\\"subject_id\\": 10006, \\"hadm_id\\": 142345, \\"admittime\\": \\"2164-10-23 21:09:00\\", \\"dischtime\\": \\"2164-11-01 17:15:00\\", \\"deathtime\\": NaN, \\"admission_type\\": \\"EMERGENCY\\", \\"admission_location\\": \\"EMERGENCY ROOM ADMIT\\", \\"discharge_location\\": \\"HOME HEALTH CARE\\", \\"insurance\\": \\"Medicare\\", \\"language\\": NaN, \\"religion\\": \\"CATHOLIC\\", \\"marital_status\\": \\"SEPARATED\\", \\"ethnicity\\": \\"BLACK/AFRICAN AMERICAN\\", \\"edregtime\\": \\"2

In [27]:
# Tokenizer setup
tokenizer.pad_token = tokenizer.eos_token

def tokenize(example):
    prompt = f"{example['instruction']}\n\n{example['input']}".strip()
    input_tokens = tokenizer(prompt, padding="max_length", truncation=True, max_length=1024)
    output_tokens = tokenizer(example["output"], padding="max_length", truncation=True, max_length=1024)
    input_tokens["labels"] = output_tokens["input_ids"]
    return input_tokens

In [17]:
# Apply tokenizer and format
tokenized_dataset = hf_dataset.map(tokenize, remove_columns=hf_dataset.column_names)
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [23]:
# Training args
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=10, # change to train more/less epochs
    logging_dir="./logs",
    report_to="none",  # Disable wandb and others
)

In [24]:
# Initialize
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=default_data_collator,
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [25]:
# Step 8: Train
trainer.train()

ValueError: Unable to avoid copy while creating an array as requested.
If using `np.array(obj, copy=False)` replace it with `np.asarray(obj)` to allow a copy when needed (no behavior change in NumPy 1.x).
For more details, see https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword.

# Not using hugging face (preferred)

In [7]:
import json
import torch
from torch.utils.data import Dataset as TorchDataset
from transformers import Trainer, TrainingArguments

# Step 1: Load mimic_all.jsonl
with open("/content/mimic_all_fixed.jsonl", "r", encoding="utf-8") as f:
    raw_data = [json.loads(line) for line in f]

# Step 2: Ensure output is a JSON object string (if not already)
for ex in raw_data:
    if isinstance(ex["output"], dict):
        ex["output"] = json.dumps(ex["output"])

# Step 3: Fix tokenizer padding
tokenizer.pad_token = tokenizer.eos_token

# Step 4: Tokenize each example
def tokenize_example(example):
    prompt = example["instruction"].strip()
    output = example["output"].strip()

    full_text = prompt + "\n" + output
    tokenized = tokenizer(full_text, truncation=True, padding="max_length", max_length=1024)

    # Compute label mask to ignore prompt tokens
    prompt_len = len(tokenizer(prompt, truncation=True, max_length=1024)["input_ids"])
    labels = [-100] * prompt_len + tokenized["input_ids"][prompt_len:]

    # Pad labels if needed
    labels += [-100] * (1024 - len(labels))
    labels = labels[:1024]

    return {
        "input_ids": torch.tensor(tokenized["input_ids"]),
        "attention_mask": torch.tensor(tokenized["attention_mask"]),
        "labels": torch.tensor(labels)
    }


# Step 5: Custom PyTorch-compatible dataset
class CustomDataset(TorchDataset):
    def __init__(self, data):
        self.samples = [tokenize_example(ex) for ex in data]

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]

train_dataset = CustomDataset(raw_data)

In [8]:
# preview
train_dataset[0]

{'input_ids': tensor([128000,   2675,    527,  ..., 128009, 128009, 128009]),
 'attention_mask': tensor([1, 1, 1,  ..., 0, 0, 0]),
 'labels': tensor([  -100,   -100,   -100,  ..., 128009, 128009, 128009])}

In [9]:
# Step 6: Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=30,
    logging_dir="./logs",
    report_to="none",  # disables wandb
    save_strategy="no",
    label_names=["labels"]
)

# Step 7: Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

  trainer = Trainer(


In [13]:
# Step 8: Train
trainer.train()

Step,Training Loss
500,0.2686
1000,0.1124
1500,0.0605


TrainOutput(global_step=1500, training_loss=0.14714817174275716, metrics={'train_runtime': 885.4954, 'train_samples_per_second': 3.388, 'train_steps_per_second': 1.694, 'total_flos': 1.38456399347712e+17, 'train_loss': 0.14714817174275716, 'epoch': 30.0})

In [14]:
model.save_pretrained("./llama3-lora-final")
tokenizer.save_pretrained("./llama3-lora-final")

('./llama3-lora-final/tokenizer_config.json',
 './llama3-lora-final/special_tokens_map.json',
 './llama3-lora-final/chat_template.jinja',
 './llama3-lora-final/tokenizer.json')

### model download (just the lora adapter)

In [15]:
!zip -r llama3-lora-final.zip llama3-lora-final

  adding: llama3-lora-final/ (stored 0%)
  adding: llama3-lora-final/tokenizer_config.json (deflated 96%)
  adding: llama3-lora-final/adapter_model.safetensors (deflated 7%)
  adding: llama3-lora-final/chat_template.jinja (deflated 52%)
  adding: llama3-lora-final/tokenizer.json (deflated 85%)
  adding: llama3-lora-final/adapter_config.json (deflated 55%)
  adding: llama3-lora-final/special_tokens_map.json (deflated 63%)
  adding: llama3-lora-final/README.md (deflated 66%)


In [16]:
from google.colab import files
files.download("llama3-lora-final.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# New model evaluation and testing

In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# Load base model + adapter
peft_model_path = "/content/llama3-lora-final"
config = PeftConfig.from_pretrained(peft_model_path)

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load adapter
new_model = PeftModel.from_pretrained(base_model, peft_model_path, torch_dtype=torch.float16)
new_model = new_model.to("cuda")
new_model = torch.compile(new_model)
new_model.eval()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

OptimizedModule(
  (_orig_mod): PeftModelForCausalLM(
    (base_model): LoraModel(
      (model): LlamaForCausalLM(
        (model): LlamaModel(
          (embed_tokens): Embedding(128256, 4096)
          (layers): ModuleList(
            (0-31): 32 x LlamaDecoderLayer(
              (self_attn): LlamaAttention(
                (q_proj): lora.Linear(
                  (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=4096, out_features=8, bias=False)
                  )
                  (lora_B): ModuleDict(
                    (default): Linear(in_features=8, out_features=4096, bias=False)
                  )
                  (lora_embedding_A): ParameterDict()
                  (lora_embedding_B): ParameterDict()
                  (lora_magnitude_ve

In [18]:
prompt = "You are a synthetic patient data generator. Your task is to generate virtual ICU patient data.\nIMPORTANT: return only a valid JSON object, with no preamble, no python code.\n\n"

### try make one new patient data

In [19]:
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.no_grad():
    outputs = new_model.generate(
        **inputs,
        max_new_tokens=600,
        do_sample=True,
        temperature=0.5,
        top_p=0.97,
        pad_token_id=tokenizer.eos_token_id,
    )

synthetic_output = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(synthetic_output)

You are a synthetic patient data generator. Your task is to generate virtual ICU patient data.
IMPORTANT: return only a valid JSON object, with no preamble, no python code.

"{\"44154\": {\"PATIENTS\": [{\"subject_id\": 44154, \"gender\": \"F\", \"dob\": \"2051-10-13 00:00:00\", \"dod\": \"2107-07-24 00:00:00\", \"dod_hosp\": NaN, \"dod_ssn\": \"2107-07-24 00:00:00\", \"expire_flag\": 1}], \"ADMISSIONS\": [{\"subject_id\": 44154, \"hadm_id\": 725711, \"admittime\": \"2105-07-06 04:19:00\", \"dischtime\": \"2105-07-09 18/19:00\", \"deathtime\": NaN, \"admission_type\": \"ELECTIVE\", \"admission_location\": \"PHYS REFERRAL\", \"discharge_location\": \"HOME\", \"insurance\": \"Private\", \"language\": \"ENGL\", \"marital_status\": \"S\", \"ethnicity\": \"ENGLIS\", \"edregtime\": NaN, \"edouttime\": NaN, \"discharge_status\": \"DISCHDED\", \"chartdate\": \"2105-07-09 00:00:00\", \"chartreadIndicator\": 1}], \"ICUSTAYS\": [{\"subject_id\": 44154, \"hadm_id\": 725711, \"icustay_id\": 966690,

### try to make another one

In [20]:
inputs = tokenizer(prompt, return_tensors="pt").to(new_model.device)

with torch.no_grad():
    outputs = new_model.generate(
        **inputs,
        max_new_tokens=600,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

synthetic_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(synthetic_output)

You are a synthetic patient data generator. Your task is to generate virtual ICU patient data.
IMPORTANT: return only a valid JSON object, with no preamble, no python code.

"{\"44221\": {\"PATIENTS\": [{\"subject_id\": 44221, \"gender\": \"M\", \"dob\": \"2068-06-14 00:00:00\", \"dod\": \"2112-04-12 00:00:00\", \"dod_hosp\": \"2112-04-12 00:00:00\", \"dod_ssn\": NaN, \"expire_flag\": 1}], \"ADMISSIONS\": [{\"subject_id\": 44221, \"hadm_id\": 713686, \"admittime\": \"2111-12-08 13:50:00\", \"dischtime\": \"2111-12-11 15:05:00\", \"deathtime\": NaN, \"admission_type\": \"EMERGENCY\", \"admission_location\": \"EMERGENCY ROOM ADMIT\", \"discharge_location\": \"HOME\", \"insurance\": \"Medicare\", \"language\": \"ENGL\", \"marital_status\": \"WIDOWED\", \"ethnicity\": \"WHITE\", \"religion\": \"CATHOLIC\", \"edregtime\": \"2111-12-08 11:52:00\", \"edouttime\": \"2111-12-08 14:25:00\", \"diagnosis_entity\": \"PHYSIANS ORDERS\", \"discharge_status\": \"DISCHGED\", \"chartdate\": 21111108}, \

In [21]:
print("Prompt:\n", prompt)
print("Output:\n", tokenizer.decode(outputs[0], skip_special_tokens=True).strip())

Prompt:
 You are a synthetic patient data generator. Your task is to generate virtual ICU patient data.
IMPORTANT: return only a valid JSON object, with no preamble, no python code.


Output:
 You are a synthetic patient data generator. Your task is to generate virtual ICU patient data.
IMPORTANT: return only a valid JSON object, with no preamble, no python code.

"{\"44221\": {\"PATIENTS\": [{\"subject_id\": 44221, \"gender\": \"M\", \"dob\": \"2068-06-14 00:00:00\", \"dod\": \"2112-04-12 00:00:00\", \"dod_hosp\": \"2112-04-12 00:00:00\", \"dod_ssn\": NaN, \"expire_flag\": 1}], \"ADMISSIONS\": [{\"subject_id\": 44221, \"hadm_id\": 713686, \"admittime\": \"2111-12-08 13:50:00\", \"dischtime\": \"2111-12-11 15:05:00\", \"deathtime\": NaN, \"admission_type\": \"EMERGENCY\", \"admission_location\": \"EMERGENCY ROOM ADMIT\", \"discharge_location\": \"HOME\", \"insurance\": \"Medicare\", \"language\": \"ENGL\", \"marital_status\": \"WIDOWED\", \"ethnicity\": \"WHITE\", \"religion\": \"CATHO