# In [7]:
import json
import csv
from datetime import datetime

# Step 1: Define functions to read and parse JSON files
def read_json_file(file_path):
    """Read a newline-delimited JSON (NDJSON) file into a list.

    Args:
        file_path: Path of the file to read. Every non-blank line must hold
            one complete JSON document.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8, so do not depend on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank lines (typically a trailing empty line) carry no record and
        # would make json.loads raise, so they are skipped.
        return [json.loads(line) for line in file if line.strip()]

# Step 2: Load each NDJSON resource file, in the same order as before
patients, conditions, encounters, encounters_icu = [
    read_json_file(resource_path)
    for resource_path in (
        "Patient.ndjson",
        "Condition.ndjson",
        "Encounter.ndjson",
        "EncounterICU.ndjson",
    )
]


# Step 3: Group the condition records by the patient they refer to
patient_conditions = {}

for condition in conditions:
    # The patient id is whatever follows the last '/' of the subject reference.
    patient_id = condition['subject']['reference'].rpartition('/')[2]
    # setdefault creates the patient's list on first sight, then appends to it.
    patient_conditions.setdefault(patient_id, []).append(condition)

# Step 4: Associate estimated time for each condition using encounter start time
def get_encounter_start_time(condition):
    """Return the start time of the encounter a condition refers to.

    The encounter id is the last '/'-separated segment of
    ``condition['encounter']['reference']``. It is looked up first in the
    module-level ``encounters`` list and then in ``encounters_icu``.

    Args:
        condition: Parsed condition record (a dict).

    Returns:
        The ``period.start`` value of the first encounter whose ``id``
        matches, or ``None`` when the condition has no encounter reference,
        no encounter matches, or the matching encounter has no period start.
    """
    # A condition without an encounter reference simply has no time; report
    # that as None (which the caller already handles) instead of a KeyError.
    reference = (condition.get('encounter') or {}).get('reference')
    if not reference:
        return None
    encounter_id = reference.split('/')[-1]

    # Regular encounters take precedence over ICU encounters, as before.
    for source in (encounters, encounters_icu):
        for encounter in source:
            if encounter.get('id') == encounter_id:
                return (encounter.get('period') or {}).get('start')
    return None

# Step 5: Generate CSV file with required columns
# One row is written per condition that has an encounter start time:
#   pid         - patient id the condition was grouped under
#   time        - encounter start as whole seconds since the Unix epoch
#   code        - code of the first coding entry, or 'N/A' when absent
#   description - the code's text, or 'N/A' when absent
# The file is written as UTF-8 so non-ASCII description text does not depend
# on the platform default encoding.
with open('patient_conditions.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['pid', 'time', 'code', 'description']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for patient_id, condition_list in patient_conditions.items():
        for condition in condition_list:
            start_time = get_encounter_start_time(condition)
            if not start_time:
                # No usable encounter time: the condition is left out.
                continue

            # datetime.fromisoformat() rejects a trailing 'Z' before
            # Python 3.11, so spell the UTC designator as an explicit offset.
            if start_time.endswith('Z'):
                start_time = start_time[:-1] + '+00:00'
            # NOTE: a timestamp without any UTC offset is interpreted in the
            # local time zone of the machine running this script.
            unix_timestamp = datetime.fromisoformat(start_time).timestamp()

            # Tolerate a missing 'code' element or an empty 'coding' list the
            # same way a missing 'text' is tolerated: fall back to 'N/A'.
            code_info = condition.get('code') or {}
            codings = code_info.get('coding') or [{}]
            code = codings[0].get('code', 'N/A')
            description = code_info.get('text', 'N/A')

            writer.writerow({'pid': patient_id,
                             'time': int(unix_timestamp),
                             'code': code,
                             'description': description})
