In [2]:
import os
import json
import csv
from glob import glob

# Folder and output path
input_folder = 'fhir'
output_csv = 'fhir_patient_summary_with_city_and_covid.csv'

# CSV headers
csv_headers = [
    'PatientID', 'Name', 'DOB', 'Gender', 'Race', 'Ethnicity',
    'City', 'EncounterCount', 'CovidConfirmed',
    'Conditions', 'DiagnosisDates', 'CarePlanActivities',
    'Devices', 'TotalClaimAmount', 'Notes'
]

# Condition codes that flag a patient as COVID-confirmed.
# NOTE(review): confirm that every code listed here denotes a confirmed case
# (rather than a suspected case or an exposure) before relying on the flag.
COVID_CODES = frozenset({'840539006', '840544004', '840546002', 'U07.1'})


def _patient_name(resource: dict) -> str:
    """Return "given... family" built from the first name entry of a Patient.

    Missing parts are skipped, so no stray leading/trailing space is produced
    and a Patient without any ``name`` yields an empty string.
    """
    names = resource.get('name') or [{}]
    first = names[0]
    parts = list(first.get('given', []))
    family = first.get('family', '')
    if family:
        parts.append(family)
    return ' '.join(parts)


def _extension_text(ext: dict) -> str:
    """Return the display string of a race/ethnicity extension.

    The sub-extension whose ``url`` is ``'text'`` is preferred. If none is
    tagged that way, fall back to the second sub-extension (the position the
    script historically read); return '' when neither exists.
    """
    subs = ext.get('extension', [])
    for sub in subs:
        if sub.get('url') == 'text':
            return sub.get('valueString', '')
    if len(subs) > 1:
        return subs[1].get('valueString', '')
    return ''


def summarize_bundle(data: dict) -> dict:
    """Flatten one parsed bundle into a single CSV row.

    Args:
        data: Parsed JSON of a bundle; resources are read from
            ``data['entry'][*]['resource']``.

    Returns:
        A dict with exactly the keys listed in ``csv_headers``. Multi-valued
        fields are joined with '; '. ``Conditions`` and ``DiagnosisDates`` are
        positionally aligned, so a missing text/date is kept as an empty item.
    """
    patient = {}
    cities = []
    encounter_count = 0
    covid_confirmed = False
    conditions = []
    careplans = []
    devices = []
    claims = []
    notes = []

    for entry in data.get('entry', []):
        resource = entry.get('resource', {})
        rtype = resource.get('resourceType')

        if rtype == 'Patient':
            patient = {
                'PatientID': resource.get('id'),
                'Name': _patient_name(resource),
                'DOB': resource.get('birthDate', ''),
                'Gender': resource.get('gender', ''),
                'Race': '',
                'Ethnicity': ''
            }

            # Race & Ethnicity
            for ext in resource.get('extension', []):
                url = ext.get('url', '')
                if 'us-core-race' in url:
                    patient['Race'] = _extension_text(ext)
                if 'us-core-ethnicity' in url:
                    patient['Ethnicity'] = _extension_text(ext)

            # City
            for addr in resource.get('address', []):
                if 'city' in addr:
                    cities.append(addr['city'])

        elif rtype == 'Encounter':
            encounter_count += 1

        elif rtype == 'Condition':
            code = resource.get('code', {})
            conditions.append({
                'text': code.get('text', ''),
                'date': resource.get('onsetDateTime', '')
            })
            # Check for COVID-19 code
            if any(c.get('code') in COVID_CODES for c in code.get('coding', [])):
                covid_confirmed = True

        elif rtype == 'CarePlan':
            for act in resource.get('activity', []):
                text = act.get('detail', {}).get('code', {}).get('text')
                if text:
                    careplans.append(text)

        elif rtype == 'Device':
            devices.append(resource.get('type', {}).get('text', ''))

        elif rtype == 'Claim':
            # `or 0` also covers an explicit null value in the JSON.
            claims.append(float(resource.get('total', {}).get('value') or 0))

        elif rtype == 'DiagnosticReport':
            for form in resource.get('presentedForm', []):
                if form.get('contentType', '').startswith('text') and 'data' in form:
                    notes.append('[Note Present]')

    return {
        'PatientID': patient.get('PatientID', ''),
        'Name': patient.get('Name', ''),
        'DOB': patient.get('DOB', ''),
        'Gender': patient.get('Gender', ''),
        'Race': patient.get('Race', ''),
        'Ethnicity': patient.get('Ethnicity', ''),
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # CSV is identical from run to run (a set would shuffle the cities).
        'City': '; '.join(dict.fromkeys(cities)),
        'EncounterCount': encounter_count,
        'CovidConfirmed': covid_confirmed,
        'Conditions': '; '.join([c['text'] for c in conditions]),
        'DiagnosisDates': '; '.join([c['date'] for c in conditions]),
        'CarePlanActivities': '; '.join(careplans),
        'Devices': '; '.join(devices),
        'TotalClaimAmount': sum(claims),
        'Notes': ' '.join(notes)
    }


# Process every *.json file in the input folder, in sorted filename order
filepaths = sorted(glob(os.path.join(input_folder, '*.json')))

# Build one row per file
rows = []
for filepath in filepaths:
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    rows.append(summarize_bundle(data))

# Write CSV
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=csv_headers)
    writer.writeheader()
    writer.writerows(rows)

print(f"✅ Final CSV created with {len(rows)} patients at {output_csv}")

✅ Final CSV created with 1481 patients at fhir_patient_summary_with_city_and_covid.csv
