In [None]:
# Get Patient list
# Read patients_to_filter.txt into a list, one patient id per line.

with open("patients_to_filter.txt", "r") as file:
    # Strip surrounding whitespace/newlines and drop blank lines; a blank or
    # trailing empty line would otherwise end up as an empty-string id.
    patient_list = [line.strip() for line in file if line.strip()]
patient_list

# Getting patients with a specific condition code

In [None]:
import requests
import json


# Function to get patients with a specific condition code
def get_patients_with_condition(condition_code, base_url="http://localhost:8080/fhir/", timeout=30):
    """Return the ids of patients that have a Condition with the given code.

    Searches ``<base_url>/Condition?code=<condition_code>`` and follows the
    ``next`` link of each returned bundle until no further page is advertised.

    Args:
        condition_code: Value passed as the ``code`` search parameter. It is
            sent through ``params`` so that it is URL-encoded.
        base_url: Base URL of the FHIR server, with or without a trailing slash.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of patient ids (the last path segment of each entry's
        ``subject.reference``), without duplicates and in first-seen order.
        If a page request returns a non-200 status, a message is printed and
        the ids collected up to that point are returned.

    Raises:
        Whatever ``requests.get`` raises (e.g. connection errors or timeouts).
    """
    patients = []
    seen = set()
    url = f"{base_url.rstrip('/')}/Condition"
    # Only the first request carries the search parameter; the "next" links
    # returned by the server are used as-is.
    params = {"code": condition_code}

    while url:
        response = requests.get(url, params=params, timeout=timeout)
        params = None
        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")
            break

        data = response.json()
        entries = data.get("entry")
        if not entries:
            break

        # Extract patient IDs from the current page
        for entry in entries:
            resource = entry.get("resource") or {}
            reference = (resource.get("subject") or {}).get("reference")
            if not reference:
                # Entry carries no subject reference (e.g. it is not a
                # Condition resource); nothing to extract from it.
                continue
            patient_id = reference.split("/")[-1]
            # The same patient can be the subject of several matching
            # conditions; report each patient only once.
            if patient_id not in seen:
                seen.add(patient_id)
                patients.append(patient_id)

        print(f"Found {len(patients)} patients so far...")

        # Look for the next page URL (None ends the loop)
        url = next(
            (link.get("url") for link in data.get("link", []) if link.get("relation") == "next"),
            None,
        )

    return patients

In [None]:
# Fetch the ids of patients that have a Condition coded "I82401".
# NOTE(review): this rebinds patient_list, replacing whatever value it held
# before this cell runs.
patient_list = get_patients_with_condition("I82401")

In [None]:
# Persist the collected patient ids to a new text file, one id per line.
with open("patient_ids_to_filter.txt", "w") as file:
    file.writelines(f"{patient}\n" for patient in patient_list)

In [None]:
# Call the FHIR API once per patient_id and record that patient's identifier value,
# building a patient_id -> identifier mapping.
# The HAPI FHIR server is expected to be running at localhost:8080/fhir.

In [None]:
import requests

# Map each FHIR patient id to the value of the last identifier listed on its
# Patient resource. Patients that cannot be fetched, or that have no
# identifier value, are reported and left out of the mapping.
patient_to_identifier = {}
for patient_id in patient_list:
    # A timeout keeps one unresponsive request from stalling the whole loop.
    response = requests.get(f"http://localhost:8080/fhir/Patient/{patient_id}", timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch data for patient {patient_id}: {response.status_code}")
        continue

    patient_data = response.json()
    # Identifiers without a "value" are ignored instead of raising KeyError.
    identifier_values = [
        identifier["value"]
        for identifier in patient_data.get("identifier", [])
        if identifier.get("value")
    ]
    if not identifier_values:
        # Indexing [-1] on an empty list would raise IndexError.
        print(f"No identifier value found for patient {patient_id}")
        continue

    patient_to_identifier[patient_id] = identifier_values[-1]

In [None]:
# Display the patient id -> identifier mapping built above.
# NOTE(review): the rendered output contains patient identifiers; clear it before sharing the notebook.
patient_to_identifier

# Get Notes

In [None]:
# Load every notes CSV file from the fhir_notes folder and keep only the rows whose subject_id column matches one of these patient identifiers.
# Then write the filtered notes to a new folder called filtered_fhir_notes, using the same filename as the original CSV file.

import pandas as pd
import os
import glob

# Create the filtered_fhir_notes directory if it doesn't exist
filtered_notes_dir = "filtered_fhir_notes"
os.makedirs(filtered_notes_dir, exist_ok=True)

# Get all CSV files in the fhir_notes directory (sorted so the processing
# order is the same on every run)
notes_files = sorted(glob.glob("fhir_notes/*.csv"))

# Normalise the identifiers the same way the CSV column is normalised below
# (string, surrounding whitespace removed) so the comparison is like-for-like.
subject_ids = [str(value).strip() for value in patient_to_identifier.values()]

for file_path in notes_files:
    print(f"Processing file: {file_path}")
    # Read subject_id as text. With an inferred dtype, a column containing
    # missing values is parsed as float and would stringify as e.g. "123.0",
    # which never matches an identifier; leading zeros would also be lost.
    df = pd.read_csv(file_path, dtype={"subject_id": str})
    # Filter rows where the 'subject_id' column matches any of the patient identifiers
    filtered_df = df[df["subject_id"].astype(str).str.strip().isin(subject_ids)]
    print(f"Filtered {len(filtered_df)} rows from {len(df)} total rows in {file_path}")

    # Write the filtered DataFrame to a new CSV file in the filtered_fhir_notes directory
    output_file_path = os.path.join(filtered_notes_dir, os.path.basename(file_path))
    filtered_df.to_csv(output_file_path, index=False)
    print(f"Filtered notes written to {output_file_path}")