## Formatting Example Data
### For Model Training and BlockChain Testing

In [30]:
#Pandas for data organization and filtering
import pandas as pd
import json
import numpy as np

In [31]:
# Patient tables from the MIMIC-III Clinical Database Demo:
# https://physionet.org/content/mimiciii-demo/1.4/
def _read_without_row_id(csv_path, stop=None):
    """Read ``csv_path`` and skip its first column (row_id) by position.

    ``stop`` is the exclusive end position of the column slice; ``None`` keeps
    every remaining column.
    """
    return pd.read_csv(csv_path).iloc[:, 1:stop]


DRGCODES_df = _read_without_row_id("Datasets/mimic-iii-clinical-database-demo-1.4/DRGCODES.csv")
CPTEVENTS_df = _read_without_row_id("Datasets/mimic-iii-clinical-database-demo-1.4/CPTEVENTS.csv")
# Only column positions 1-3 of PATIENTS are kept.
PATIENTS_df = _read_without_row_id("Datasets/mimic-iii-clinical-database-demo-1.4/PATIENTS.csv", stop=4)
PRESCRIPTIONS_df = _read_without_row_id("Datasets/mimic-iii-clinical-database-demo-1.4/PRESCRIPTIONS.csv")
DIAGNOSES_df = _read_without_row_id("Datasets/mimic-iii-clinical-database-demo-1.4/DIAGNOSES_ICD.csv")

In [32]:
# Reference data
#
# ICD-9 diagnosis code lookup table.
# Source cited by the original author:
# https://www.fda.gov/drugs/drug-approvals-and-databases/national-drug-code-directory
# NOTE(review): that URL is the FDA National Drug Code directory; confirm it is
# really where icd9dx2015.csv was obtained.
ICD9CODES_df = pd.read_csv(
    filepath_or_buffer="Datasets/ReferenceDataSets/icd9dx2015.csv",
)


In [33]:
# Lookup Series mapping each ICD-9 code (dgns_cd) to its long description (longdesc).
icd9_to_description = ICD9CODES_df.set_index('dgns_cd')['longdesc']

# Translate every icd9_code into a human-readable "diagnosis" column and keep
# only the pertinent columns. `assign` builds a new frame (no in-place write),
# so no placeholder column has to be created first.
DIAGNOSES_df = DIAGNOSES_df.assign(
    diagnosis=DIAGNOSES_df['icd9_code'].map(icd9_to_description)
)[['subject_id', 'icd9_code', 'diagnosis']]

# Remove non-pertinent data.
# Columns are selected by NAME rather than by position: positional slicing gave
# the same result on the first run, but re-running the cell sliced the already
# trimmed frames again (dropping wanted columns or raising IndexError).
# Name-based selection makes the cell safe to re-run.
CPTEVENTS_df = CPTEVENTS_df[['subject_id', 'ticket_id_seq', 'sectionheader', 'subsectionheader']]
DRGCODES_df = DRGCODES_df[['subject_id', 'drg_code', 'description']]
PRESCRIPTIONS_df = PRESCRIPTIONS_df[[
    'subject_id', 'startdate', 'enddate', 'drug_type', 'drug',
    'drug_name_poe', 'drug_name_generic', 'formulary_drug_cd', 'gsn', 'ndc',
    'prod_strength', 'dose_val_rx', 'dose_unit_rx', 'form_val_disp',
    'form_unit_disp', 'route',
]]

In [34]:
# For testing: show which columns each frame currently holds.
for _frame in (DRGCODES_df, CPTEVENTS_df, PATIENTS_df, PRESCRIPTIONS_df, DIAGNOSES_df):
    print(_frame.columns)

Index(['subject_id', 'drg_code', 'description'], dtype='object')
Index(['subject_id', 'ticket_id_seq', 'sectionheader', 'subsectionheader'], dtype='object')
Index(['subject_id', 'gender', 'dob'], dtype='object')
Index(['subject_id', 'startdate', 'enddate', 'drug_type', 'drug',
       'drug_name_poe', 'drug_name_generic', 'formulary_drug_cd', 'gsn', 'ndc',
       'prod_strength', 'dose_val_rx', 'dose_unit_rx', 'form_val_disp',
       'form_unit_disp', 'route'],
      dtype='object')
Index(['subject_id', 'icd9_code', 'diagnosis'], dtype='object')


In [26]:
# Display the CPT events frame as this cell's output for a quick visual check.
CPTEVENTS_df

Unnamed: 0,subject_id,ticket_id_seq,sectionheader,subsectionheader
0,10117,1.0,Evaluation and management,Consultations
1,10117,2.0,Evaluation and management,Hospital inpatient services
2,10117,3.0,Medicine,Dialysis
3,10117,4.0,Evaluation and management,Hospital inpatient services
4,10111,1.0,Evaluation and management,Consultations
...,...,...,...,...
1574,42281,,Medicine,Pulmonary
1575,42302,,Medicine,Pulmonary
1576,40595,,Medicine,Pulmonary
1577,40595,,Medicine,Pulmonary


In [35]:
# Distinct patient identifiers present in PATIENTS_df.
subject_ids = PATIENTS_df['subject_id'].unique()


def _rows_for_subjects(frame, ids):
    """Return the rows of ``frame`` whose subject_id appears in ``ids``."""
    in_cohort = frame['subject_id'].isin(ids)
    return frame[in_cohort]


# Restrict every event table to the patients listed above.
DRGCODES_df_filtered = _rows_for_subjects(DRGCODES_df, subject_ids)
CPTEVENTS_df_filtered = _rows_for_subjects(CPTEVENTS_df, subject_ids)
PRESCRIPTIONS_df_filtered = _rows_for_subjects(PRESCRIPTIONS_df, subject_ids)
DIAGNOSES_df_filtered = _rows_for_subjects(DIAGNOSES_df, subject_ids)


In [36]:
def default(o):
    """Fallback serializer passed to ``json.dump`` for NumPy scalar values.

    ``json`` calls this hook only for objects it cannot encode natively.

    Args:
        o: The object ``json`` could not serialize.

    Returns:
        A built-in ``int``, ``float`` or ``bool`` equivalent of ``o``.

    Raises:
        TypeError: If ``o`` is not a supported NumPy scalar.
    """
    # Match the abstract scalar bases so every width (int32, float32, ...) is
    # covered, not just the 64-bit variants.
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, np.floating):
        return float(o)
    if isinstance(o, np.bool_):
        return bool(o)
    raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")

# Accumulates every per-patient record built below. It has to exist before the
# loop: appending to an undefined `json_objects` raises NameError on a fresh
# kernel. Resetting it here also keeps re-runs from duplicating entries.
json_objects = []

# Build one nested JSON document per patient and write it to its own file.
for subject_id in subject_ids:
    # First PATIENTS row for this subject, as a plain dict.
    patient_info = PATIENTS_df[PATIENTS_df['subject_id'] == subject_id].iloc[0].to_dict()

    # NOTE(review): missing values (e.g. absent ticket_id_seq) reach json.dump as
    # NaN and are written as the bare token NaN, which strict JSON parsers
    # reject - confirm downstream consumers tolerate it.
    json_object = {
        "subject_id": subject_id,
        "gender": patient_info['gender'],
        "dob": patient_info['dob'],
        "DRGENTRIES": DRGCODES_df_filtered[DRGCODES_df_filtered['subject_id'] == subject_id][['drg_code', 'description']].to_dict('records'),
        "PRESCRIPTIONS": PRESCRIPTIONS_df_filtered[PRESCRIPTIONS_df_filtered['subject_id'] == subject_id].drop(columns=['subject_id']).to_dict('records'),
        "CPTEVENTS": CPTEVENTS_df_filtered[CPTEVENTS_df_filtered['subject_id'] == subject_id].drop(columns=['subject_id']).to_dict('records'),
        "DIAGNOSES": DIAGNOSES_df_filtered[DIAGNOSES_df_filtered['subject_id'] == subject_id][['icd9_code', 'diagnosis']].to_dict('records')
    }

    # Save the JSON object to a file named after the patient, in the working directory.
    file_name = f'DemoData{subject_id}.json'
    with open(file_name, 'w') as f:
        # `default` converts NumPy scalars (such as subject_id) that json cannot encode.
        json.dump(json_object, f, indent=4, default=default)

    json_objects.append(json_object)