In [5]:
import pandas as pd 
import numpy as np 
import json

In [6]:
def save_json(data, filename):
    """
    Write a Python object to a JSON file.
    
    Args:
        data: The Python object to be serialized and written to the file.
        filename (str): The name of the JSON file where the data will be saved.
        
    Returns:
        None
    """
    with open(filename, 'w') as json_file:
        json.dump(data, json_file, indent=4)

In [7]:

def read_csv_to_dataframe(file_path):
    try:
        # Read the CSV file into a Pandas DataFrame
        dataframe = pd.read_csv(file_path)
        dataframe.fillna(method='ffill', inplace=True)
        return dataframe
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None


In [8]:
patients_df = read_csv_to_dataframe("data/patients.csv")
prescriptions_df = read_csv_to_dataframe("data/prescriptions.csv")
inputevents_df = read_csv_to_dataframe("data/inputevents.csv")
procedureevents_df = read_csv_to_dataframe("data/procedureevents.csv")
d_icd_diagnoses_df = read_csv_to_dataframe("data/d_icd_diagnoses.csv")
triage_df = read_csv_to_dataframe("data/triage.csv")
vitalsign_df = read_csv_to_dataframe("data/vitalsign.csv")

  dataframe = pd.read_csv(file_path)


In [9]:
prescriptions_df.columns

Index(['subject_id', 'hadm_id', 'pharmacy_id', 'poe_id', 'poe_seq',
       'order_provider_id', 'starttime', 'stoptime', 'drug_type', 'drug',
       'formulary_drug_cd', 'gsn', 'ndc', 'prod_strength', 'form_rx',
       'dose_val_rx', 'dose_unit_rx', 'form_val_disp', 'form_unit_disp',
       'doses_per_24_hrs', 'route'],
      dtype='object')

In [10]:
inputevents_df

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,starttime,endtime,storetime,itemid,amount,amountuom,...,ordercomponenttypedescription,ordercategorydescription,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,statusdescription,originalamount,originalrate
0,10000032,29079034,39553978,66056,2180-07-23 21:10:00,2180-07-23 21:11:00,2180-07-23 21:10:00,226452,100.000000,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.0,100.00000
1,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:01:00,2180-07-23 18:56:00,226452,200.000000,ml,...,Main order parameter,Bolus,39.4,200.0,ml,0,0,FinishedRunning,200.0,200.00000
2,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:30:00,2180-07-23 17:02:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.0,100.00000
3,10000032,29079034,39553978,88981,2180-07-23 17:33:00,2180-07-23 18:03:00,2180-07-23 18:16:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.0,100.00000
4,10000032,29079034,39553978,88981,2180-07-23 18:56:00,2180-07-23 18:57:00,2180-07-23 18:56:00,226452,100.000000,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.0,100.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8978888,19999987,23865745,36195440,90295,2145-11-02 23:28:00,2145-11-03 09:32:00,2145-11-02 23:28:00,225942,0.755000,mg,...,Main order parameter,Continuous Med,60.0,50.0,ml,0,0,Paused,2.5,75.00000
8978889,19999987,23865745,36195440,90295,2145-11-02 23:28:00,2145-11-03 09:32:00,2145-11-02 23:28:00,225943,15.100000,ml,...,Mixed solution,Continuous Med,60.0,50.0,ml,0,0,Paused,50.0,1.50000
8978890,19999987,23865745,36195440,90295,2145-11-02 23:38:00,2145-11-03 12:55:00,2145-11-03 14:29:00,225158,1000.000037,ml,...,Main order parameter,Continuous IV,60.0,1000.0,ml,0,0,FinishedRunning,1000.0,75.28231
8978891,19999987,23865745,36195440,91879,2145-11-04 20:35:00,2145-11-04 20:36:00,2145-11-04 20:35:00,226452,200.000000,ml,...,Main order parameter,Bolus,94.0,200.0,ml,0,0,FinishedRunning,200.0,200.00000


In [11]:
d_icd_diagnoses_df

Unnamed: 0,icd_code,icd_version,long_title
0,0010,9,Cholera due to vibrio cholerae
1,0011,9,Cholera due to vibrio cholerae el tor
2,0019,9,"Cholera, unspecified"
3,0020,9,Typhoid fever
4,0021,9,Paratyphoid fever A
...,...,...,...
109770,Z992,10,Dependence on renal dialysis
109771,Z993,10,Dependence on wheelchair
109772,Z998,10,Dependence on other enabling machines and devices
109773,Z9981,10,Dependence on supplemental oxygen


In [12]:
data_pt = pd.merge(patients_df, triage_df, on='subject_id', how='inner')

In [13]:
data_pt

Unnamed: 0,subject_id,gender,anchor_age,anchor_year,anchor_year_group,dod,stay_id,temperature,heartrate,resprate,o2sat,sbp,dbp,pain,acuity,chiefcomplaint
0,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,97.8,87.0,14.0,97.0,71.0,43.0,7,2.0,Hypotension
1,10000032,F,52,2180,2014 - 2016,2180-09-09,33258284,98.4,70.0,16.0,97.0,106.0,63.0,0,3.0,"Abd pain, Abdominal distention"
2,10000032,F,52,2180,2014 - 2016,2180-09-09,35968195,99.4,105.0,18.0,96.0,106.0,57.0,10,3.0,"n/v/d, Abd pain"
3,10000032,F,52,2180,2014 - 2016,2180-09-09,38112554,98.9,88.0,18.0,97.0,116.0,88.0,10,3.0,Abdominal distention
4,10000032,F,52,2180,2014 - 2016,2180-09-09,39399961,98.7,77.0,16.0,98.0,96.0,50.0,13,2.0,"Abdominal distention, Abd pain, LETHAGIC"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425082,19999784,M,57,2119,2017 - 2019,2162-08-23,37972930,98.0,91.0,16.0,99.0,148.0,90.0,5,2.0,Abnormal MRI
425083,19999828,F,46,2147,2017 - 2019,2162-08-23,30712109,98.1,83.0,18.0,100.0,107.0,75.0,8,2.0,"Abd pain, Wound eval"
425084,19999828,F,46,2147,2017 - 2019,2162-08-23,32917002,96.6,112.0,18.0,100.0,110.0,82.0,4,2.0,"Abd pain, Wound eval"
425085,19999914,F,49,2158,2017 - 2019,2164-09-17,32002659,99.5,81.0,10.0,100.0,93.0,55.0,0,2.0,"Altered mental status, Substance use"


In [14]:
data_pv = pd.merge(patients_df, vitalsign_df, on='subject_id', how='inner')

In [15]:
data_pv

Unnamed: 0,subject_id,gender,anchor_age,anchor_year,anchor_year_group,dod,stay_id,charttime,temperature,heartrate,resprate,o2sat,sbp,dbp,rhythm,pain
0,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 16:36:00,,83.0,24.0,97.0,90.0,51.0,,0
1,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 16:43:00,,85.0,22.0,98.0,76.0,39.0,,0
2,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 16:45:00,,84.0,22.0,97.0,75.0,39.0,,0
3,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 17:56:00,,84.0,20.0,99.0,86.0,51.0,,0
4,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 18:37:00,98.4,86.0,20.0,98.0,65.0,37.0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1564605,19999828,F,46,2147,2017 - 2019,2162-08-23,32917002,2149-01-08 17:10:00,98.1,109.0,15.0,96.0,111.0,78.0,nsr,4
1564606,19999914,F,49,2158,2017 - 2019,2164-09-17,32002659,2158-12-24 11:43:00,99.5,81.0,10.0,100.0,93.0,55.0,nsr,0
1564607,19999987,F,57,2145,2011 - 2013,2164-09-17,34731548,2145-11-02 19:40:00,99.5,112.0,18.0,100.0,118.0,83.0,nsr,0
1564608,19999987,F,57,2145,2011 - 2013,2164-09-17,34731548,2145-11-02 20:11:00,99.5,111.0,18.0,100.0,123.0,82.0,nsr,unable


In [16]:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.mixture import BayesianGaussianMixture
from sklearn.metrics import pairwise_distances_argmin_min

In [17]:
data_pv

Unnamed: 0,subject_id,gender,anchor_age,anchor_year,anchor_year_group,dod,stay_id,charttime,temperature,heartrate,resprate,o2sat,sbp,dbp,rhythm,pain
0,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 16:36:00,,83.0,24.0,97.0,90.0,51.0,,0
1,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 16:43:00,,85.0,22.0,98.0,76.0,39.0,,0
2,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 16:45:00,,84.0,22.0,97.0,75.0,39.0,,0
3,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 17:56:00,,84.0,20.0,99.0,86.0,51.0,,0
4,10000032,F,52,2180,2014 - 2016,2180-09-09,32952584,2180-07-22 18:37:00,98.4,86.0,20.0,98.0,65.0,37.0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1564605,19999828,F,46,2147,2017 - 2019,2162-08-23,32917002,2149-01-08 17:10:00,98.1,109.0,15.0,96.0,111.0,78.0,nsr,4
1564606,19999914,F,49,2158,2017 - 2019,2164-09-17,32002659,2158-12-24 11:43:00,99.5,81.0,10.0,100.0,93.0,55.0,nsr,0
1564607,19999987,F,57,2145,2011 - 2013,2164-09-17,34731548,2145-11-02 19:40:00,99.5,112.0,18.0,100.0,118.0,83.0,nsr,0
1564608,19999987,F,57,2145,2011 - 2013,2164-09-17,34731548,2145-11-02 20:11:00,99.5,111.0,18.0,100.0,123.0,82.0,nsr,unable


In [18]:
# data_pi = pd.merge(data_pv, inputevents_df, on='subject_id', how='inner')

In [19]:
inputevents_df

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,starttime,endtime,storetime,itemid,amount,amountuom,...,ordercomponenttypedescription,ordercategorydescription,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,statusdescription,originalamount,originalrate
0,10000032,29079034,39553978,66056,2180-07-23 21:10:00,2180-07-23 21:11:00,2180-07-23 21:10:00,226452,100.000000,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.0,100.00000
1,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:01:00,2180-07-23 18:56:00,226452,200.000000,ml,...,Main order parameter,Bolus,39.4,200.0,ml,0,0,FinishedRunning,200.0,200.00000
2,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:30:00,2180-07-23 17:02:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.0,100.00000
3,10000032,29079034,39553978,88981,2180-07-23 17:33:00,2180-07-23 18:03:00,2180-07-23 18:16:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.0,100.00000
4,10000032,29079034,39553978,88981,2180-07-23 18:56:00,2180-07-23 18:57:00,2180-07-23 18:56:00,226452,100.000000,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.0,100.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8978888,19999987,23865745,36195440,90295,2145-11-02 23:28:00,2145-11-03 09:32:00,2145-11-02 23:28:00,225942,0.755000,mg,...,Main order parameter,Continuous Med,60.0,50.0,ml,0,0,Paused,2.5,75.00000
8978889,19999987,23865745,36195440,90295,2145-11-02 23:28:00,2145-11-03 09:32:00,2145-11-02 23:28:00,225943,15.100000,ml,...,Mixed solution,Continuous Med,60.0,50.0,ml,0,0,Paused,50.0,1.50000
8978890,19999987,23865745,36195440,90295,2145-11-02 23:38:00,2145-11-03 12:55:00,2145-11-03 14:29:00,225158,1000.000037,ml,...,Main order parameter,Continuous IV,60.0,1000.0,ml,0,0,FinishedRunning,1000.0,75.28231
8978891,19999987,23865745,36195440,91879,2145-11-04 20:35:00,2145-11-04 20:36:00,2145-11-04 20:35:00,226452,200.000000,ml,...,Main order parameter,Bolus,94.0,200.0,ml,0,0,FinishedRunning,200.0,200.00000


In [20]:
inputevents_first_10000 = inputevents_df.iloc[:10000]

In [21]:
inputevents_first_10000

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,starttime,endtime,storetime,itemid,amount,amountuom,...,ordercomponenttypedescription,ordercategorydescription,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,statusdescription,originalamount,originalrate
0,10000032,29079034,39553978,66056,2180-07-23 21:10:00,2180-07-23 21:11:00,2180-07-23 21:10:00,226452,100.000000,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.000000,100.000000
1,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:01:00,2180-07-23 18:56:00,226452,200.000000,ml,...,Main order parameter,Bolus,39.4,200.0,ml,0,0,FinishedRunning,200.000000,200.000000
2,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:30:00,2180-07-23 17:02:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.000000,100.000000
3,10000032,29079034,39553978,88981,2180-07-23 17:33:00,2180-07-23 18:03:00,2180-07-23 18:16:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.000000,100.000000
4,10000032,29079034,39553978,88981,2180-07-23 18:56:00,2180-07-23 18:57:00,2180-07-23 18:56:00,226452,100.000000,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.000000,100.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10010867,22429197,39880770,22102,2148-01-01 08:12:00,2148-01-01 11:33:00,2148-01-01 11:49:00,225150,199.999992,mcg,...,Main order parameter,Continuous Med,120.0,50.0,ml,0,0,FinishedRunning,200.000000,0.497512
9996,10010867,22429197,39880770,22102,2148-01-01 08:12:00,2148-01-01 11:33:00,2148-01-01 11:49:00,225158,49.999999,ml,...,Mixed solution,Continuous Med,120.0,50.0,ml,0,0,FinishedRunning,50.000000,14.925372
9997,10010867,22429197,39880770,22102,2148-01-01 08:26:00,2148-01-01 09:40:00,2148-01-01 09:02:00,222168,246.120567,mg,...,Main order parameter,Continuous Med,120.0,100.0,ml,0,0,ChangeDose/Rate,999.999939,25.000000
9998,10010867,22429197,39880770,22102,2148-01-01 08:26:00,2148-01-01 09:40:00,2148-01-01 09:02:00,225943,24.612051,ml,...,Mixed solution,Continuous Med,120.0,100.0,ml,0,0,ChangeDose/Rate,100.000000,18.028873


In [22]:
data_pi = pd.merge(data_pv, inputevents_first_10000, left_on='charttime', right_on='endtime', how="inner")

In [23]:
data_pi.iloc[0]

subject_id_x                                     10038882
gender                                                  M
anchor_age                                             89
anchor_year                                          2141
anchor_year_group                             2017 - 2019
dod                                            2152-06-29
stay_id_x                                        30044320
charttime                             2141-04-21 06:10:00
temperature                                          97.8
heartrate                                            72.0
resprate                                             18.0
o2sat                                                99.0
sbp                                                 154.0
dbp                                                  76.0
rhythm                                       Sinus Rhythm
pain                                                    0
subject_id_y                                     10002760
hadm_id       

In [24]:
patients_df

Unnamed: 0,subject_id,gender,anchor_age,anchor_year,anchor_year_group,dod
0,10000032,F,52,2180,2014 - 2016,2180-09-09
1,10000048,F,23,2126,2008 - 2010,2180-09-09
2,10000068,F,19,2160,2008 - 2010,2180-09-09
3,10000084,M,72,2160,2017 - 2019,2161-02-13
4,10000102,F,27,2136,2008 - 2010,2161-02-13
...,...,...,...,...,...,...
299707,19999828,F,46,2147,2017 - 2019,2162-08-23
299708,19999829,F,28,2186,2008 - 2010,2162-08-23
299709,19999840,M,58,2164,2008 - 2010,2164-09-17
299710,19999914,F,49,2158,2017 - 2019,2164-09-17


In [25]:
d_icd_diagnoses_df

Unnamed: 0,icd_code,icd_version,long_title
0,0010,9,Cholera due to vibrio cholerae
1,0011,9,Cholera due to vibrio cholerae el tor
2,0019,9,"Cholera, unspecified"
3,0020,9,Typhoid fever
4,0021,9,Paratyphoid fever A
...,...,...,...
109770,Z992,10,Dependence on renal dialysis
109771,Z993,10,Dependence on wheelchair
109772,Z998,10,Dependence on other enabling machines and devices
109773,Z9981,10,Dependence on supplemental oxygen


In [26]:
prescriptions_df

Unnamed: 0,subject_id,hadm_id,pharmacy_id,poe_id,poe_seq,order_provider_id,starttime,stoptime,drug_type,drug,...,gsn,ndc,prod_strength,form_rx,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,doses_per_24_hrs,route
0,10000032,22595853,11700683,10000032-34,34.0,P76JEQ,2180-05-07 01:00:00,2180-05-07 22:00:00,MAIN,Acetaminophen,...,004490,9.041989e+08,500mg Tablet,,500,mg,1,TAB,,PO/NG
1,10000032,22595853,14779570,10000032-22,22.0,P76JEQ,2180-05-07 00:00:00,2180-05-07 22:00:00,MAIN,Sodium Chloride 0.9% Flush,...,004490,0.000000e+00,10 mL Syringe,,3,mL,0.3,SYR,3.0,IV
2,10000032,22595853,19796602,10000032-50,50.0,P260SK,2180-05-08 08:00:00,2180-05-07 22:00:00,MAIN,Furosemide,...,008209,5.107901e+10,40mg Tablet,,40,mg,1,TAB,1.0,PO/NG
3,10000032,22595853,20256254,10000032-32,32.0,P76JEQ,2180-05-07 01:00:00,2180-05-07 22:00:00,MAIN,Raltegravir,...,063231,6.022761e+06,400 mg Tablet,,400,mg,1,TAB,2.0,PO
4,10000032,22595853,28781051,10000032-27,27.0,P76JEQ,2180-05-07 00:00:00,2180-05-07 22:00:00,MAIN,Heparin,...,006549,6.332303e+10,5000 Units / mL- 1mL Vial,,5000,UNIT,1,mL,3.0,SC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15416703,19999987,23865745,92839339,19999987-25,25.0,P5542X,2145-11-03 00:00:00,2145-11-03 18:00:00,MAIN,Influenza Virus Vaccine,...,69637.0,3.333200e+10,0.5 mL Syringe,TAB,0.5,mL,1,SYR,0.0,IM
15416704,19999987,23865745,92913309,19999987-150,150.0,P63OX3,2145-11-04 10:00:00,2145-11-11 17:00:00,MAIN,Levothyroxine Sodium,...,6649.0,7.445521e+07,50mcg Tablet,TAB,50,mcg,1,TAB,1.0,PO/NG
15416705,19999987,23865745,95753195,19999987-219,219.0,P77Z1Y,2145-11-10 10:00:00,2145-11-11 17:00:00,MAIN,Duloxetine,...,57892.0,2.324033e+06,30mg Capsule,TAB,60,mg,2,CAP,1.0,PO
15416706,19999987,23865745,96343043,19999987-206,206.0,P77Z1Y,2145-11-09 10:00:00,2145-11-09 16:00:00,MAIN,Venlafaxine XR,...,46405.0,6.808405e+10,150mg XR Capsule,TAB,150,mg,1,CAP,1.0,PO


In [27]:
d_items_df = read_csv_to_dataframe("data/d_items.csv")

File not found: data/d_items.csv


In [28]:
d_items_df

In [29]:
inputevents_first_10000['ordercategorydescription'].unique()

array(['Bolus', 'Continuous IV', 'Drug Push', 'Continuous Med',
       'Non Iv Meds'], dtype=object)

In [30]:
procedureevents_df['ordercategorydescription'].unique()

array(['Task', 'ContinuousProcess'], dtype=object)

In [31]:
inputevents_first_10000['ordercategorydescription'].unique()

array(['Bolus', 'Continuous IV', 'Drug Push', 'Continuous Med',
       'Non Iv Meds'], dtype=object)

In [32]:
procedureevents_df['ordercategoryname'].unique()

array(['Procedures', 'Peripheral Lines', 'Ventilation', 'Communication',
       'Imaging', 'Invasive Lines', 'Intubation/Extubation', 'Tubes',
       'Significant Events', 'Dialysis', 'Continuous Procedures',
       'CRRT Filter Change', '17 - Inhaled Meds', 'Peritoneal Dialysis'],
      dtype=object)

In [33]:
inputevents_first_10000['itemid'].unique()

array([226452, 220862, 225158, 225851, 220949, 225798, 225855, 225879,
       221794, 225152, 225911, 225154, 228315, 226089, 225975, 225883,
       221828, 228316, 222011, 227523, 229072, 226361, 225944, 221385,
       222168, 225943, 225148, 228351, 225799, 226453, 225797, 225899,
       225910, 225906, 226363, 221744, 221662, 225942, 221668, 221393,
       227533, 225909, 222056, 225850, 223258, 220864, 223260, 221749,
       226364, 226372, 225828, 225166, 227522, 221456, 225168, 225845,
       225884, 225913, 225151, 225907, 225930, 225893, 228135, 221906,
       225859, 223262, 225892, 227694, 225917, 225934, 225916, 225947,
       225948, 225834, 228340, 225974, 221833, 226365, 226369, 221289,
       225945, 225825, 225833, 225936, 225835, 222315, 220970, 228339,
       221468, 229295, 225171, 225170, 221429, 221347, 225823, 226368,
       221824, 225885, 229013, 225869, 229069, 220995, 225865, 227525,
       227536, 227526, 227529, 225902, 227975, 225912, 225881, 225863,
      

In [34]:
inputevents_df['ordercategorydescription'].unique()

array(['Bolus', 'Continuous IV', 'Drug Push', 'Continuous Med',
       'Non Iv Meds'], dtype=object)

In [35]:
procedureevents_df

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,starttime,endtime,storetime,itemid,value,valueuom,...,orderid,linkorderid,ordercategoryname,ordercategorydescription,patientweight,isopenbag,continueinnextdept,statusdescription,originalamount,originalrate
0,10000032,29079034,39553978,88981.0,2180-07-23 14:43:00,2180-07-23 14:44:00,2180-07-23 14:43:00,225966,1.0,,...,6416557,6416557,Procedures,Task,39.4,0,0,FinishedRunning,1.0,0
1,10000032,29079034,39553978,88981.0,2180-07-23 14:24:00,2180-07-23 23:50:00,2180-07-23 23:50:49.983,224275,566.0,min,...,6497934,6497934,Peripheral Lines,ContinuousProcess,39.4,1,0,FinishedRunning,566.0,1
2,10000032,29079034,39553978,88981.0,2180-07-23 14:24:00,2180-07-23 23:50:00,2180-07-23 23:50:49.983,224277,566.0,min,...,9643097,9643097,Peripheral Lines,ContinuousProcess,39.4,1,0,FinishedRunning,566.0,1
3,10000980,26913865,39765666,88981.0,2189-06-27 09:01:00,2189-06-27 20:38:00,2189-06-27 20:38:29.047,225794,697.0,min,...,5989583,5989583,Ventilation,ContinuousProcess,76.2,1,0,FinishedRunning,697.0,1
4,10000980,26913865,39765666,88981.0,2189-06-27 09:15:00,2189-06-27 20:38:00,2189-06-27 20:38:29.047,224277,683.0,min,...,476764,476764,Peripheral Lines,ContinuousProcess,76.2,1,0,FinishedRunning,683.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696087,19999987,23865745,36195440,90295.0,2145-11-02 23:28:00,2145-11-02 23:29:00,2145-11-02 23:28:00,225966,1.0,,...,6438362,6438362,Procedures,Task,60.0,0,0,FinishedRunning,1.0,0
696088,19999987,23865745,36195440,90295.0,2145-11-02 23:34:00,2145-11-04 04:45:00,2145-11-04 05:24:00,224277,1751.0,min,...,3357951,3357951,Peripheral Lines,ContinuousProcess,60.0,1,0,FinishedRunning,1751.0,1
696089,19999987,23865745,36195440,90295.0,2145-11-02 23:34:00,2145-11-04 07:49:00,2145-11-04 08:20:00,224275,1935.0,min,...,8504627,8504627,Peripheral Lines,ContinuousProcess,60.0,1,0,FinishedRunning,1935.0,1
696090,19999987,23865745,36195440,90295.0,2145-11-02 23:34:00,2145-11-04 19:15:00,2145-11-04 20:39:00,224277,2621.0,min,...,617277,617277,Peripheral Lines,ContinuousProcess,60.0,1,0,FinishedRunning,2621.0,1


In [36]:
inputevents_first_10000['ordercategoryname'].unique()

array(['14-Oral/Gastric Intake', '04-Fluids (Colloids)',
       '08-Antibiotics (IV)', '05-Med Bolus', '01-Drips',
       '11-Prophylaxis (Non IV)', '02-Fluids (Crystalloids)',
       '16-Pre Admission/Non-ICU', '13-Enteral Nutrition',
       '09-Antibiotics (Non IV)', '10-Prophylaxis (IV)',
       '03-IV Fluid Bolus', '06-Insulin (Non IV)', '07-Blood Products',
       '12-Parenteral Nutrition'], dtype=object)

In [37]:
len(inputevents_first_10000['itemid'].unique())

146

In [42]:
features = ["gender", "anchor_age", "temperature", "heartrate", "resprate", "o2sat", "sbp", "dbp", "rhythm"]

def construct_M(df):
    M = []
    for index, row in df.iterrows():
        r = []
        for feature in features:
            if feature == "gender":
                r.append(0 if row[feature] == "M" else 1)
            else:
                r.append(row[feature])
        M.append(r)
    
    save_json(M, "data/process/M.json")
    return np.array(M)





In [43]:
M = construct_M(data_pv)