In [7]:
import pandas as pd
import numpy as np
import json

In [8]:
df = pd.read_csv("processed_data/subgroup_1_predictions.csv", index_col=0)
df.head()

Unnamed: 0,subject_id,hadm_id,subgroup,Combined_Blood_Pressure,charttime,Final_Temperature_F,heart_rate,RespiratoryRate_combined,SpO2,date,hadm_number,hours_in_hospital,sofa,group,predictions
1374,59,104130,1,103.0,2197-11-14 23:00:00,98.6,62.0,14.0,99.0,2197-11-14,1,0,4,1,1
1375,59,104130,1,105.0,2197-11-15 03:00:00,98.5,62.0,9.0,95.0,2197-11-15,1,4,4,1,1
1376,59,104130,1,107.0,2197-11-15 07:00:00,97.800079,63.0,17.0,97.0,2197-11-15,1,8,4,1,1
1377,59,104130,1,117.0,2197-11-15 11:00:00,97.500021,62.0,20.0,98.0,2197-11-15,1,12,4,1,1
1378,59,104130,1,124.0,2197-11-15 16:00:00,99.199997,70.0,21.0,96.0,2197-11-15,1,17,4,1,1


In [13]:
def convert_int64(dataframe):
    for col in dataframe.columns:
        if dataframe[col].dtype == np.int64:
            dataframe[col] = dataframe[col].astype(int)
    return df
df = convert_int64(df)

In [14]:
df.dtypes

subject_id                           int64
hadm_id                              int64
subgroup                             int64
Combined_Blood_Pressure            float64
charttime                   datetime64[ns]
Final_Temperature_F                float64
heart_rate                         float64
RespiratoryRate_combined           float64
SpO2                               float64
date                                object
hadm_number                          int64
hours_in_hospital                    int64
sofa                                 int64
group                                int64
predictions                          int64
dtype: object

In [15]:
def get_latest_vitals(dataframe):
    dataframe['charttime'] = pd.to_datetime(dataframe['charttime'])
    dataframe_sorted = dataframe.sort_values(by=['subject_id', 'charttime'])
    latest_vitals = dataframe_sorted.groupby('subject_id').last().reset_index()
    return latest_vitals

latest_vitals_df = get_latest_vitals(df)

latest_vitals_df.head()

Unnamed: 0,subject_id,hadm_id,subgroup,Combined_Blood_Pressure,charttime,Final_Temperature_F,heart_rate,RespiratoryRate_combined,SpO2,date,hadm_number,hours_in_hospital,sofa,group,predictions
0,59,104130,1,163.0,2197-11-16 19:00:00,97.599998,74.0,16.0,97.0,2197-11-16,1,44,4,1,1
1,109,170149,1,145.0,2141-05-25 19:00:00,97.400002,97.0,26.0,100.0,2141-05-25,6,26,5,1,1
2,130,198214,1,119.0,2119-11-03 04:00:00,98.6,93.0,26.0,97.0,2119-11-03,2,79,3,0,0
3,234,134944,1,109.0,2106-04-13 05:00:00,96.699997,86.0,31.0,100.0,2106-04-13,1,209,4,1,1
4,263,120845,1,146.0,2160-01-11 16:00:00,98.300003,60.0,22.0,98.0,2160-01-11,1,20,1,0,0


In [30]:
latest_vitals_df[latest_vitals_df['predictions'] == 2]

Unnamed: 0,subject_id,hadm_id,subgroup,Combined_Blood_Pressure,charttime,Final_Temperature_F,heart_rate,RespiratoryRate_combined,SpO2,date,hadm_number,hours_in_hospital,sofa,group,predictions
12,433,172593,1,101.0,2164-03-11 12:00:00,97.300043,83.0,11.0,95.0,2164-03-11,4,96,5,1,2
13,453,187961,1,107.0,2156-04-26 15:00:00,99.600079,95.0,19.0,95.0,2156-04-26,1,101,12,2,2
15,527,165891,1,118.0,2175-08-18 07:00:00,100.000043,77.0,20.0,95.0,2175-08-18,1,41,8,2,2
16,584,167135,1,83.0,2114-10-24 03:00:00,97.400002,72.0,21.0,96.0,2114-10-24,1,28,1,0,2
17,588,170452,1,115.0,2200-01-15 04:00:00,97.500021,70.0,16.0,99.0,2200-01-15,1,171,8,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1066,32232,147092,1,123.0,2129-12-17 04:00:00,96.500000,87.0,10.0,100.0,2129-12-17,1,516,13,2,2
1068,32247,131903,1,147.0,2122-01-25 12:00:00,97.800079,99.0,18.0,99.0,2122-01-25,1,338,7,2,2
1069,32296,143945,1,137.0,2102-04-23 15:00:00,95.500043,71.0,20.0,98.0,2102-04-23,1,27,7,2,2
1070,32315,101791,1,123.0,2137-09-07 11:00:00,96.800003,71.0,15.0,100.0,2137-09-07,1,175,7,2,2


In [31]:
def json_converter(dataframe, patient_id):
    data = {
        "name": "John Doe",
    }
    row = dataframe[dataframe['subject_id']==patient_id]
    
    def custom_serializer(obj):
        if isinstance(obj, np.integer):
            return int(obj)  # Convert NumPy integers to Python int
        elif isinstance(obj, pd.Timestamp):
            return obj.isoformat()  # Convert Timestamp to ISO 8601 string
        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
    
    data["subject_id"] = row["subject_id"].iloc[0]
    data["subgroup"] = row["subgroup"].iloc[0]
    data["latest_blood_pressure"] = row["Combined_Blood_Pressure"].iloc[0]
    data["latest_chart_time"] = row["charttime"].iloc[0]
    data["latest_temp_F"] = row["Final_Temperature_F"].iloc[0]
    data["latest_heart_rate"] = row["heart_rate"].iloc[0]
    data["latest_respiratory_rate"] = row["RespiratoryRate_combined"].iloc[0]
    
    file_name = "processed_data/id" + str(patient_id) + ".json"

    with open(file_name, 'w') as json_file:
        json.dump(data, json_file, indent=4, default=custom_serializer)
        
json_converter(latest_vitals_df, 433)

In [17]:
latest_vitals_df[latest_vitals_df['subject_id']==59]['charttime'].iloc[0]

Timestamp('2197-11-16 19:00:00')