In [1]:
# Import libraries
import numpy as np
import pandas as pd
import time
import os.path
import matplotlib.pyplot as plt
%matplotlib inline
# Move to the project root so that the relative 'data/...' paths used in the
# cells below resolve. Set the VENTILATION_PROJECT_DIR environment variable to
# run the notebook on another machine; when it is unset, the original
# hard-coded location is used, so existing behaviour is unchanged.
os.chdir(os.environ.get(
    'VENTILATION_PROJECT_DIR',
    'C:\\Users\\anear\\OneDrive - National University of Ireland, Galway\\PhD\\Research Projects\\Ventilation Project'
))

In [2]:
# NOTE(review): every np.load below uses allow_pickle=True, which unpickles
# arbitrary Python objects -- only point these paths at trusted files.

# Patient details: the file stores a pickled dict, .tolist() unwraps it
_data = np.load('data/final_patients.npy', allow_pickle=True).tolist()
patients = _data['patients']
print("Loaded the patient data...")

# General features: pull each entry out of the stored dict, in key order
_data = np.load('data/general_features.npy', allow_pickle=True).tolist()
(
    bicarb, bilirubin, bp, fio2,
    gcs_eyes, gcs_motor, gcs_verbal, hr,
    pao2, potassium, sodium, spo2,
    temp, urea, urine, wbc,
) = (
    _data[key] for key in (
        'bicarb', 'bilirubin', 'bp', 'fio2',
        'gcs_eyes', 'gcs_motor', 'gcs_verbal', 'hr',
        'pao2', 'potassium', 'sodium', 'spo2',
        'temp', 'urea', 'urine', 'wbc',
    )
)
print("... and the general features...")

# Ventilator features: same pattern as above
_data = np.load('data/ventilator_features.npy', allow_pickle=True).tolist()
(
    fentanyl_cv, fentanyl_mv, ie_ratio, insp_flow,
    insp_press, peep, propofol_cv, propofol_mv,
    psv, resp, tidvol_obs, tidvol_set, tidvol_spon,
) = (
    _data[key] for key in (
        'fentanyl_cv', 'fentanyl_mv', 'ie_ratio', 'insp_flow',
        'insp_press', 'peep', 'propofol_cv', 'propofol_mv',
        'psv', 'resp', 'tidvol_obs', 'tidvol_set', 'tidvol_spon',
    )
)
print("... and the ventilator features!")

Loaded the patient data...
... and the general features...
... and the ventilator features!


In [3]:
# Function that creates hourly time series from raw data
def hourly_time_series(feature, hrs=24, patient_details=None):
    
    '''
    Build one row of hourly mean values per patient, ending at extubation.
    
    feature is a list of numpy arrays, one per patient; column 0 of each array
        holds the measurement times and column 1 the measured values
    hrs is the number of hours before extubation, with the default set to 24
    patient_details is a pandas DataFrame with an 'endtime' column (extubation
        time), row i matching feature[i]; when omitted, the notebook-level
        `patients` DataFrame is used
    
    Returns a numpy array of shape (number of patients, hrs). Column k is the
    mean of the values measured in hour k of the window (column hrs-1 is the
    last hour before extubation). If hour k has no measurement, the values from
    the nearest hour(s) that do have one are averaged instead. Patients with no
    measurements, or with no usable measurement times, get -1 in every column.
    '''
    
    # Fall back to the notebook-level DataFrame when none is passed in
    if patient_details is None:
        patient_details = patients
    
    # Initialise a numpy array
    m = len(feature) # number of patients
    processed_feature = np.zeros((m,hrs))
    
    # Iterate over every patient
    for i in range(m):
        
        measurements = feature[i]
        
        # Account for patients with no measurements (checked before slicing so
        # that an empty array of any shape is handled)
        if len(measurements) == 0:
            processed_feature[i,:] = -1
            continue
        
        # Get their data
        raw_time = measurements[:,0]
        data = measurements[:,1]
        ext_time = patient_details.iloc[i]['endtime'] # extubation time
        
        # Re-format time to have time series finishing at time of extubation.
        # The subtraction is done on a DatetimeIndex so it is vectorised and
        # does not depend on object-array arithmetic with a Timestamp.
        diff_time = pd.to_datetime(raw_time) - pd.Timestamp(ext_time)
        days = np.asarray(diff_time.days, dtype=float)
        seconds = np.asarray(diff_time.seconds, dtype=float)
        pre_ext_time = hrs + days*24 + seconds/(60*60)
        
        # Hour bin of every measurement: bin k covers k <= pre_ext_time < k+1
        hour_bin = np.floor(pre_ext_time)
        
        # Missing times (NaT) give NaN bins and can never be matched; without
        # this guard a patient whose times are all missing would never finish
        usable = ~np.isnan(hour_bin)
        if not usable.any():
            processed_feature[i,:] = -1
            continue
        hour_bin = hour_bin[usable]
        data = data[usable]
        
        # Create hourly time series
        for k in range(hrs):
            # Widening the window [k-x, k+1+x) one hour at a time until it is
            # non-empty selects exactly the measurements whose bin is closest
            # to k, so pick those directly
            distance = np.abs(hour_bin - k)
            values = data[distance == distance.min()]
            processed_feature[i,k] = np.mean(values)
        
    # Return the numpy array
    return processed_feature

In [4]:
# Create a list of feature names
# (each name is also the name of a notebook-level variable holding the raw data)
features = [
    'bicarb',
    'bilirubin',
    'bp',
    'fio2',
    'gcs_eyes',
    'gcs_motor',
    'gcs_verbal',
    'hr',
    'pao2',
    'potassium',
    'sodium',
    'spo2',
    'temp',
    'urea',
    'urine',
    'wbc',
    'fentanyl_cv',
    'fentanyl_mv',
    'ie_ratio',
    'insp_flow',
    'insp_press',
    'peep',
    'propofol_cv',
    'propofol_mv',
    'psv',
    'resp',
    'tidvol_obs',
    'tidvol_set',
    'tidvol_spon'
]

# Iterate over every feature
for feature_name in features:
    
    # Time how long each feature takes
    start = time.time()
    
    # Define a new variable name
    varname = 'processed_' + feature_name
    
    # Create 24 hour time series for each patient.
    # The raw feature is looked up by name and the result is bound to a
    # notebook-level variable (processed_<name>) through globals(), instead of
    # building a source string and running it with exec().
    globals()[varname] = hourly_time_series(globals()[feature_name])
    
    # Print the run time
    end = time.time()
    print('{}: {} minutes'.format(feature_name, round((end-start)/60,2)))

bicarb: 0.32 minutes
bilirubin: 1.49 minutes
bp: 0.77 minutes
fio2: 0.36 minutes
gcs_eyes: 0.28 minutes
gcs_motor: 0.34 minutes
gcs_verbal: 0.32 minutes
hr: 0.71 minutes
pao2: 0.46 minutes
potassium: 0.29 minutes
sodium: 0.29 minutes
spo2: 0.73 minutes
temp: 0.34 minutes
urea: 0.31 minutes
urine: 0.78 minutes
wbc: 0.32 minutes
fentanyl_cv: 0.44 minutes
fentanyl_mv: 0.4 minutes
ie_ratio: 0.45 minutes
insp_flow: 1.26 minutes
insp_press: 0.66 minutes
peep: 0.31 minutes
propofol_cv: 0.79 minutes
propofol_mv: 0.56 minutes
psv: 0.35 minutes
resp: 0.72 minutes
tidvol_obs: 0.56 minutes
tidvol_set: 0.77 minutes
tidvol_spon: 0.42 minutes


In [5]:
# Make sure the output folder is there before writing into it
if not os.path.exists('./data'): os.makedirs('./data')

# Collect every processed array under its feature name and write the whole
# mapping to data/processed_features.npy
tosave = dict(
    bicarb=processed_bicarb,
    bilirubin=processed_bilirubin,
    bp=processed_bp,
    fio2=processed_fio2,
    gcs_eyes=processed_gcs_eyes,
    gcs_motor=processed_gcs_motor,
    gcs_verbal=processed_gcs_verbal,
    hr=processed_hr,
    pao2=processed_pao2,
    potassium=processed_potassium,
    sodium=processed_sodium,
    spo2=processed_spo2,
    temp=processed_temp,
    urea=processed_urea,
    urine=processed_urine,
    wbc=processed_wbc,
    fentanyl_cv=processed_fentanyl_cv,
    fentanyl_mv=processed_fentanyl_mv,
    ie_ratio=processed_ie_ratio,
    insp_flow=processed_insp_flow,
    insp_press=processed_insp_press,
    peep=processed_peep,
    propofol_cv=processed_propofol_cv,
    propofol_mv=processed_propofol_mv,
    psv=processed_psv,
    resp=processed_resp,
    tidvol_obs=processed_tidvol_obs,
    tidvol_set=processed_tidvol_set,
    tidvol_spon=processed_tidvol_spon,
)
np.save('data/processed_features.npy', tosave)
print("Saved!")

Saved!
