In [1]:
# Libraries
import numpy as np
import pandas as pd
# Render up to 40 columns when a DataFrame is displayed
pd.set_option('display.max_columns', 40)
# Allow text output up to 2000 characters wide before pandas wraps it
pd.set_option('display.width', 2000)
import math
import time
import random
# NOTE(review): duplicate of the import on the previous line; harmless but redundant
import random
import shap
import torch

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set the random seeds for deterministic results.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
# torch.manual_seed returns the global Generator; bind it to a name so the
# notebook (which echoes every top-level expression) does not print its repr.
torch_generator = torch.manual_seed(SEED)
# Seed every CUDA device rather than only the current one; this call is
# silently ignored when CUDA is not available.
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
# cuDNN autotuning can select different kernels between runs, so keep it off.
torch.backends.cudnn.benchmark = False

# Set device: prefer the GPU when one is visible, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("GPU is available")
else:
    print("GPU not available, CPU used")

<torch._C.Generator at 0x7fa6d0972350>

GPU not available, CPU used


In [3]:
# Read the C22 feature table (file name indicates it includes difference columns)
path = r'switch_data/c22_all_with_diff.csv'
c22_df = pd.read_csv(filepath_or_buffer=path)

In [5]:
# Read the antibiotic administration table that carries the po_flag labels
path = r'switch_data/antibiotic_po_flag.csv'
antibiotic_df = pd.read_csv(filepath_or_buffer=path)

In [6]:
# Expose the administration timestamp under the shorter column name 'date'
antibiotic_df = antibiotic_df.rename(columns={'ADMINISTRATION_DATETIME': 'date'})

In [8]:
# Attach the antibiotic label columns to the C22 features.
# No `on=` is given, so pandas performs an inner join on every column name the
# two frames share.
# NOTE(review): presumably SPELL_IDENTIFIER and date are the shared keys — confirm
# against the columns of c22_df.
label_columns = ['SPELL_IDENTIFIER', 'date', 'ROUTE', 'po_flag', 'iv_treatment_length']
icare_df = antibiotic_df[label_columns].merge(c22_df)

In [9]:
# Arrange rows by spell, then by date within each spell
icare_df = icare_df.sort_values(by=['SPELL_IDENTIFIER', 'date'])

In [10]:
# Replace the index with a fresh 0..n-1 range, discarding the old one
icare_df = icare_df.reset_index(drop=True)

In [25]:
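# Save (currently disabled): uncomment to write the merged frame to the CSV
# that the following cell reads from 'switch_data/icare_switch_data.csv'.
#icare_df.to_csv('switch_data/icare_switch_data.csv', index=False)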

In [6]:
# Load the merged dataset from disk; this rebinds icare_df to the file's contents
path = r'switch_data/icare_switch_data.csv'
icare_df = pd.read_csv(filepath_or_buffer=path)

In [12]:
# Vital-sign measurements; each one contributes four columns per feature suffix.
_VITAL_SIGNS = (
    'Diastolic Blood Pressure',
    'Glasgow Coma Score',
    'Heart Rate',
    'Mean Arterial Pressure',
    'NEWS Conscious Level Score',
    'NEWS Supplemental Oxygen Calc',
    'Respiratory Rate',
    'SpO2',
    'Systolic Blood Pressure',
    'Temperature',
)


def _feature_columns(feature_suffix):
    """Build the column names for one numbered feature across all vital signs.

    The result lists, for every vital sign in order, the plain column and its
    '_current_stay' variant, followed by the '_difference' version of each of
    those names in the same order.
    """
    base_names = [
        f'{vital}{feature_suffix}{scope}'
        for vital in _VITAL_SIGNS
        for scope in ('', '_current_stay')
    ]
    difference_names = [f'{name}_difference' for name in base_names]
    return base_names + difference_names


# Feature number 22 (presumably the mean, going by the variable name — confirm)
mean_col_list = _feature_columns('22')

# Feature number 23 (presumably the standard deviation, going by the variable name — confirm)
std_col_list = _feature_columns('23')

In [13]:
### TRIMMING FEATURES ###
# A column is a removal candidate when its number of distinct non-null values,
# divided by the number of rows, is at or below this threshold.
threshold = 0.1

# Boolean per column: True where the distinct-value ratio is within the threshold
low_variety_mask = icare_df.apply(lambda series: series.nunique() / len(series) <= threshold)

# Columns that must be kept regardless of their distinct-value ratio
protected_columns = (
    set(mean_col_list)
    | set(std_col_list)
    | {'SPELL_IDENTIFIER', 'date', 'ROUTE', 'po_flag', 'iv_treatment_length', '24_hour_flag', '48_hour_flag'}
)

columns_to_drop = [
    name
    for name in icare_df.columns[low_variety_mask].to_list()
    if name not in protected_columns
]

# Remove the remaining low-variety columns, leaving icare_df itself untouched
icare_df_filtered = icare_df.drop(columns=columns_to_drop)

In [34]:
# Display the names currently held in columns_to_drop
columns_to_drop

['Diastolic Blood Pressure2',
 'Diastolic Blood Pressure3',
 'Diastolic Blood Pressure4',
 'Diastolic Blood Pressure5',
 'Diastolic Blood Pressure6',
 'Diastolic Blood Pressure7',
 'Diastolic Blood Pressure8',
 'Diastolic Blood Pressure9',
 'Diastolic Blood Pressure11',
 'Diastolic Blood Pressure12',
 'Diastolic Blood Pressure13',
 'Diastolic Blood Pressure14',
 'Diastolic Blood Pressure15',
 'Diastolic Blood Pressure16',
 'Diastolic Blood Pressure18',
 'Diastolic Blood Pressure19',
 'Diastolic Blood Pressure20',
 'Diastolic Blood Pressure21',
 'Diastolic Blood Pressure2_current_stay',
 'Diastolic Blood Pressure3_current_stay',
 'Diastolic Blood Pressure4_current_stay',
 'Diastolic Blood Pressure5_current_stay',
 'Diastolic Blood Pressure6_current_stay',
 'Diastolic Blood Pressure8_current_stay',
 'Diastolic Blood Pressure12_current_stay',
 'Diastolic Blood Pressure13_current_stay',
 'Diastolic Blood Pressure14_current_stay',
 'Diastolic Blood Pressure16_current_stay',
 'Diastolic Bloo

In [14]:
# Number of column names held in columns_to_drop
len(columns_to_drop)

707