In [176]:
# Libraries
import numpy as np
import pandas as pd
# Limit how many columns pandas shows when a frame is displayed.
pd.set_option('display.max_columns', 16)
#pd.set_option('display.width', 2000)
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import pickle

from IPython.core.interactiveshell import InteractiveShell
# Display the result of every bare expression in a cell, not just the last
# one. Cells below rely on this to show several values (e.g. nunique() counts).
InteractiveShell.ast_node_interactivity = "all"

In [178]:
# Load data
antibiotics = pd.read_csv(r"mimic-iv-2.0/antibiotic.csv")

# Filter for relevant delivery methods.
# NOTE(review): this is a case-insensitive *substring* match, so any route that
# merely contains one of these tokens also passes here; the row-count threshold
# below is what removes the rare unintended ones.
route_list = ['IV', 'PO/NG', 'PO', 'NU', 'ORAL']
route_pattern = '|'.join(route_list)
has_relevant_route = antibiotics['route'].str.contains(route_pattern, na=False, case=False)
antibiotics = antibiotics[has_relevant_route]

# Need to remove some others that got through the filter: keep only routes
# that occur in more than 100 rows. The .copy() gives an independent frame so
# the column assignments below do not write into a slice of the raw data.
antibiotics = antibiotics.groupby('route').filter(lambda x: len(x) > 100).copy()

# Collapse the oral/enteral route spellings into a single 'PO' label.
antibiotics['route'] = antibiotics['route'].replace({'PO/NG':'PO', 'NU':'PO', 'ORAL':'PO'})
antibiotics['starttime'] = pd.to_datetime(antibiotics['starttime'])
antibiotics['stoptime'] = pd.to_datetime(antibiotics['stoptime'])
#antibiotics.stay_id.nunique()

## Filter so only ICU stays given data we are using ##
# Rows without a stay_id are dropped.
antibiotics = antibiotics[antibiotics['stay_id'].notna()]

# Combine so one duration per icu stay and per route:
# earliest start and latest stop of any prescription in that (stay, route).
antibiotic_patients = (
    antibiotics
    .groupby(['stay_id', 'route'])
    .agg(starttime=('starttime', 'min'), stoptime=('stoptime', 'max'))
    .reset_index()
)

# Keep only the IV and PO rows of stays that received BOTH routes.
stay_ids = antibiotic_patients['stay_id']
mask1 = antibiotic_patients['route'] == 'IV'
mask2 = antibiotic_patients['route'] == 'PO'
stay_has_iv = stay_ids.isin(stay_ids[mask1])
stay_has_po = stay_ids.isin(stay_ids[mask2])
m = stay_has_iv & stay_has_po & (mask1 | mask2)
filtered_antibiotic_patients = antibiotic_patients.loc[m]
#filtered_antibiotic_patients.stay_id.nunique()
#filtered_antibiotic_patients

# Create df with one row per hospital stay.
# Split the per-route rows into an IV frame and a PO frame, prefix their time
# columns, and join them side by side on stay_id. drop/rename return new frames
# here, so nothing is mutated in place on a slice of
# filtered_antibiotic_patients.
filtered_antibiotic_patients_iv = (
    filtered_antibiotic_patients[filtered_antibiotic_patients['route'] == 'IV']
    .drop(columns=['route'])
    .rename(columns={'starttime':'iv_starttime', 'stoptime':'iv_stoptime'})
)
filtered_antibiotic_patients_po = (
    filtered_antibiotic_patients[filtered_antibiotic_patients['route'] == 'PO']
    .drop(columns=['route'])
    .rename(columns={'starttime':'po_starttime', 'stoptime':'po_stoptime'})
)
new_antibiotic_patients = pd.merge(filtered_antibiotic_patients_iv, filtered_antibiotic_patients_po, how="inner", on=['stay_id'])
# Displayed as cell output: number of stays that have both an IV and a PO row.
new_antibiotic_patients.stay_id.nunique()
#new_antibiotic_patients

# Remove hours from dates.
# normalize() truncates each timestamp to midnight while keeping the
# datetime64 dtype, so the comparisons and subtractions that follow stay
# vectorised (``.dt.date`` would turn the columns into Python objects).
for time_col in ['iv_starttime', 'iv_stoptime', 'po_starttime', 'po_stoptime']:
    new_antibiotic_patients[time_col] = pd.to_datetime(new_antibiotic_patients[time_col]).dt.normalize()

# Filter for those who have (stop) oral after IV: the last PO day is on or
# after the last IV day. Rows with a missing stop time compare False and drop.
oral_stops_after_iv = new_antibiotic_patients['iv_stoptime'] <= new_antibiotic_patients['po_stoptime']
# .copy() because later cells add columns to this frame.
new_antibiotic_patients2 = new_antibiotic_patients[oral_stops_after_iv].copy()
# Displayed as cell output: number of stays remaining after the filter.
new_antibiotic_patients2.stay_id.nunique()

# Work out duration of both and time difference between iv and oral.
# Each value is a date difference divided by one day, i.e. a float number of
# days. antibiotic_gap is negative when PO starts before IV stops.
# assign() returns a new frame, so no columns are written into a filtered
# slice of new_antibiotic_patients.
one_day = np.timedelta64(1, 'D')
new_antibiotic_patients2 = new_antibiotic_patients2.assign(
    iv_duration=lambda df: (df['iv_stoptime'] - df['iv_starttime']) / one_day,
    po_duration=lambda df: (df['po_stoptime'] - df['po_starttime']) / one_day,
    antibiotic_gap=lambda df: (df['po_starttime'] - df['iv_stoptime']) / one_day,
)

# Filter for relevant durations
MAX_IV_DURATION_DAYS = 7  # Changed to short IV duration

# Non-negative IV and PO durations (rows with a missing duration compare False
# and are dropped) and an IV course of at most MAX_IV_DURATION_DAYS days.
has_valid_iv = new_antibiotic_patients2['iv_duration'] >= 0
has_valid_po = new_antibiotic_patients2['po_duration'] >= 0
has_short_iv = new_antibiotic_patients2['iv_duration'] <= MAX_IV_DURATION_DAYS
# Long PO duration doesnt actually matter as unlikely to be in the ICU the whole time and so wont have data for those other days
# .copy() so the new column below is added to an independent frame.
new_antibiotic_patients3 = new_antibiotic_patients2[has_valid_iv & has_valid_po & has_short_iv].copy()

# Find total duration
new_antibiotic_patients3['total_duration'] = (
    new_antibiotic_patients3['iv_duration']
    + new_antibiotic_patients3['antibiotic_gap']
    + new_antibiotic_patients3['po_duration']
)

#new_antibiotic_patients3.stay_id.nunique()
#new_antibiotic_patients3

new_antibiotic_patients4 = new_antibiotic_patients3.copy()

def _expand_to_daily_rows(stays, start_col, stop_col):
    """Return a copy of ``stays`` with one row per calendar day of each interval.

    For every row, a daily ``pd.date_range`` from ``stays[start_col]`` to
    ``stays[stop_col]`` (both ends included) is generated and exploded into a
    new ``date`` column; all other columns are repeated on each generated row.
    An empty ``stays`` frame yields an empty frame with a ``date`` column.
    """
    daily = stays.copy()
    # Built as an explicit object Series (one DatetimeIndex per row) so that
    # this also works when ``stays`` has no rows.
    daily['date'] = pd.Series(
        [pd.date_range(start=start, end=stop, freq='D')
         for start, stop in zip(stays[start_col], stays[stop_col])],
        index=stays.index,
        dtype=object,
    )
    return daily.explode('date')


# Create df with date range for IV and PO
IV_dates = _expand_to_daily_rows(new_antibiotic_patients4, 'iv_starttime', 'iv_stoptime')
IV_dates['iv_flag'] = 1

PO_dates = _expand_to_daily_rows(new_antibiotic_patients4, 'po_starttime', 'po_stoptime')
PO_dates['first_po_flag'] = 1
# Only the keys and the flag are needed from the PO side of the merge.
PO_dates = PO_dates[['stay_id', 'date', 'first_po_flag']]

# Merge: the outer join keeps every day that is an IV day, a PO day, or both;
# a flag is NaN on days where that route was not active.
dates = pd.merge(IV_dates, PO_dates, on=['stay_id', 'date'], how='outer')
# Fill in NAN values
#dates['first_po_flag'] = dates['first_po_flag'].fillna(0)

# Order
dates = dates.sort_values(by=['stay_id', 'date'])
# Filter for relevant columns
dates = dates[['stay_id', 'date', 'iv_flag', 'first_po_flag']]
# Get stays
#hadm_id_list = dates['hadm_id'].unique().tolist()

14471

10005

In [179]:
# Preview the per-stay, per-day table of IV / PO flags built in the previous cell.
dates

Unnamed: 0,stay_id,date,iv_flag,first_po_flag
0,30000646.0,2194-04-29,1.0,
1,30000646.0,2194-04-30,1.0,
2,30000646.0,2194-05-01,1.0,
3,30000646.0,2194-05-02,1.0,
4,30000646.0,2194-05-03,1.0,1.0
...,...,...,...,...
47008,39996073.0,2175-09-14,,1.0
47009,39996073.0,2175-09-15,,1.0
47010,39996073.0,2175-09-16,,1.0
47011,39996073.0,2175-09-17,,1.0


In [180]:
# Create main po_flag - note this is changed later
#
# po_flag is 1 on every day that has no IV treatment, and also on the row
# directly before such a day within the SAME stay, so that the day IV is
# stopped counts as a switch day too. Every other row gets 0.
# Relies on `dates` being sorted by stay_id and date.

dates = dates.reset_index(drop=True)

no_iv_today = dates['iv_flag'].isna()

# Sanity check: after the outer merge a row should carry at least one flag.
if (no_iv_today & dates['first_po_flag'].isna()).any():
    print('???')

# Look one row ahead, but only inside the same stay: the last row of a stay
# must not be flagged because the NEXT stay happens to start with a PO-only
# day, and the first row of the frame has no previous row to update.
no_iv_next_row = no_iv_today.shift(-1, fill_value=False)
next_row_same_stay = dates['stay_id'].eq(dates['stay_id'].shift(-1))

dates['po_flag'] = (no_iv_today | (no_iv_next_row & next_row_same_stay)).astype(int)


In [None]:
# iv_treatment_length - note this is changed later
#
# Running count (1, 2, 3, ...) of consecutive IV days within a stay; 0 on rows
# without IV. A run restarts whenever the previous row is a different stay, is
# not an IV day, or is not exactly one calendar day earlier.
# Relies on `dates` being sorted by stay_id and date.
day_stamp = pd.to_datetime(dates['date'])
on_iv = dates['iv_flag'].eq(1)

prev_row_on_iv = on_iv.shift(1, fill_value=False)
prev_row_same_stay = dates['stay_id'].eq(dates['stay_id'].shift(1))
prev_row_is_yesterday = day_stamp.diff().eq(pd.Timedelta(days=1))

# True where this IV row extends the run of the row directly above it.
continues_run = on_iv & prev_row_on_iv & prev_row_same_stay & prev_row_is_yesterday
# Every row that does not continue a run opens a new one; non-IV rows therefore
# always sit alone in their own run and sum to 0.
run_id = (~continues_run).cumsum()

dates['iv_treatment_length'] = on_iv.astype(int).groupby(run_id).cumsum()

print(len(dates['iv_treatment_length']))

#pd.options.display.max_rows = 100
#dates[0:100]
dates
dates.info()

In [186]:
# Save
# Write the per-stay, per-day flag table to CSV in the working directory,
# without the DataFrame index.
dates.to_csv('iv_switch_stayid_dates.csv', index=False)