In [None]:
# Libraries
import pandas as pd
# Raise pandas' display limits so wide query results are not truncated when echoed.
pd.set_option('display.max_columns', 40)
pd.set_option('display.width', 2000)
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every bare expression in a cell, not only the last one.
# Cells below that list several expressions rely on this to show each result.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import json, snowflake.connector

# establish the connection to snowflake
# Read the connection defaults inside a context manager so the file handle is
# closed as soon as the JSON has been parsed.
with open('/opt/ich/python-snowflake-defaults.json') as defaults_file:
    connection_defaults = json.load(defaults_file)
ctx = snowflake.connector.connect(**connection_defaults)

# verify and test if connection is working
# The cursor is created before the try block: if ctx.cursor() itself fails there
# is nothing to close, and the finally clause cannot hit an unbound name.
cs = ctx.cursor()
try:
    cs.execute('SELECT current_version(), current_role(), current_warehouse()')
    print(cs.fetchone())
finally:
    cs.close()

In [3]:
# Import every row of ANTI_INFECTIVE_ADMINISTRATION into drug_df
query = '''
SELECT * from ICHT_SANDBOX_PROD.COVOAM_22016.ANTI_INFECTIVE_ADMINISTRATION
'''
cur = ctx.cursor()
try:
    cur.execute(query)
    # The first element of each cursor.description entry is the column name.
    drug_df = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
finally:
    # All rows are in the DataFrame by now; release the cursor.
    cur.close()

In [4]:
# Import every row of ANTI_INFECTIVE_ADMINISTRATION_2023 into drug_df2
query = '''
SELECT * from ICHT_SANDBOX_PROD.COVOAM_22016.ANTI_INFECTIVE_ADMINISTRATION_2023
'''
cur = ctx.cursor()
try:
    cur.execute(query)
    # The first element of each cursor.description entry is the column name.
    drug_df2 = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
finally:
    # All rows are in the DataFrame by now; release the cursor.
    cur.close()

In [31]:
# Keep only the administrations that are in drug_df2 but not in drug_df.
# Need to filter columns as update column changed
comparison_columns = ['SUBJECT', 'PRESCRIPTION_ORDER_ID', 'THERAPEUTICAL_CLASS', 'MEDICATION_NAME_SHORT', 'MEDICATION_NAME', 'ADMINISTRATION_DATETIME', 'DOSAGE_UNIT', 'ROUTE']
# De-duplicate each source on the compared columns first. Without this, a new row
# that occurs more than once inside drug_df2 would be discarded by keep=False below,
# even though it never appears in drug_df.
previous_rows = drug_df[comparison_columns].drop_duplicates()
latest_rows = drug_df2[comparison_columns].drop_duplicates()
# previous_rows is concatenated twice so every one of its rows has a duplicate and is
# removed by keep=False, together with any latest_rows entry that overlaps it.
drug_df3 = pd.concat([previous_rows, previous_rows, latest_rows]).drop_duplicates(keep=False)

In [32]:
# Filter based on date - much easier this way and ensures prospective
# Rows whose ADMINISTRATION_DATETIME compares >= '2022' are kept.
administered_from_2022 = drug_df3['ADMINISTRATION_DATETIME'] >= '2022'
antibiotics = drug_df3[administered_from_2022]

In [33]:
# Sanity check: latest and earliest administration timestamps left after the date filter.
antibiotics.ADMINISTRATION_DATETIME.max()
antibiotics.ADMINISTRATION_DATETIME.min()

Timestamp('2023-06-01 00:00:00')

Timestamp('2022-01-01 00:00:00')

In [34]:
# Import the anti-infective flag table from CSV.
# Later cells read its 'switch_antibiotic_flag' and 'MEDICATION_NAME_SHORT' columns.
path = r'switch_data/anti_infectives_flag_final.csv'
anti_infectives_flag_final = pd.read_csv(path)

In [35]:
# Filter for relevant delivery methods
# Only routes with more than 3000 rows are kept. The count uses the raw route
# labels, i.e. before the oral synonyms below are collapsed into 'PO'.
frequent_routes = antibiotics.groupby('ROUTE').filter(lambda route_rows: len(route_rows) > 3000)
antibiotics = frequent_routes.assign(
    ROUTE=frequent_routes['ROUTE'].replace({'oral': 'PO', 'enteral':'PO', 'oromucosal':'PO', 'oral/NG':'PO', 'NG': 'PO'}),
    # Reduce the timestamp to a calendar date (time of day is discarded).
    ADMINISTRATION_DATETIME=pd.to_datetime(frequent_routes['ADMINISTRATION_DATETIME']).dt.date,
)

In [36]:
# Get list of antibiotics: short names of the entries flagged 'Yes' for switching
is_switch_antibiotic = anti_infectives_flag_final['switch_antibiotic_flag'] == 'Yes'
antibiotic_list = anti_infectives_flag_final.loc[is_switch_antibiotic, 'MEDICATION_NAME_SHORT'].to_list()
len(antibiotic_list)

73

In [37]:
# Filter for antibiotics: keep rows whose short medication name is on the list
on_antibiotic_list = antibiotics['MEDICATION_NAME_SHORT'].isin(antibiotic_list)
antibiotics = antibiotics[on_antibiotic_list]

In [38]:
# Filter columns: only subject, administration date and route are needed from here on
kept_columns = ['SUBJECT', 'ADMINISTRATION_DATETIME', 'ROUTE']
antibiotics = antibiotics.loc[:, kept_columns]

In [39]:
# Number of distinct SUBJECT values left after the antibiotic filter.
antibiotics.SUBJECT.nunique()

25523

In [40]:
# Row count per route after the oral synonyms were mapped to 'PO'.
antibiotics.ROUTE.value_counts()

IV    153770
PO     92966
Name: ROUTE, dtype: int64

In [41]:
# Filter for those with IV and PO
# A subject qualifies when its rows carry more than one distinct ROUTE value.
routes_per_subject = antibiotics.groupby(['SUBJECT'])['ROUTE'].nunique()
subjects_with_both = routes_per_subject[routes_per_subject > 1].index.to_list()
filtered_antibiotics = antibiotics[antibiotics['SUBJECT'].isin(subjects_with_both)]

In [42]:
# Number of distinct subjects that have more than one route.
filtered_antibiotics.SUBJECT.nunique()

5485

In [48]:
# Import every row of EPISODES_2023 into episodes
query = '''
SELECT * from ICHT_PROD.ICHT_COVID.EPISODES_2023
'''
cur = ctx.cursor()
try:
    cur.execute(query)
    # The first element of each cursor.description entry is the column name.
    episodes = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
finally:
    # All rows are in the DataFrame by now; release the cursor.
    cur.close()

In [49]:
# Merge with episodes
# SUBJECT is the only column the two frames share, so it is the join key. The inner
# join pairs every administration with every spell of the same subject.
episode_columns = episodes[['SUBJECT', 'SPELL_IDENTIFIER', 'ADMISSION_DATE_TIME', 'DISCHARGE_DATE_TIME']]
antibiotic_episodes = filtered_antibiotics.merge(episode_columns, how='inner', on='SUBJECT')

In [50]:
# Reduce both spell boundary timestamps to calendar dates so they compare directly
# with ADMINISTRATION_DATETIME.
for boundary_column in ('ADMISSION_DATE_TIME', 'DISCHARGE_DATE_TIME'):
    antibiotic_episodes[boundary_column] = pd.to_datetime(antibiotic_episodes[boundary_column]).dt.date

In [51]:
# Filter for where antibiotic date in spell date range (both bounds inclusive)
administered_on = antibiotic_episodes['ADMINISTRATION_DATETIME']
on_or_after_admission = administered_on >= antibiotic_episodes['ADMISSION_DATE_TIME']
on_or_before_discharge = administered_on <= antibiotic_episodes['DISCHARGE_DATE_TIME']
antibiotic_episodes = antibiotic_episodes[on_or_after_admission & on_or_before_discharge].drop_duplicates()

In [53]:
# Filter for those with IV and PO in same spell
# Count distinct routes per (subject, spell) and keep the spell ids with more than one.
routes_per_spell = antibiotic_episodes.groupby(['SUBJECT', 'SPELL_IDENTIFIER'])['ROUTE'].nunique()
spells_with_both = routes_per_spell[routes_per_spell > 1].index.get_level_values(1).to_list()
filtered_antibiotic_episodes = antibiotic_episodes[antibiotic_episodes['SPELL_IDENTIFIER'].isin(spells_with_both)]

In [None]:
# Order rows by subject, spell, admission date, administration date, then route
sort_keys = ['SUBJECT', 'SPELL_IDENTIFIER', 'ADMISSION_DATE_TIME', 'ADMINISTRATION_DATETIME', 'ROUTE']
filtered_antibiotic_episodes = filtered_antibiotic_episodes.sort_values(by=sort_keys)

In [None]:
# Create po_flag
def po_flag_fun(row):
    """Encode a row's ROUTE as a numeric flag.

    Returns 0 when row['ROUTE'] is 'IV', 1 when it is 'PO', and None for any
    other value.
    """
    route = row['ROUTE']
    return 0 if route == 'IV' else (1 if route == 'PO' else None)

# Row-wise encoding of ROUTE into po_flag (IV -> 0, PO -> 1)
filtered_antibiotic_episodes['po_flag'] = filtered_antibiotic_episodes.apply(po_flag_fun, axis=1)

# Keep one row per subject / administration date / spell. The first row of each
# duplicate set is retained (pandas default), so when a date has both routes the
# survivor depends on the current row order.
one_row_per_day_keys = ['SUBJECT', 'ADMINISTRATION_DATETIME', 'SPELL_IDENTIFIER', 'ADMISSION_DATE_TIME', 'DISCHARGE_DATE_TIME']
filtered_antibiotic_episodes = filtered_antibiotic_episodes.drop_duplicates(subset=one_row_per_day_keys)

In [57]:
# Check nothing weird got through: no row should have po_flag == 1 while ROUTE is 'IV'
filtered_antibiotic_episodes[(filtered_antibiotic_episodes['po_flag'] == 1) & (filtered_antibiotic_episodes['ROUTE'] == 'IV')]

Unnamed: 0,SUBJECT,ADMINISTRATION_DATETIME,ROUTE,SPELL_IDENTIFIER,ADMISSION_DATE_TIME,DISCHARGE_DATE_TIME,po_flag


In [58]:
def drop_po_first_fun(data):
    """Remove every spell whose first row is an oral (po_flag == 1) row.

    "First" means the first row of that SPELL_IDENTIFIER in the frame's current
    order, so the caller is expected to have sorted the rows beforehand.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'SPELL_IDENTIFIER' and 'po_flag'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The rows of the remaining spells, with a fresh 0..n-1 index.
    """
    # drop_duplicates(keep='first') leaves exactly the opening row of each spell,
    # which is the only row that decides whether the spell is removed.
    first_rows = data.drop_duplicates(subset='SPELL_IDENTIFIER', keep='first')
    # Rows with a missing spell id never define a spell to remove.
    bad_codes = first_rows.loc[first_rows['po_flag'] == 1, 'SPELL_IDENTIFIER'].dropna()
    new_data = data[~data['SPELL_IDENTIFIER'].isin(bad_codes)]
    return new_data.reset_index(drop=True)

In [59]:
# Remove those who have oral first: spells whose first row has po_flag == 1 are dropped
filtered_antibiotic_episodes_2 = drop_po_first_fun(filtered_antibiotic_episodes)

In [60]:
# Distinct spells, then distinct subjects, remaining after the oral-first spells were removed.
filtered_antibiotic_episodes_2.SPELL_IDENTIFIER.nunique()
filtered_antibiotic_episodes_2.SUBJECT.nunique()

4606

4226

In [61]:
# Filter for those with IV and PO in same spell
# This has to be repeated: rows were collapsed to one per day, so a spell whose IV
# and PO doses fell on the same day may now show only one po_flag value.
flags_per_spell = filtered_antibiotic_episodes_2.groupby('SPELL_IDENTIFIER')['po_flag'].nunique()
spells_with_both_flags = flags_per_spell[flags_per_spell > 1].index.to_list()
filtered_antibiotic_episodes_2 = filtered_antibiotic_episodes_2[filtered_antibiotic_episodes_2['SPELL_IDENTIFIER'].isin(spells_with_both_flags)]

In [62]:
# Distinct spells, then distinct subjects, that still contain both po_flag values.
filtered_antibiotic_episodes_2.SPELL_IDENTIFIER.nunique()
filtered_antibiotic_episodes_2.SUBJECT.nunique()

3910

3618

In [63]:
# Change iv_treatment length to prior days treatment length 

# For only having one positive switch day per stay
def iv_treatment_length_fun(data):
    """Add an 'iv_treatment_length' column: IV rows already seen in the spell.

    Rows are walked in their current order and adjacent rows with the same
    SPELL_IDENTIFIER are treated as one spell, so the frame is expected to be
    sorted by spell and then chronologically.

    Value written for each row:
      * IV row: the number of IV rows that preceded it in the spell.
      * First non-IV row that follows at least one row of the spell: the number of
        IV rows counted so far (this is the single "switch" row of the stay).
      * 999 (sentinel): a further non-IV row after that switch row, or a non-IV row
        that opens a spell which then continues.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'SPELL_IDENTIFIER' and 'ROUTE'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with the extra 'iv_treatment_length' column.

    Notes
    -----
    The first and last rows of the frame go through the same rules as every other
    row. Previously the last row always received the running count (so a trailing
    repeat oral row got 0 instead of 999) and the first row always received 0 and
    started the count at 1 regardless of its route or of where its spell ended.
    """
    # Read the two columns once instead of building a row Series per comparison.
    spells = data['SPELL_IDENTIFIER'].tolist()
    routes = data['ROUTE'].tolist()
    last = len(spells) - 1

    cumcount = []
    count = 0  # IV rows seen so far in the current spell
    flag = 0   # 1 once the spell's switch row has been given its count

    for x in range(len(spells)):
        starts_spell = x == 0 or spells[x] != spells[x - 1]
        spell_continues = x < last and spells[x] == spells[x + 1]
        is_iv = routes[x] == 'IV'

        if spell_continues:
            if is_iv:
                cumcount.append(count)
                count += 1
                flag = 0
            elif flag == 1 or starts_spell:
                # Repeat oral row, or an oral row opening the spell: not a switch.
                cumcount.append(999)
                count = 0
            else:
                # First oral row after IV treatment: record the IV length so far.
                cumcount.append(count)
                count = 0
                flag = 1
        else:
            # Last row of the spell (or of the frame).
            if is_iv or flag == 0:
                cumcount.append(count)
            else:
                cumcount.append(999)
            # Nothing may leak into the next spell.
            count = 0
            flag = 0

    print(len(cumcount))

    # Assign on a copy so the caller's frame (often a filtered view) is left untouched.
    result = data.copy()
    result['iv_treatment_length'] = cumcount

    return result

In [64]:
# Add the iv_treatment_length column; the function also prints how many values it produced.
filtered_antibiotic_episodes_2 = iv_treatment_length_fun(filtered_antibiotic_episodes_2)

27219


In [65]:
# Reorder columns: identifiers first, then spell dates, administration date, route and derived fields
filtered_antibiotic_episodes_2 = filtered_antibiotic_episodes_2[['SUBJECT', 'SPELL_IDENTIFIER', 'ADMISSION_DATE_TIME', 'DISCHARGE_DATE_TIME', 'ADMINISTRATION_DATETIME', 'ROUTE', 'po_flag', 'iv_treatment_length']]

In [66]:
# Set max iv treatment length
# Collect the spells that have at least one row with iv_treatment_length above 8;
# the 999 sentinel is excluded so it is not mistaken for a long IV course.
long_stay_list = filtered_antibiotic_episodes_2[(filtered_antibiotic_episodes_2['iv_treatment_length'] > 8) & (filtered_antibiotic_episodes_2['iv_treatment_length'] < 999)]['SPELL_IDENTIFIER'].unique().tolist()
len(long_stay_list)
# Remove the spells collected above, i.e. those with an iv_treatment_length greater than 8.
# NOTE(review): this comment used to say "longer than 7 days" while the code tests > 8 -
# confirm which threshold is intended.
filtered_antibiotic_episodes_3 = filtered_antibiotic_episodes_2[~filtered_antibiotic_episodes_2['SPELL_IDENTIFIER'].isin(long_stay_list)]

270

In [67]:
def iv_post_po_fun(data):
    """Drop every spell in which an IV row comes after a PO row.

    Rows are walked in their current order and adjacent rows with the same
    SPELL_IDENTIFIER are treated as one spell, so the frame is expected to be
    sorted by spell and then chronologically. The number of offending IV rows is
    printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'SPELL_IDENTIFIER' and 'ROUTE'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The rows of the spells that have no IV row after a PO row; the original
        index is kept.

    Notes
    -----
    Every row is inspected, including the last one. Previously the final row of
    the frame was skipped, so a spell whose only IV-after-PO row was that final
    row was not removed.
    """
    # Read the two columns once instead of building a row Series per comparison.
    spells = data['SPELL_IDENTIFIER'].tolist()
    routes = data['ROUTE'].tolist()

    cumcount = []  # spell id of every IV row that follows a PO row in its spell
    flag = 0       # 1 once a PO row has been seen in the current spell

    for x, (spell, route) in enumerate(zip(spells, routes)):
        if x > 0 and spell != spells[x - 1]:
            # A new spell starts here: forget any PO seen in the previous one.
            flag = 0
        if route == 'IV':
            if flag == 1:
                cumcount.append(spell)
        elif route == 'PO':
            flag = 1

    print(len(cumcount))

    data = data[~data['SPELL_IDENTIFIER'].isin(cumcount)]

    return data

In [68]:
# Remove those who have IV after oral; the function prints how many such IV rows it found
filtered_antibiotic_episodes_4 = iv_post_po_fun(filtered_antibiotic_episodes_3)

724


In [71]:
# Final cohort summary: distinct subjects, distinct spells, rows per po_flag value,
# and rows per iv_treatment_length value (999 is the sentinel written by iv_treatment_length_fun).
filtered_antibiotic_episodes_4.SUBJECT.nunique()
filtered_antibiotic_episodes_4.SPELL_IDENTIFIER.nunique()
filtered_antibiotic_episodes_4.po_flag.value_counts()
filtered_antibiotic_episodes_4.iv_treatment_length.value_counts()

3217

3431

1    9672
0    9443
Name: po_flag, dtype: int64

999    6240
0      3432
1      3431
2      2211
3      1557
4      1023
5       615
6       352
7       185
8        69
Name: iv_treatment_length, dtype: int64

In [72]:
# Save
#filtered_antibiotic_episodes_4.to_csv('switch_data/antibiotic_po_flag_2023.csv', index=False)