In [None]:
import pickle
import pandas as pd
import networkx as nx
import numpy as np
import datetime
from tqdm import tqdm

# HF patients on spironolactone

In [None]:
# Load the pickled mapping whose keys are the spironolactone patient ids and whose
# values are stored later as each patient's `first_mention_spiro`.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('/projects/RALES TRIAL/1A/spiro dataextraction/spiro data/first_mention_spiro.pickle', 'rb') as f:
    first_mention_spiro = pickle.load(f)
# Display the number of entries in the mapping.
len(first_mention_spiro)

In [None]:
# Patient ids of the spironolactone cohort (the keys of the first-mention mapping).
spiro = list(first_mention_spiro.keys())

# Spironolactone patients on ACE inhibitors and loop diuretics (unstructured data)

In [None]:
def graph_from_onto(onto):
    """Turn an ontology mapping into a directed graph.

    Parameters
    ----------
    onto : mapping
        Maps a source identifier (str) to an iterable of target identifiers (str).

    Returns
    -------
    networkx.DiGraph
        One edge ``source -> target`` per (source, target) pair, with every
        ``'S-'`` substring removed from both identifiers.
    """
    edges = [
        (source.replace('S-', ''), target.replace('S-', ''))
        for source, targets in onto.items()
        for target in targets
    ]
    graph = nx.DiGraph()
    graph.add_edges_from(edges)
    return graph

def load_onto(o):
    """Load a pickled ontology mapping and return it as a directed graph.

    Parameters
    ----------
    o : str
        File name of the pickle inside ``/projects/data/GS/``.

    Returns
    -------
    The graph built by ``graph_from_onto`` from the unpickled mapping.
    """
    onto_path = f'/projects/data/GS/{o}'
    with open(onto_path, 'rb') as handle:
        mapping = pickle.load(handle)
    return graph_from_onto(mapping)

def expand_codes(onto, codes):
    """Expand each group of top-level codes with their graph ancestors.

    Parameters
    ----------
    onto : graph
        Directed graph supporting ``code in onto`` and ``nx.ancestors``.
    codes : mapping
        Group name -> collection of top-level codes.

    Returns
    -------
    dict
        Group name -> set holding every top-level code plus, for codes present
        in ``onto``, every node returned by ``nx.ancestors(onto, code)`` (the
        nodes from which the code is reachable).

    Codes missing from ``onto`` are reported on stdout and kept as-is. A summary
    line (name, number of top codes, expanded size) is printed per group.
    """
    expanded = {}
    for name, top_codes in codes.items():
        members = set()
        for code in top_codes:
            members.add(code)
            if code not in onto:
                print("NOT FOUND:", code, name)
                continue
            members.update(nx.ancestors(onto, code))
        expanded[name] = members
        print(name, len(top_codes), len(members))
    return expanded

In [None]:
# Top-level concept codes for each medication class; these seed lists are expanded
# against the ontology graph in a later cell.
# NOTE(review): the codes look like SNOMED CT concept ids -- confirm the code system.
medications = {
    'ace_inhibitor' : ['372733002', '41549009', '108564000', '108569005', '777142001', '108562001', '29439004'],
    'loop_diuretic' : ['830118008', '372691009', '86647004', '81609008', '777810005']
}

In [None]:
# Build the ontology graph from the pickled mapping.
onto = load_onto('isa_rela_ch2pt_202009.pickle')
# Rebind `medications` from {class: seed list} to {class: expanded code set}.
# NOTE: because the name is reused, re-running this cell feeds the already
# expanded sets back into expand_codes; re-run the seed-definition cell first.
medications = expand_codes(onto,medications)

In [None]:
%%time
# Load the per-patient lookup used below as pt2cui_pos_dates[patient][code] -> dates.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('/projects/data/GS/pt2cui_pos_dates.pickle', 'rb') as f:
    pt2cui_pos_dates = pickle.load(f)

In [None]:
def dates_ex(pt_data, codes, min_mentions=2):
    """Collect the dates recorded for any of ``codes`` in one patient's data.

    Parameters
    ----------
    pt_data : mapping
        Code -> collection of dates for a single patient. Each collection must
        support ``len`` and ``.union`` (e.g. a ``set``).
    codes : iterable
        Codes to look up in ``pt_data``.
    min_mentions : int, default 2
        A code contributes only when it has at least this many dates.

    Returns
    -------
    The union of the date collections of all qualifying codes, or ``None`` when
    no code qualifies. When exactly one code qualifies, the returned object is
    the very collection stored in ``pt_data`` (not a copy).
    """
    dates = None
    for code in codes:
        if code not in pt_data:
            continue
        code_dates = pt_data[code]
        if len(code_dates) < min_mentions:
            continue
        # Identity test rather than `== None`: equality would be delegated to the
        # collection's own __eq__, which may be elementwise or may raise.
        dates = code_dates if dates is None else dates.union(code_dates)
    return dates

# Build one record per spironolactone patient: the id, the first spironolactone
# mention, and for each medication class either the collection of dates returned
# by dates_ex or NaN when the patient has no qualifying dates.
rows = []
for pt in spiro:
    # Patients absent from the lookup are treated as having no recorded codes.
    pt_data = pt2cui_pos_dates.get(pt, {})
    row = {'client_idcode': pt, 'first_mention_spiro': first_mention_spiro[pt]}
    for concept, codes in medications.items():
        dates = dates_ex(pt_data, codes)
        # Identity test instead of `== None`, so the check never depends on how
        # the date collection implements equality.
        row[f"{concept}_date"] = np.nan if dates is None else dates
    rows.append(row)

In [None]:
# One row per patient; the `<class>_date` cells hold a collection of dates or NaN.
df = pd.DataFrame(rows)
# Display (n_rows, n_columns).
df.shape

In [None]:
%%time
# Explode every column after `client_idcode` (one row per element of each date
# collection) and join the exploded Series back onto the id column by index.
# NOTE(review): the exploded Series repeat index labels, so joining several of them
# on the index pairs every date of one medication class with every date of the
# other for the same patient (a per-patient cross product) -- confirm, and do not
# rely on plain per-patient row counts taken on this frame.
df = df[['client_idcode']].join((df[i].explode() for i in df.iloc[:,1:]))

In [None]:
%%time
# Normalise each `<class>_date` column to calendar dates (time of day is dropped).
for k in medications.keys():
    df[f'{k}_date'] = pd.to_datetime(df[f'{k}_date']).dt.date

In [None]:
%%time
# `<class>_delta` = first spironolactone mention minus the medication date, so a
# positive delta means the medication date precedes the first spironolactone mention.
for k in medications.keys():
    df[f'{k}_delta'] = pd.to_datetime(df['first_mention_spiro']) - pd.to_datetime(df[f'{k}_date'])

In [None]:
# Preview the first rows, including the new `<class>_delta` columns.
df.head()

In [None]:
# Flag, per medication class, the rows whose delta lies between 1 and 183 days
# (both bounds inclusive). Rows with a missing delta compare as False.
min_time = datetime.timedelta(days=1)
max_time = datetime.timedelta(days=183)
for k in medications:
    t1 = df[f'{k}_delta'].ge(min_time)
    t2 = df[f'{k}_delta'].le(max_time)
    in_window = t1 & t2
    df[f'{k}_in_window'] = in_window

In [None]:
# Preview the first rows, including the new `<class>_in_window` flags.
df.head()

In [None]:
# Per-patient number of distinct in-window dates for each medication class,
# broadcast back to every row of that patient.
#
# The frame was built by joining exploded date columns on a repeated index, so a
# patient has one row per combination of dates across the medication classes and
# the same (patient, date) pair appears on many rows. Summing the in-window flag
# per patient would therefore multiply one class's count by the number of dates
# of the other class. Counting distinct (patient, date) pairs removes that
# inflation.
# NOTE(review): the date columns hold calendar dates at this point, so several
# mentions on the same day count once -- confirm this matches the intended
# definition of "mention".
for k in medications.keys():
    flagged = df.loc[df[f'{k}_in_window'].eq(True), ['client_idcode', f'{k}_date']]
    distinct_dates = flagged.drop_duplicates().groupby('client_idcode').size()
    df[f'{k}_true_count'] = (
        # Patients without any in-window date are absent from the lookup -> 0.
        df['client_idcode'].map(distinct_dates).fillna(0).astype(int)
    )

In [None]:
# Keep the first row per patient. Only the per-patient `<class>_true_count` columns
# remain meaningful afterwards; the date/delta/in_window values are whatever the
# first row happened to hold.
df = df.drop_duplicates('client_idcode') 

In [None]:
# Number of rows (summed over client_idcode groups) with an ACE-inhibitor count of at least 2.
df[df['ace_inhibitor_true_count'] >= 2].groupby('client_idcode')['ace_inhibitor_true_count'].count().sum()

In [None]:
# Number of rows (summed over client_idcode groups) with a loop-diuretic count of at least 2.
df[df['loop_diuretic_true_count'] >= 2].groupby('client_idcode')['loop_diuretic_true_count'].count().sum()

In [None]:
# Ids of patients with an ACE-inhibitor in-window count of at least 2.
# A boolean selection replaces the row-by-row iterrows/df.loc loop: it yields the
# same ids, does not depend on index labels being unique, and is far faster.
ACE_inhibitors_ids = set(df.loc[df['ace_inhibitor_true_count'] >= 2, 'client_idcode'])
len(ACE_inhibitors_ids)

In [None]:
# Ids of patients with a loop-diuretic in-window count of at least 2.
# A boolean selection replaces the row-by-row iterrows/df.loc loop: it yields the
# same ids, does not depend on index labels being unique, and is far faster.
loop_diuretics_ids = set(df.loc[df['loop_diuretic_true_count'] >= 2, 'client_idcode'])
len(loop_diuretics_ids)

# Spironolactone patients on ACE inhibitors and loop diuretics (structured data)

In [None]:
# Load the orders table and keep only the columns used below.
# NOTE: `df` is rebound here from the free-text frame to the orders table.
df = pd.read_csv('/projects/data/GS/HF/gs_all_orders.csv') # load the orders table
df = df[['client_idcode', 'order_name', 'order_summaryline', 'order_entered', 'order_arrivaldtm']]
# Keep orders of spironolactone patients only. The mask gets its own name so that
# `spiro` stays the list of patient ids: rebinding it to a boolean Series would
# break this cell (and every earlier cell using `spiro`) on re-execution.
is_spiro_patient = df['client_idcode'].isin(spiro)
# .copy() makes the filtered frame independent, so the column assignments in the
# following cells do not trigger SettingWithCopyWarning.
df = df[is_spiro_patient].copy()

In [None]:
# Upper-cased order name, matched against upper-case drug names below.
# Rows with a missing order_name stay missing (NaN) in `drug_name`.
df['drug_name'] = df['order_name'].str.upper()

In [None]:
# Attach each patient's first spironolactone mention (looked up by client_idcode) to every order row.
df['first_spiro_prescription'] = df['client_idcode'].map(first_mention_spiro)
df.head()

In [None]:
# Parse order arrival timestamps as UTC and reduce them to calendar dates.
df['order_arrivaldtm'] = pd.to_datetime(df['order_arrivaldtm'], utc=True).dt.date 
# `difference` = first spironolactone mention minus order arrival date, so a
# positive value means the order arrived before the first spironolactone mention.
df['difference'] = pd.to_datetime(df['first_spiro_prescription']) - pd.to_datetime(df['order_arrivaldtm'])

In [None]:
# Preview the first order rows, including the new `difference` column.
df.head()

In [None]:
# Flag orders whose `difference` lies between 1 and 183 days (both bounds
# inclusive). Rows with a missing difference compare as False.
min_time = datetime.timedelta(days=1)
max_time = datetime.timedelta(days=183)
t1 = df['difference'].ge(min_time)
t2 = df['difference'].le(max_time)
in_window = t1 & t2
df['in_window'] = in_window

In [None]:
# Keep only the orders flagged as inside the window.
df = df.loc[df['in_window'].eq(True)]

### ACE inhibitors

In [None]:
%%time
# Flag orders whose upper-cased name contains any ACE-inhibitor name.
# A vectorised substring search replaces the row-by-row loop; `na=False` treats
# orders with a missing name as non-matching, where the `in` test on a float NaN
# would raise TypeError.
drug_names = ['RAMIPRIL', 'FOSINOPRIL', 'PERINDOPRIL', 'LISINOPRIL', 'TRANDOLAPRIL', 'CAPTOPRIL', 'ENALAPRIL']
# The names are plain letters, so joining them with '|' is a safe regex alternation.
# `keep` stays a plain list of booleans, one per row of `df`.
keep = df['drug_name'].str.contains('|'.join(drug_names), na=False).tolist()

In [None]:
# Order rows whose drug name matched one of the ACE-inhibitor names (boolean selection).
ace_inhibitors = df.loc[keep]

In [None]:
# True marks rows repeating an already-seen client_idcode, False the first row per patient.
duplicates = ace_inhibitors['client_idcode'].duplicated()
duplicates.value_counts()

In [None]:
# Keep the first matching order per patient; the row count is then the number of patients.
ace_inhibitors = ace_inhibitors.drop_duplicates('client_idcode') 
ace_inhibitors.shape

In [None]:
# Ids of patients with at least one in-window ACE-inhibitor order.
ACE_inhibitors_ids_structured_data = set(ace_inhibitors['client_idcode'])

### Loop diuretics

In [None]:
%%time
# Flag orders whose upper-cased name contains any loop-diuretic name.
# A vectorised substring search replaces the row-by-row loop; `na=False` treats
# orders with a missing name as non-matching, where the `in` test on a float NaN
# would raise TypeError.
drug_names = ['BUMETANIDE', 'FUROSEMIDE']
# The names are plain letters, so joining them with '|' is a safe regex alternation.
# `keep` stays a plain list of booleans, one per row of `df`.
keep = df['drug_name'].str.contains('|'.join(drug_names), na=False).tolist()

In [None]:
# Order rows whose drug name matched one of the loop-diuretic names (boolean selection).
loop_diuretics = df.loc[keep]

In [None]:
# True marks rows repeating an already-seen client_idcode, False the first row per patient.
duplicates = loop_diuretics['client_idcode'].duplicated()
duplicates.value_counts()

In [None]:
# Keep the first matching order per patient; the row count is then the number of patients.
loop_diuretics = loop_diuretics.drop_duplicates('client_idcode') 
loop_diuretics.shape

In [None]:
# Ids of patients with at least one in-window loop-diuretic order.
loop_diuretics_structured_data = set(loop_diuretics['client_idcode'])

# Combining the structured and unstructured data

In [None]:
# Patients identified as on ACE inhibitors by either source (free text or orders).
ACE_inhibitors = ACE_inhibitors_ids | ACE_inhibitors_ids_structured_data
len(ACE_inhibitors)

In [None]:
# Patients identified as on loop diuretics by either source (free text or orders).
Loop_diuretics = loop_diuretics_ids | loop_diuretics_structured_data
len(Loop_diuretics)

In [None]:
# Persist the combined loop-diuretic patient id set.
with open ('/projects/RALES TRIAL/1A/spiro dataextraction/spiro data/loop_diuretics.pickle', 'wb') as f:
    pickle.dump(Loop_diuretics,f)
    
# Persist the combined ACE-inhibitor patient id set.
with open ('/projects/RALES TRIAL/1A/spiro dataextraction/spiro data/ACE_inhibitors.pickle', 'wb') as f:
    pickle.dump(ACE_inhibitors,f)