In [24]:
"""Subtract indications from FAERS pickle"""

'Subtract indications from FAERS pickle'

In [25]:
import pickle
import itertools
import datetime

In [26]:
# Root directory that every input/output path in this notebook is built from.
basedir = '/scratch/ias41'

# Location of the indications pickle (only the path is defined here).
indications_file = basedir + '/ae_code/indications/results/molregno2meddra_inds_via_hlt.pkl'

# Load the FAERS adverse-event pickle into `faers_aes`.
with open(
    basedir + '/ae_code/faers_aes/results/20191216_no_PSM_molregno2aes_PRR3_chi4_faers_min5drugs_random_controls_200v.pkl',
    'rb',
) as faers_handle:
    faers_aes = pickle.load(faers_handle)

In [27]:
# Read the indications pickle from the path configured in `indications_file`.
with open(indications_file, 'rb') as inds_handle:
    indications = pickle.load(inds_handle)

# Drop every entry whose collection of indications is empty. The keys are
# collected first so the mapping is not modified while it is being iterated.
to_remove = [entry for entry, terms in indications.items() if len(terms) == 0]
for entry in to_remove:
    del indications[entry]

In [28]:
# Spot-check: show the adverse-event terms stored in the FAERS mapping for key 139281.
faers_aes[139281]

{'ACUTE RESPIRATORY FAILURE',
 'ANAEMIA',
 'ANGINA PECTORIS',
 'ANHEDONIA',
 'ANURIA',
 'APNOEA',
 'ARTERIOSCLEROSIS',
 'ARTERIOSCLEROSIS CORONARY ARTERY',
 'ASCITES',
 'ATELECTASIS',
 'ATRIAL FIBRILLATION',
 'BLOOD ALBUMIN DECREASED',
 'BLOOD CREATININE INCREASED',
 'BLOOD POTASSIUM INCREASED',
 'BLOOD SODIUM DECREASED',
 'BLOOD UREA INCREASED',
 'BRADYCARDIA',
 'CARDIAC FAILURE',
 'CARDIAC FAILURE CONGESTIVE',
 'CARDIAC MURMUR',
 'CARDIOGENIC SHOCK',
 'CARDIOMEGALY',
 'CARDIOMYOPATHY',
 'CELLULITIS',
 'CHRONIC OBSTRUCTIVE PULMONARY DISEASE',
 'CIRCUMSTANCE OR INFORMATION CAPABLE OF LEADING TO MEDICATION ERROR',
 'CLOSTRIDIUM DIFFICILE COLITIS',
 'COAGULOPATHY',
 'CORONARY ARTERY BYPASS',
 'CYANOSIS',
 'CYTOLYTIC HEPATITIS',
 'DEHYDRATION',
 'DERMATITIS EXFOLIATIVE',
 'DIABETES MELLITUS INADEQUATE CONTROL',
 'DIALYSIS',
 'DIVERTICULUM',
 'DRUG INTERACTION',
 'DRUG RASH WITH EOSINOPHILIA AND SYSTEMIC SYMPTOMS',
 'DYSPNOEA EXERTIONAL',
 'ECONOMIC PROBLEM',
 'ECZEMA',
 'EJECTION FRACTION

In [29]:
# Spot-check: show the indication terms stored for key 139281.
indications[139281]

{'ACANTHAMEBA INFECTION NOS',
 'ACCESSORY KIDNEY',
 'ACUTE KIDNEY FAILURE',
 'ACUTE KIDNEY INJURY',
 'ACUTE LEFT VENTRICULAR FAILURE',
 'ACUTE PRERENAL FAILURE',
 'ACUTE PULMONARY OEDEMA',
 'ACUTE RENAL FAILURE',
 'ACUTE RENAL FAILURE, UNSPECIFIED',
 'ACUTE RENAL INSUFFICIENCY',
 'AEROMONA INFECTION',
 "ALPORT'S SYNDROME",
 'AMEBIC INFECTIONS',
 'AMENORRHEA NOS',
 'ANEMIA NOS',
 'ANEMIA PERNICIOUS',
 'ANGIOEDEMA AND URTICARIA',
 'ANURIA',
 'ARREST OF URINARY OUTPUT',
 'ASCITES',
 'ATTENTION DEFICIT AND DISRUPTIVE BEHAVIOR DISORDERS',
 'AUTOIMMUNE HEMOLYTIC ANEMIA NOS',
 'BACTERIAL INFECTIOUS DISORDERS',
 'BENIGN FAMILIAL HAEMATURIA',
 'BETA THALASSEMIA',
 'BETA-THALASSEMIA',
 'BRIGHTS DISEASE',
 'CAPUT MEDUSAE',
 'CARD',
 'CARDIAC ARRHYTHMIAS',
 'CARDIAC ASTHMA',
 'CARDIAC CIRRHOSIS',
 'CARDIAC DISORDERS',
 'CARDIAC FAILURE',
 'CARDIAC FAILURE (NOS)',
 'CARDIAC FAILURE ACUTE',
 'CARDIAC FAILURE CHRONIC',
 'CARDIAC FAILURE CONGESTIVE',
 'CARDIAC FAILURE HIGH OUTPUT',
 'CARDIAC FAILURE L

In [30]:
# Per-compound adverse-event sets with the compound's indication terms removed.
faers_aes_filtered = {}

for molregno, reported_aes in faers_aes.items():
    try:
        known_indications = indications[molregno]
    except KeyError:
        # No indication entry for this compound: none of its AEs are kept,
        # so the compound is stored with an empty set.
        faers_aes_filtered[molregno] = set()
        continue

    # Keep only AEs that are not listed as indications. The membership test
    # uses the term exactly as stored; the kept term is upper-cased.
    # NOTE(review): if terms can differ in case between the two sources, the
    # comparison would miss them - confirm both are already upper-case.
    faers_aes_filtered[molregno] = {
        term.upper() for term in reported_aes if term not in known_indications
    }

In [31]:
# Number of AE terms for key 139281 in the raw FAERS mapping vs. in the filtered mapping.
len(faers_aes[139281]), len(faers_aes_filtered[139281])

(140, 125)

In [32]:
def restrict_min5drugs_per_ae(molregno2ae_dict, min_drugs=5):
    """Remove AEs associated with fewer than `min_drugs` compounds.

    Parameters
    ----------
    molregno2ae_dict : dict
        Maps each compound key (molregno) to an iterable of AE terms.
    min_drugs : int, optional
        Minimum number of distinct compounds an AE must be associated with
        to be kept. Defaults to 5.

    Returns
    -------
    dict
        A new dictionary with the same keys as the input. Each value is a
        newly created set holding only the AEs that meet the threshold.
        Compounds left with no AEs are kept with an empty set, so the caller
        decides whether to drop them.

    Notes
    -----
    The input dictionary and its sets are left untouched. A shallow
    ``dict.copy()`` would share the value sets with the input, so removing
    AEs from the copy would also alter the caller's data; fresh sets are
    built here instead.
    """
    # Reverse the mapping: for each AE, collect the compounds it is linked to.
    ae2molregnos = dict()
    for molregno, aes in molregno2ae_dict.items():
        for ae in aes:
            ae2molregnos.setdefault(ae, set()).add(molregno)

    # AEs linked to at least `min_drugs` distinct compounds.
    aes_with_enough_drugs = {
        ae for ae, molregnos in ae2molregnos.items() if len(molregnos) >= min_drugs
    }

    # Build new sets so the caller's sets are never mutated.
    restricted_molregno2ae_dict = {
        molregno: set(aes) & aes_with_enough_drugs
        for molregno, aes in molregno2ae_dict.items()
    }

    return restricted_molregno2ae_dict

In [33]:
# Keep only AEs that pass the minimum-drugs restriction.
faers_aes_filtered_min5 = restrict_min5drugs_per_ae(faers_aes_filtered)

# Collect compounds whose AE set is now empty, then delete them. The keys are
# gathered first so the dictionary is not modified during iteration.
no_info = [compound for compound, terms in faers_aes_filtered_min5.items() if len(terms) == 0]

for molregno in no_info:
    del faers_aes_filtered_min5[molregno]

In [34]:
# Number of compounds in the filtered mapping vs. in the final mapping after empty compounds were removed.
len(faers_aes_filtered), len(faers_aes_filtered_min5)

(1129, 985)

In [35]:
# Write the final mapping to a pickle whose file name starts with today's
# date in YYYYMMDD form.
current_date = datetime.date.today().strftime("%Y%m%d")
with open(
    basedir + '/ae_code/faers_aes/results/{}_no_PSM_molregno2aes_PRR3_chi4_faers_min5drugs_random_controls_200v_minus_inds.pkl'.format(current_date),
    'wb',
) as out_handle:
    pickle.dump(faers_aes_filtered_min5, out_handle)