In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Raise pandas' display limits so long effect strings and long tables are
# shown in full when a frame is rendered in the notebook.
for option_name in ('display.max_colwidth', 'display.max_rows'):
    pd.set_option(option_name, 1000)

In [3]:
# Project root: the input spreadsheets are read from, and the combined output
# is written to, the 'data' sub-directory of this path.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
basedir = '/scratch/ias41/ae_code/prev_reported_safety_associations'

In [4]:
# Accumulator shared by all literature sources below. Each element is a tuple
# (accession, target name, original effect, formatted effect, PubMed ID, reference);
# using a set drops exact duplicates.
flattened_aes: set = set()

### Lynch et al. AbbVie panel

In [8]:
# Load the manually fixed and mapped Lynch et al. table. Later cells read its
# 'Accession', 'ChEMBL target name', 'Agonism/activation effects' and
# 'Antagonism/inhibition effects' columns.
lynch_table = pd.read_excel(basedir + '/data/Lynch_table_manually_fixed_mapped.xlsx')

In [9]:
# Build one 'Original effects' string per row: join the agonism and antagonism
# columns with '; ', drop the 'Insufficient data' placeholder when it sits next
# to a real entry, then turn non-breaking spaces into ordinary spaces.
lynch_table['Original effects'] = [
    combined.replace('Insufficient data; ', '').replace('; Insufficient data', '').replace('\xa0', ' ')
    for combined in lynch_table['Agonism/activation effects'] + '; ' + lynch_table['Antagonism/inhibition effects']
]

# Every row of this table comes from the same publication.
lynch_table['PubMed ID'] = 'PMID28216264'
lynch_table['Reference'] = (
    "Lynch JJ 3rd, Van Vleet TR, Mittelstadt SW, Blomme EAG. "
    "Potential functional and pathological side effects related to off-target pharmacological activity. "
    "J Pharmacol Toxicol Methods. 2017 Sep;87:108-126."
)

In [10]:
# Arrow notations that may open a Lynch et al. effect string. All patterns are
# applied with ``match``, i.e. anchored at the start of the string.
_LYNCH_BIDIRECTIONAL_PATTERNS = (
    # change over time, e.g. '↓ → ↑ BP'
    re.compile('(↓ → ↑|↑ → ↓) ([a-z 0-9]+)', re.IGNORECASE),
    # either direction, e.g. '↑/↓ BP'
    re.compile('(↓/↑|↑/↓)([a-z 0-9]+)', re.IGNORECASE),
)
_LYNCH_DIRECTION_PATTERNS = (
    (re.compile('↑ ?([a-z 0-9]+)', re.IGNORECASE), ' increased'),
    (re.compile('↓ ?([a-z 0-9]+)', re.IGNORECASE), ' decreased'),
)

_LYNCH_ABBREVIATIONS = {
    'AV': 'atrioventricular',
    'BP': 'blood pressure',
    'CNS': 'central nervous system',
    'CV': 'cardiovascular',
    'GI': 'gastrointestinal',
    'HR': 'heart rate',
    'NS': 'nervous system',
    'PNS': 'peripheral nervous system',
}
# Whole-token match (optionally followed by a plural 's'). The word boundaries
# stop 'NS' from firing inside 'PNS', 'CNS' or unrelated tokens such as 'ANS'.
_LYNCH_ABBREVIATION_PATTERN = re.compile(
    r'\b(' + '|'.join(sorted(_LYNCH_ABBREVIATIONS, key=len, reverse=True)) + r')(s?)\b'
)


def lynch_transformations(effect):
    """Rewrite one Lynch et al. effect string into plain-text form.

    A leading arrow notation is turned into a trailing 'increased'/'decreased':
    '↑ X' -> 'X increased', '↓ X' -> 'X decreased', while the two-way notations
    ('↓ → ↑ X', '↑ → ↓ X', '↓/↑X', '↑/↓X') yield both variants. Known
    abbreviations (AV, BP, CNS, CV, GI, HR, NS, PNS) are then spelled out.

    Only the run of letters, digits and spaces directly after the arrow is
    kept; anything following it (e.g. a parenthesised remark) is dropped.
    Strings without a leading arrow notation only get abbreviation expansion.

    Args:
        effect: a single effect string.

    Returns:
        A set with one rewritten string, or two for the two-way notations,
        each stripped of surrounding spaces.
    """
    rewritten = None

    # Two-way notations first: a bare '↑'/'↓' pattern cannot match them, and
    # they expand to both directions.
    for pattern in _LYNCH_BIDIRECTIONAL_PATTERNS:
        match = pattern.match(effect)
        if match:
            # Strip so a captured trailing/leading space does not end up as a
            # double space in front of the direction word.
            phrase = match.group(2).strip(' ')
            rewritten = {phrase + ' increased', phrase + ' decreased'}
            break

    if rewritten is None:
        for pattern, suffix in _LYNCH_DIRECTION_PATTERNS:
            match = pattern.match(effect)
            if match:
                rewritten = {match.group(1).strip(' ') + suffix}
                break

    if rewritten is None:
        rewritten = {effect}

    def expand(match):
        # Keep an optional plural 's' after the expanded term.
        return _LYNCH_ABBREVIATIONS[match.group(1)] + match.group(2)

    return {_LYNCH_ABBREVIATION_PATTERN.sub(expand, text).strip(' ') for text in rewritten}

In [11]:
# Flatten the Lynch table: one record per (target, formatted effect).
for _, lynch_row in lynch_table.iterrows():
    accession = lynch_row['Accession']
    target = lynch_row['ChEMBL target name']
    reference = lynch_row['Reference']
    pmid = lynch_row['PubMed ID']

    # Individual effects are separated by ';' with or without a following space.
    for raw_effect in re.split('; |;', lynch_row['Original effects']):
        # One raw effect may expand into several formatted ones.
        for tidy_effect in lynch_transformations(raw_effect):
            # Upper-case only the first character, leave the rest untouched.
            capitalised = tidy_effect[:1].upper() + tidy_effect[1:]
            flattened_aes.add((accession, target, raw_effect, capitalised, pmid, reference))

#### Bowes et al. 

In [12]:
# Load the manually fixed and mapped Bowes et al. table. Later cells read its
# 'Accession', 'ChEMBL target name', 'Agonism or activation' and
# 'Antagonism or inhibition' columns.
bowes_table = pd.read_excel(basedir + '/data/Bowes_table_manually_fixed_mapped.xlsx')

In [13]:
# Build one 'Original effects' string per row: join the agonism and antagonism
# columns with '; ' and drop the 'Insufficient information' placeholder when it
# sits next to a real entry.
bowes_table['Original effects'] = [
    combined.replace('Insufficient information; ', '').replace('; Insufficient information', '')
    for combined in bowes_table['Agonism or activation'] + '; ' + bowes_table['Antagonism or inhibition']
]

# Every row of this table comes from the same publication.
bowes_table['PubMed ID'] = 'PMID23197038'
bowes_table['Reference'] = (
    "Bowes J, Brown AJ, Hamon J, Jarolimek W, Sridhar A, Waldron G, Whitebread S. "
    "Reducing safety-related drug attrition: the use of in vitro pharmacological profiling. "
    "Nat Rev Drug Discov. 2012 Dec;11(12):909-22."
)

In [14]:
# '↓ in X' / '↑ in X' notations used in the Bowes et al. table.
_BOWES_DOWN_PATTERN = re.compile('↓ in ([a-z 0-9]+)', re.IGNORECASE)
_BOWES_UP_PATTERN = re.compile('↑ in ([a-z 0-9]+)', re.IGNORECASE)

_BOWES_ABBREVIATIONS = {
    'BP': 'blood pressure',
    'CNS': 'central nervous system',
    'CVS': 'cardiovascular system',
    'GI': 'gastrointestinal',
    'HR': 'heart rate',
    'CO': 'cardiac output',
    'REM': 'rapid eye movement',
    'DDI': 'drug-drug interaction',
    'ACTH': 'adrenocorticotropic hormone',
    'SCID': 'severe-combined immunodeficiency',
}
# Whole-token match (optionally followed by a plural 's', e.g. 'DDIs'), so a
# short abbreviation such as 'CO' is not expanded inside 'COPD' or 'COX'.
_BOWES_ABBREVIATION_PATTERN = re.compile(
    r'\b(' + '|'.join(sorted(_BOWES_ABBREVIATIONS, key=len, reverse=True)) + r')(s?)\b'
)


def bowes_transformations(effect):
    """Rewrite one Bowes et al. effect string into plain-text form.

    Every '↓ in X' becomes 'X decreased' and every '↑ in X' becomes
    'X increased', where X is the run of letters, digits and spaces after
    'in'; text around the match is kept. Known abbreviations (BP, CNS, CVS,
    GI, HR, CO, REM, DDI, ACTH, SCID) are then spelled out.

    Args:
        effect: a single effect string.

    Returns:
        The rewritten string, stripped of surrounding spaces.
    """
    def move_direction(pattern, direction, text):
        # The trailing space keeps any following text separated from the
        # direction word; the outer strip removes it at the end of the string.
        return pattern.sub(
            lambda match: match.group(1).strip(' ') + ' ' + direction + ' ',
            text,
        ).strip(' ')

    # Down arrows are handled before up arrows, as two separate passes.
    without_down = move_direction(_BOWES_DOWN_PATTERN, 'decreased', effect)
    without_arrows = move_direction(_BOWES_UP_PATTERN, 'increased', without_down)

    def expand(match):
        # Keep an optional plural 's' after the expanded term.
        return _BOWES_ABBREVIATIONS[match.group(1)] + match.group(2)

    return _BOWES_ABBREVIATION_PATTERN.sub(expand, without_arrows).strip(' ')

In [15]:
# Flatten the Bowes table: one record per (target, formatted effect).
for _, bowes_row in bowes_table.iterrows():
    accession = bowes_row['Accession']
    target = bowes_row['ChEMBL target name']
    reference = bowes_row['Reference']
    pmid = bowes_row['PubMed ID']

    # Individual effects are separated by '; ', ' and ' or ', '.
    for raw_effect in re.split('; | and |, ', bowes_row['Original effects']):
        tidy_effect = bowes_transformations(raw_effect)
        # Upper-case only the first character, leave the rest untouched.
        capitalised = tidy_effect[:1].upper() + tidy_effect[1:]
        flattened_aes.add((accession, target, raw_effect, capitalised, pmid, reference))

#### Whitebread et al. 

In [16]:
# Load the manually fixed and mapped Whitebread et al. table. Later cells read
# its 'Accession', 'ChEMBL target name' and 'Possible ADRs' columns.
whitebread_table = pd.read_excel(basedir + '/data/Whitebread_table_manually_fixed_mapped.xlsx')

In [17]:
# Remove leading/trailing full stops from the ADR text of every row.
whitebread_table['Possible ADRs'] = [
    adr_text.strip('.') for adr_text in whitebread_table['Possible ADRs']
]

# Every row of this table comes from the same publication.
whitebread_table['PubMed ID'] = 'PMID16243262'
whitebread_table['Reference'] = (
    "Whitebread S, Hamon J, Bojanic D, Urban L. "
    "Keynote review: in vitro safety pharmacology profiling: an essential tool for successful drug development. "
    "Drug Discov Today. 2005 Nov 1;10(21):1421-33."
)

In [18]:
# Phrase following a direction verb: words of letters/digits separated by
# spaces. It cannot end in a space, so text after the phrase keeps its own
# separator and no double space is produced in front of the direction word.
_WHITEBREAD_PHRASE = '([a-z0-9]+(?: +[a-z0-9]+)*)'
# The leading word boundary stops a verb from matching inside a longer word.
_WHITEBREAD_INCREASE_PATTERN = re.compile(
    r'\b(?:increases?|enhances?|induces?|facilitates?|exacerbates?|facilitation of) +' + _WHITEBREAD_PHRASE,
    re.IGNORECASE,
)
_WHITEBREAD_DECREASE_PATTERN = re.compile(
    r'\b(?:inhibits?|decreases?|reduces?|impairs?) +' + _WHITEBREAD_PHRASE,
    re.IGNORECASE,
)
_WHITEBREAD_CNS_PATTERN = re.compile(r'\bCNS\b')


def whitebread_transformations(effect):
    """Rewrite one Whitebread et al. ADR phrase into plain-text form.

    'increases/enhances/induces/facilitates/exacerbates/facilitation of X'
    becomes 'X increased' and 'inhibits/decreases/reduces/impairs X' becomes
    'X decreased' (case-insensitive), where X is the run of words made of
    letters and digits after the verb; surrounding text is kept. 'CNS' is
    then spelled out as 'central nervous system'.

    Args:
        effect: a single ADR phrase.

    Returns:
        The rewritten string, stripped of surrounding spaces.
    """
    # Increase verbs are rewritten before decrease verbs, as two separate passes.
    after_increase = _WHITEBREAD_INCREASE_PATTERN.sub(
        lambda match: match.group(1) + ' increased', effect
    )
    after_decrease = _WHITEBREAD_DECREASE_PATTERN.sub(
        lambda match: match.group(1) + ' decreased', after_increase
    )

    expanded = _WHITEBREAD_CNS_PATTERN.sub('central nervous system', after_decrease)
    return expanded.strip(' ')

In [19]:
# Flatten the Whitebread table: one record per (target, formatted effect).
for _, whitebread_row in whitebread_table.iterrows():
    accession = whitebread_row['Accession']
    target = whitebread_row['ChEMBL target name']
    reference = whitebread_row['Reference']
    pmid = whitebread_row['PubMed ID']

    # Individual ADRs are separated by ', ', '. ' or ' and '.
    for raw_effect in re.split(', |\. | and ', whitebread_row['Possible ADRs']):
        tidy_effect = whitebread_transformations(raw_effect)
        # Upper-case only the first character, leave the rest untouched.
        capitalised = tidy_effect[:1].upper() + tidy_effect[1:]
        flattened_aes.add((accession, target, raw_effect, capitalised, pmid, reference))

In [20]:
# Column order mirrors the tuples collected in `flattened_aes`.
my_columns = ['Accession', 'ChEMBL target name', 'Original effect', 'Formatted effect', 'PubMed ID', 'Reference']

# `flattened_aes` is a set, so its iteration order is arbitrary and can differ
# between runs. The primary sort keys are unchanged; 'Formatted effect' and
# 'Accession' are added as tie-breakers so rows sharing the first three keys
# (e.g. the increased/decreased pair from one original effect) always come out
# in the same order and the exported file is reproducible.
sort_keys = ['PubMed ID', 'ChEMBL target name', 'Original effect', 'Formatted effect', 'Accession']
complete_ae_df = pd.DataFrame.from_records(list(flattened_aes), columns=my_columns).sort_values(by=sort_keys)

# NOTE(review): recent pandas releases no longer ship a writer for the legacy
# '.xls' format - confirm this still runs in the target environment before
# relying on it (downstream consumers of this path would need updating too).
complete_ae_df.to_excel(basedir + '/data/safety_original2formatted_effects.xls', index=False)