### Make tables with quantified associations focusing on NS, cardio, respiratory, and hepato

In [2]:
import pandas as pd
import pickle

In [3]:
pd.set_option('display.max_columns', 200)

In [4]:
basedir = '/scratch/ias41/ae_code'

In [5]:
with open(basedir + '/analysis/data/dirs_info.pkl', 'rb') as f:
    dirs = pickle.load(f)
    
faers_data_unbound = dirs['20200110_faers_unbound_margin_pred_005_PRR2']
sider_data_unbound = dirs['20200110_sider_unbound_margin_pred']
faers_data_cutoff = dirs['20200110_faers_cutoff6_pred_005_PRR2']
sider_data_cutoff = dirs['20200110_sider_cutoff6_pred']

In [6]:
# Target information
target_info = pd.read_csv(basedir + '/ae_target_links/data/target_names.txt', sep='\t')
target_info = target_info.loc[target_info['accession_organism']=='Homo sapiens',:]

# Previously reported associations
# Known associations, merge with known hierarchy HLT
known_associations = pd.read_excel(basedir + '/prev_reported_safety_associations/data/safety_meddra_annotated_effects.xls')
known_associations['Annotated MedDRA PT'] = known_associations['Annotated MedDRA PT'].apply(lambda x: x.upper())
known_meddra_hier = pd.read_excel(basedir + '/prev_reported_safety_associations/data/safety_meddra_annotated_effects_for_hierarchy_output.xlsx', skiprows=4)
known_meddra_hier['PT'] = known_meddra_hier['PT'].apply(lambda x: x.upper())
known_meddra_hier[' Term'] = known_meddra_hier[' Term'].apply(lambda x: x.upper())
known_meddra_hier['HLT'] = known_meddra_hier['HLT'].apply(lambda x: x.upper())
known_meddra_hier_selection = known_meddra_hier.loc[known_meddra_hier['Primary SOC']=='Y',['PT','HLT',' Term']].drop_duplicates()
known_merged = known_associations.merge(known_meddra_hier_selection, left_on='Annotated MedDRA PT', right_on=' Term')

hlt_manual = pd.read_excel(basedir + '/prev_reported_safety_associations/data/safety_meddra_manually_annotated_hlt_effects.xls', index=False)
hlt_manual.rename(columns={'Annotated MedDRA HLT': 'HLT'}, inplace=True)
hlt_manual['HLT'] = hlt_manual['HLT'].apply(lambda x: x.upper())
hlt_manual.drop(columns=['Annotated MedDRA HLT Code'])

known_merged = pd.concat([known_merged, hlt_manual], sort=False).reset_index(drop=True)

# MedDRA hierchy
meddra_hier = pd.read_excel(basedir + '/analysis/data/all_faers_and_sider_aes_hier_output.xlsx', skiprows=4)
meddra_hier_selection = meddra_hier.loc[meddra_hier['Primary SOC']=='Y',[' Term','HLT','SOC','PT']].drop_duplicates()
meddra_hier_selection['HLT'] = meddra_hier_selection['HLT'].apply(lambda x: x.upper())

# Known associations
known_hlt_tuples = set([(x[1]['Accession'], x[1]['HLT']) for x in known_merged.iterrows()])
known_pt_tuples = set([(x[1]['Accession'], x[1]['PT']) for x in known_merged.loc[~known_merged['PT'].isnull()].iterrows()])

In [7]:
target_class = pd.read_csv(basedir + '/analysis/data/target_classification_all_levels_r.txt', sep='\t')

def find_integrated(x):
    if x['level_2'] == 'Not available':
        return x['level_1']
    else:
        return x['level_2']

target_class['integrated_level'] = target_class.apply(find_integrated, axis=1)

In [8]:
faers_perf_unbound = pd.read_csv(basedir + '/ae_target_links/output/' + faers_data_unbound['dir'] + '/sign_assoc_performance.txt', sep='\t').merge(target_class, on='accession')
sider_perf_unbound = pd.read_csv(basedir + '/ae_target_links/output/' + sider_data_unbound['dir'] + '/sign_assoc_performance.txt', sep='\t').merge(target_class, on='accession')
faers_perf_cutoff = pd.read_csv(basedir + '/ae_target_links/output/' + faers_data_cutoff['dir'] + '/sign_assoc_performance.txt', sep='\t').merge(target_class, on='accession')
sider_perf_cutoff = pd.read_csv(basedir + '/ae_target_links/output/' + sider_data_cutoff['dir'] + '/sign_assoc_performance.txt', sep='\t').merge(target_class, on='accession')

In [9]:
faers_perf_unbound_merged = faers_perf_unbound.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')
faers_perf_unbound_merged['dataset'] = 'FAERS'
sider_perf_unbound_merged = sider_perf_unbound.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')
sider_perf_unbound_merged['dataset'] = 'SIDER'
faers_perf_cutoff_merged = faers_perf_cutoff.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')
faers_perf_cutoff_merged['dataset'] = 'FAERS'
sider_perf_cutoff_merged = sider_perf_cutoff.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')
sider_perf_cutoff_merged['dataset'] = 'SIDER'

In [10]:
sider_perf_unbound_merged.loc[(sider_perf_unbound_merged['ae_hit_rate']>0.5)&(sider_perf_unbound_merged['PPV']>0.5)]

Unnamed: 0,accession,nr compounds,nr compounds with AE,ae_hit_rate,nr compounds without AE,nae_hit_rate,nr compounds active,nr compounds inactive,Adverse Event,Likelihood Ratio,p-value,activity_vector,ae_vector,molregnos,active_molregnos,predicted_vector,corrected p-value,tid,pref_name,target_type,accession_organism,target_organism,PPV,ae_prevalence,specificity,PRU,improvement_over_prevalence,level_1,level_2,level_3,level_4,level_5,level_6,integrated_level,Term,HLT,SOC,PT,dataset


In [11]:
sider_perf_unbound_merged['PPV'].median(), faers_perf_unbound_merged['PPV'].median()

(0.375, 0.2307692307692308)

In [12]:
def find_pt_known(row):
    if ((row['accession'],row['Adverse Event'])) in known_pt_tuples:
        return 1
    else:
        return 0

def find_hlt_known(row):
    if ((row['accession'],row['HLT'])) in known_hlt_tuples:
        return 1
    else:
        return 0

for df in [faers_perf_unbound_merged, sider_perf_unbound_merged, faers_perf_cutoff_merged, sider_perf_cutoff_merged]:
    df['known_pt'] = df.apply(find_pt_known, axis=1)
    df['known_hlt'] = df.apply(find_hlt_known, axis=1)

In [13]:
# Combine FAERS and SIDER

unbound_combined = pd.concat([faers_perf_unbound_merged, sider_perf_unbound_merged])
cutoff_combined = pd.concat([faers_perf_cutoff_merged, sider_perf_cutoff_merged])

### Most highly ranked (by PPV) target- AE associations in SOC priority classes

In [14]:
top10_faers = faers_perf_unbound_merged.loc[faers_perf_unbound_merged['SOC'].isin(['Vascular disorders','Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders'])].sort_values(by='improvement_over_prevalence', ascending=False).head(10)[['pref_name', 'integrated_level','Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'dataset', 'improvement_over_prevalence']]
top10_faers

Unnamed: 0,pref_name,integrated_level,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,dataset,improvement_over_prevalence
129,Angiotensin-converting enzyme,Protease,HYPOVOLAEMIC SHOCK,Vascular disorders,0.6,43.8,0,0.033113,0.3,10,FAERS,0.566887
219,Delta opioid receptor,Family A G protein-coupled receptor,RESPIRATORY DEPRESSION,"Respiratory, thoracic and mediastinal disorders",0.454545,20.438596,1,0.039175,0.263158,19,FAERS,0.41537
91,Alpha-1b adrenergic receptor,Family A G protein-coupled receptor,ORTHOSTATIC HYPOTENSION,Vascular disorders,0.454545,15.198413,1,0.05198,0.238095,21,FAERS,0.402565
216,HERG,Voltage-gated ion channel,TORSADE DE POINTES,Cardiac disorders,0.466667,9.515625,0,0.084211,0.175,40,FAERS,0.382456
218,Kappa opioid receptor,Family A G protein-coupled receptor,RESPIRATORY DEPRESSION,"Respiratory, thoracic and mediastinal disorders",0.416667,17.030075,0,0.040254,0.263158,19,FAERS,0.376412
130,Carbonic anhydrase IV,Lyase,HYPERAMMONAEMIC ENCEPHALOPATHY,Nervous system disorders,0.375,36.6,0,0.016129,0.6,5,FAERS,0.358871
59,Alpha-1a adrenergic receptor,Family A G protein-coupled receptor,CEREBROVASCULAR DISORDER,Nervous system disorders,0.363636,18.909091,0,0.029333,0.363636,11,FAERS,0.334303
60,Dopamine D4 receptor,Family A G protein-coupled receptor,CEREBROVASCULAR DISORDER,Nervous system disorders,0.363636,16.843537,0,0.032813,0.190476,21,FAERS,0.330824
144,Dopamine D4 receptor,Family A G protein-coupled receptor,NEUROLEPTIC MALIGNANT SYNDROME,Nervous system disorders,0.363636,13.494505,0,0.040625,0.153846,26,FAERS,0.323011
90,Alpha-1a adrenergic receptor,Family A G protein-coupled receptor,ORTHOSTATIC HYPOTENSION,Vascular disorders,0.363636,11.333333,1,0.048,0.222222,18,FAERS,0.315636


In [19]:
faers_perf_unbound_pos_merged.loc[(faers_perf_unbound_pos_merged['Adverse Event'].str.contains('NEUROLEPTIC MALIGNANT'))&(faers_perf_unbound_pos_merged['pref_name'].str.contains('Dopam'))][['pref_name', 'Adverse Event', 'PPV', 'Likelihood Ratio', 'corrected p-value']]

Unnamed: 0,pref_name,Adverse Event,PPV,Likelihood Ratio,corrected p-value
17879,Dopamine D1 receptor,NEUROLEPTIC MALIGNANT SYNDROME,0.153846,2.928571,0.504538
17882,Dopamine D3 receptor,NEUROLEPTIC MALIGNANT SYNDROME,0.321429,8.336842,0.003539
17893,Dopamine transporter,NEUROLEPTIC MALIGNANT SYNDROME,0.083333,1.465035,0.537476
17895,Dopamine D5 receptor,NEUROLEPTIC MALIGNANT SYNDROME,0.142857,4.015152,0.293899
17897,Dopamine D4 receptor,NEUROLEPTIC MALIGNANT SYNDROME,0.363636,13.494505,0.042142
17914,Dopamine D2 receptor,NEUROLEPTIC MALIGNANT SYNDROME,0.296296,6.446461,0.013126


In [20]:
faers_perf_cutoff_merged.loc[faers_perf_cutoff_merged['SOC'].isin(['Vascular disorders','Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders'])].sort_values(by='PPV', ascending=False).head(10)[['pref_name', 'Adverse Event','integrated_level', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'accession', 'improvement_over_prevalence']]

Unnamed: 0,pref_name,Adverse Event,integrated_level,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,accession,improvement_over_prevalence
550,Farnesyl diphosphate synthase,ISCHAEMIA,Transferase,Vascular disorders,0.8,126.5,0,0.030651,0.5,8,P14324,0.769349
1060,Receptor protein-tyrosine kinase erbB-2,COUGH,Kinase,"Respiratory, thoracic and mediastinal disorders",0.8,137.2,0,0.028329,0.2,20,P04626,0.771671
245,Serine/threonine-protein kinase B-raf,HEPATIC FUNCTION ABNORMAL,Kinase,Hepatobiliary disorders,0.714286,30.5,0,0.075758,0.142857,35,P15056,0.638528
592,Farnesyl diphosphate synthase,MYELOPATHY,Transferase,Nervous system disorders,0.6,63.75,0,0.022989,0.5,6,P14324,0.577011
322,Fibroblast growth factor receptor 1,HYPERTENSION,Kinase,Vascular disorders,0.6,28.071429,0,0.050725,0.142857,21,P11362,0.549275
651,Farnesyl diphosphate synthase,SINUS CONGESTION,Transferase,"Respiratory, thoracic and mediastinal disorders",0.6,42.0,0,0.034483,0.333333,9,P14324,0.565517
649,Farnesyl diphosphate synthase,NASAL SEPTUM DEVIATION,Transferase,"Respiratory, thoracic and mediastinal disorders",0.6,42.0,0,0.034483,0.333333,9,P14324,0.565517
636,Farnesyl diphosphate synthase,MITRAL VALVE INCOMPETENCE,Transferase,Cardiac disorders,0.6,42.0,0,0.034483,0.333333,9,P14324,0.565517
630,Farnesyl diphosphate synthase,HEPATIC STEATOSIS,Transferase,Hepatobiliary disorders,0.6,47.4375,0,0.030651,0.375,8,P14324,0.569349
619,Farnesyl diphosphate synthase,RALES,Transferase,"Respiratory, thoracic and mediastinal disorders",0.6,47.4375,0,0.030651,0.375,8,P14324,0.569349


In [21]:
faers_perf_cutoff_merged.loc[(faers_perf_cutoff_merged['SOC'].isin(['Investigations','Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(faers_perf_cutoff_merged['pref_name']=='HERG')].sort_values(by='PPV', ascending=False).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE']]

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE


In [22]:
top10_sider = sider_perf_unbound_merged.loc[sider_perf_unbound_merged['SOC'].isin(['Vascular disorders','Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders'])].sort_values(by='improvement_over_prevalence', ascending=False).head(10)[['pref_name', 'integrated_level', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE','dataset', 'improvement_over_prevalence']]
top10_sider

Unnamed: 0,pref_name,integrated_level,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,dataset,improvement_over_prevalence
239,Carbonic anhydrase VB,Lyase,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.833333,42.75,0,0.104712,0.25,20,SIDER,0.728621
4,Muscarinic acetylcholine receptor M3,Family A G protein-coupled receptor,TREMOR,Nervous system disorders,0.875,29.677083,0,0.190855,0.072917,96,SIDER,0.684145
282,Muscarinic acetylcholine receptor M2,Family A G protein-coupled receptor,SOMNOLENCE,Nervous system disorders,1.0,inf,0,0.31746,0.038889,180,SIDER,0.68254
6,Muscarinic acetylcholine receptor M5,Family A G protein-coupled receptor,TREMOR,Nervous system disorders,0.875,27.708333,0,0.201681,0.097222,72,SIDER,0.673319
0,Muscarinic acetylcholine receptor M1,Family A G protein-coupled receptor,TREMOR,Nervous system disorders,0.875,27.513889,0,0.202817,0.097222,72,SIDER,0.672183
5,Muscarinic acetylcholine receptor M2,Family A G protein-coupled receptor,TREMOR,Nervous system disorders,0.857143,23.842105,1,0.201058,0.052632,114,SIDER,0.656085
29,Muscarinic acetylcholine receptor M2,Family A G protein-coupled receptor,NASAL CONGESTION,"Respiratory, thoracic and mediastinal disorders",0.714286,32.9375,0,0.070547,0.125,40,SIDER,0.643739
272,Carbonic anhydrase VA,Lyase,PARAESTHESIA,Nervous system disorders,1.0,inf,0,0.393519,0.070588,85,SIDER,0.606481
271,Carbonic anhydrase VB,Lyase,PARAESTHESIA,Nervous system disorders,1.0,inf,0,0.418848,0.075,80,SIDER,0.581152
241,Carbonic anhydrase VA,Lyase,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.666667,17.636364,0,0.101852,0.181818,22,SIDER,0.564815


In [23]:
sider_perf_unbound_merged.loc[(sider_perf_unbound_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders','Investigations']))&(sider_perf_cutoff_merged['pref_name']=='HERG')].sort_values(by='PPV', ascending=False).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE']]

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE


In [24]:
sider_perf_cutoff_merged.loc[(sider_perf_cutoff_merged['SOC'].isin(['Investigations','Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(sider_perf_cutoff_merged['pref_name']=='HERG')].sort_values(by='PPV', ascending=False).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'HLT']]

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,HLT
2114,HERG,VENTRICULAR TACHYCARDIA,Cardiac disorders,0.411765,10.052,0,0.065104,0.28,25,VENTRICULAR ARRHYTHMIAS AND CARDIAC ARREST
2116,HERG,VENTRICULAR ARRHYTHMIA,Cardiac disorders,0.352941,9.428571,0,0.054688,0.285714,21,VENTRICULAR ARRHYTHMIAS AND CARDIAC ARREST
2115,HERG,TORSADE DE POINTES,Cardiac disorders,0.294118,11.011905,0,0.036458,0.357143,14,VENTRICULAR ARRHYTHMIAS AND CARDIAC ARREST
1396,HERG,ELECTROCARDIOGRAM QT PROLONGED,Investigations,0.235294,12.820513,1,0.023438,0.444444,9,ECG INVESTIGATIONS


In [25]:
sider_perf_cutoff_merged.loc[(sider_perf_cutoff_merged['SOC'].isin(['Investigations','Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(sider_perf_cutoff_merged['pref_name']=='HERG')].sort_values(by='PPV', ascending=False).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE']]

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE
2114,HERG,VENTRICULAR TACHYCARDIA,Cardiac disorders,0.411765,10.052,0,0.065104,0.28,25
2116,HERG,VENTRICULAR ARRHYTHMIA,Cardiac disorders,0.352941,9.428571,0,0.054688,0.285714,21
2115,HERG,TORSADE DE POINTES,Cardiac disorders,0.294118,11.011905,0,0.036458,0.357143,14
1396,HERG,ELECTROCARDIOGRAM QT PROLONGED,Investigations,0.235294,12.820513,1,0.023438,0.444444,9


In [26]:
sider_perf_cutoff_merged.loc[sider_perf_cutoff_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders'])].sort_values(by='PPV', ascending=False).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE']]

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE
367,Muscarinic acetylcholine receptor M5,DIZZINESS,Nervous system disorders,0.904762,7.399038,0,0.562162,0.091346,208
664,Steryl-sulfatase,MYOCARDIAL INFARCTION,Cardiac disorders,0.857143,38.461538,0,0.134948,0.153846,39
667,Steryl-sulfatase,ACUTE CORONARY SYNDROME,Cardiac disorders,0.857143,37.35,0,0.138408,0.15,40
874,Tyrosine-protein kinase receptor UFO,DYSGEUSIA,Nervous system disorders,0.833333,22.297297,0,0.183168,0.135135,37
1587,Tyrosine-protein kinase receptor UFO,MEDIASTINAL DISORDER,"Respiratory, thoracic and mediastinal disorders",0.833333,29.827586,0,0.143564,0.172414,29
1574,Tyrosine-protein kinase SRC,DYSPNOEA,"Respiratory, thoracic and mediastinal disorders",0.833333,14.948454,0,0.250646,0.051546,97
1597,Tyrosine-protein kinase SRC,MEDIASTINAL DISORDER,"Respiratory, thoracic and mediastinal disorders",0.833333,42.195122,0,0.105943,0.121951,41
1640,Tyrosine-protein kinase receptor UFO,NERVOUS SYSTEM DISORDER,Nervous system disorders,0.833333,16.489362,0,0.232673,0.106383,47
1404,Tyrosine-protein kinase receptor UFO,HEPATOBILIARY DISEASE,Hepatobiliary disorders,0.833333,37.083333,0,0.118812,0.208333,24
363,Muscarinic acetylcholine receptor M1,DIZZINESS,Nervous system disorders,0.8125,4.150218,0,0.565432,0.117904,229


In [27]:
# Save copy of top 10 for use in writing

In [28]:
faers_sider_tops = pd.concat([top10_sider, top10_faers], sort=False)

In [29]:
faers_sider_tops.shape

(20, 12)

In [30]:
faers_sider_tops.rename(columns={'pref_name': 'Target', 'Likelihood Ratio':'LR','integrated_level': 'Target class', 'known_hlt': 'Previously reported', 'ae_prevalence': 'Prevalence', 'ae_hit_rate': 'Hit rate of drugs with AE', 'nr compounds with AE': 'Drugs with AE (n)', 'improvement_over_prevalence': 'PPV - Prevalence'}, inplace=True)

In [31]:
for column in ['PPV', 'Prevalence', 'Hit rate of drugs with AE', 'PPV - Prevalence']:
    faers_sider_tops[column] = faers_sider_tops[column].apply(lambda x: '{:.2f}'.format(x))
for column in ['LR']:
    faers_sider_tops[column] = faers_sider_tops[column].apply(lambda x: '{:.1f}'.format(x))


In [32]:
faers_sider_tops[['Target', 'Adverse Event', 'SOC', 'PPV','Prevalence', 'PPV - Prevalence', 'LR',
       'Previously reported', 'Hit rate of drugs with AE', 'Drugs with AE (n)',
       'dataset']].to_csv(basedir + '/analysis/results/unbound_margin_pred_faers_vs_sider/top10_PPVminPrev.txt', sep='\t', index=False)

In [34]:
faers_sider_tops[['Target', 'Adverse Event', 'SOC', 'PPV','Prevalence', 'PPV - Prevalence', 'LR',
       'Previously reported', 'Drugs with AE (n)',
       'dataset']].shape

(20, 10)

In [35]:
faers_sider_tops.columns

Index(['Target', 'Target class', 'Adverse Event', 'SOC', 'PPV', 'LR',
       'Previously reported', 'Prevalence', 'Hit rate of drugs with AE',
       'Drugs with AE (n)', 'dataset', 'PPV - Prevalence'],
      dtype='object')

### Associations with novel targets - no family members

In [None]:
faers_perf_unbound_merged['SOC'].isin(['Vascular disorders','Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders'])].sort_values(by='improvement_over_prevalence', ascending=False).head(10)[['pref_name', 'integrated_level','Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'dataset', 'improvement_over_prevalence']]
sider_perf_unbound_merged

In [16]:
novel_faers = top10_novel_faers = faers_perf_unbound_merged.loc[(faers_perf_unbound_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(~faers_perf_unbound_merged['accession'].isin(known_targets))].sort_values(by='improvement_over_prevalence', ascending=False)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'improvement_over_prevalence']]
novel_faers['dataset'] = 'FAERS'

novel_sider = sider_perf_unbound_merged.loc[(sider_perf_unbound_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(~sider_perf_unbound_merged['accession'].isin(known_targets))].sort_values(by='improvement_over_prevalence', ascending=False)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'improvement_over_prevalence']]
novel_sider['dataset'] = 'SIDER'

novel_both = pd.concat([novel_faers, novel_sider])

In [48]:
novel_both['pref_name'].drop_duplicates()

130                 Carbonic anhydrase IV
60                   Dopamine D4 receptor
197    Microtubule-associated protein tau
142                  Dopamine D3 receptor
3                   Carbonic anhydrase II
101          Alpha-1d adrenergic receptor
239                 Carbonic anhydrase VB
272                 Carbonic anhydrase VA
2            Serotonin 6 (5-HT6) receptor
240                Carbonic anhydrase XII
236                 Carbonic anhydrase IX
Name: pref_name, dtype: object

In [42]:
novel_both_no_family = novel_both.loc[novel_both['pref_name'].str.contains('Microtubule-associated protein tau|Carbonic')]

In [43]:
novel_both_no_family.sort_values(by='improvement_over_prevalence', inplace=True, ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [34]:
novel_both_no_family

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,improvement_over_prevalence,dataset
239,Carbonic anhydrase VB,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.833333,42.75,0,0.104712,0.25,20,0.728621,SIDER
272,Carbonic anhydrase VA,PARAESTHESIA,Nervous system disorders,1.0,inf,0,0.393519,0.070588,85,0.606481,SIDER
271,Carbonic anhydrase VB,PARAESTHESIA,Nervous system disorders,1.0,inf,0,0.418848,0.075,80,0.581152,SIDER
241,Carbonic anhydrase VA,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.666667,17.636364,0,0.101852,0.181818,22,0.564815,SIDER
240,Carbonic anhydrase XII,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.545455,11.4,0,0.095238,0.214286,28,0.450216,SIDER
270,Carbonic anhydrase VB,PULMONARY OEDEMA,"Respiratory, thoracic and mediastinal disorders",0.5,13.692308,0,0.068063,0.230769,13,0.431937,SIDER
130,Carbonic anhydrase IV,HYPERAMMONAEMIC ENCEPHALOPATHY,Nervous system disorders,0.375,36.6,0,0.016129,0.6,5,0.358871,FAERS
236,Carbonic anhydrase IX,HEPATIC NECROSIS,Hepatobiliary disorders,0.333333,19.875,0,0.02454,0.375,8,0.308793,SIDER
197,Microtubule-associated protein tau,LIVER INJURY,Hepatobiliary disorders,0.363636,6.233766,0,0.083969,0.727273,11,0.279667,FAERS
238,Carbonic anhydrase IX,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.333333,10.53913,0,0.070552,0.173913,23,0.262781,SIDER


In [44]:
novel_both_no_family.rename(columns={'pref_name': 'Target'
                                     , 'Likelihood Ratio':'Likelihood Ratio','integrated_level': 'Target class'
                                     , 'known_hlt': 'Previously reported'
                                     , 'ae_prevalence': 'Fraction of drugs with AE'
                                     , 'ae_hit_rate': 'Fraction of AE-associated drugs that are active'
                                     , 'nr compounds with AE': 'Number of drugs showing adverse event'
                                     , 'improvement_over_prevalence': 'Value-added PPV'}, inplace=True)

In [45]:
for column in ['PPV', 'Fraction of drugs with AE', 'Fraction of AE-associated drugs that are active', 'Value-added PPV']:
    novel_both_no_family[column] = novel_both_no_family[column].apply(lambda x: '{:.2f}'.format(x))
for column in ['Likelihood Ratio']:
    novel_both_no_family[column] = novel_both_no_family[column].apply(lambda x: '{:.1f}'.format(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [47]:
novel_both_no_family[['Target', 'Adverse Event', 'SOC', 'PPV','Fraction of drugs with AE','Value-added PPV', 'Likelihood Ratio',
       'Previously reported', 'Fraction of AE-associated drugs that are active', 'Number of drugs showing adverse event',
       'dataset']].to_csv(basedir + '/analysis/results/unbound_margin_pred_faers_vs_sider/novel_no_family.txt', sep='\t', index=False)

### Most highly ranked associations for novel targets

In [15]:
known_targets = set(known_merged['Accession'])

In [37]:
# Priority SOCs
top10_novel_faers = faers_perf_unbound_merged.loc[(faers_perf_unbound_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(~faers_perf_unbound_merged['accession'].isin(known_targets))].sort_values(by='improvement_over_prevalence', ascending=False).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'improvement_over_prevalence']]
top10_novel_faers['dataset'] = 'FAERS'
top10_novel_faers

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,improvement_over_prevalence,dataset
130,Carbonic anhydrase IV,HYPERAMMONAEMIC ENCEPHALOPATHY,Nervous system disorders,0.375,36.6,0,0.016129,0.6,5,0.358871,FAERS
60,Dopamine D4 receptor,CEREBROVASCULAR DISORDER,Nervous system disorders,0.363636,16.843537,0,0.032813,0.190476,21,0.330824,FAERS
144,Dopamine D4 receptor,NEUROLEPTIC MALIGNANT SYNDROME,Nervous system disorders,0.363636,13.494505,0,0.040625,0.153846,26,0.323011,FAERS
197,Microtubule-associated protein tau,LIVER INJURY,Hepatobiliary disorders,0.363636,6.233766,0,0.083969,0.727273,11,0.279667,FAERS
142,Dopamine D3 receptor,NEUROLEPTIC MALIGNANT SYNDROME,Nervous system disorders,0.321429,8.336842,0,0.053763,0.36,25,0.267665,FAERS
205,Dopamine D4 receptor,PLEUROTHOTONUS,Nervous system disorders,0.272727,26.291667,0,0.014063,0.333333,9,0.258665,FAERS
3,Carbonic anhydrase II,SIMPLE PARTIAL SEIZURES,Nervous system disorders,0.214286,30.545455,0,0.00885,0.5,6,0.205436,FAERS
101,Alpha-1d adrenergic receptor,CARDIOTOXICITY,Cardiac disorders,0.24,8.727273,0,0.034921,0.272727,22,0.205079,FAERS
164,Dopamine D3 receptor,CONDUCTION DISORDER,Cardiac disorders,0.214286,7.187166,0,0.036559,0.352941,17,0.177727,FAERS


In [38]:
top10_novel_sider = sider_perf_unbound_merged.loc[(sider_perf_unbound_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(~sider_perf_unbound_merged['accession'].isin(known_targets))].sort_values(by='improvement_over_prevalence', ascending=False).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'improvement_over_prevalence']]
top10_novel_sider['dataset'] = 'SIDER'
top10_novel_sider

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,improvement_over_prevalence,dataset
239,Carbonic anhydrase VB,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.833333,42.75,0,0.104712,0.25,20,0.728621,SIDER
272,Carbonic anhydrase VA,PARAESTHESIA,Nervous system disorders,1.0,inf,0,0.393519,0.070588,85,0.606481,SIDER
271,Carbonic anhydrase VB,PARAESTHESIA,Nervous system disorders,1.0,inf,0,0.418848,0.075,80,0.581152,SIDER
241,Carbonic anhydrase VA,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.666667,17.636364,0,0.101852,0.181818,22,0.564815,SIDER
2,Serotonin 6 (5-HT6) receptor,TREMOR,Nervous system disorders,0.714286,11.428571,0,0.179487,0.05102,98,0.534799,SIDER
240,Carbonic anhydrase XII,JAUNDICE CHOLESTATIC,Hepatobiliary disorders,0.545455,11.4,0,0.095238,0.214286,28,0.450216,SIDER
270,Carbonic anhydrase VB,PULMONARY OEDEMA,"Respiratory, thoracic and mediastinal disorders",0.5,13.692308,0,0.068063,0.230769,13,0.431937,SIDER
37,Serotonin 6 (5-HT6) receptor,NEUROLEPTIC MALIGNANT SYNDROME,Nervous system disorders,0.428571,24.84375,0,0.029304,0.1875,16,0.399267,SIDER
32,Alpha-1d adrenergic receptor,NASAL CONGESTION,"Respiratory, thoracic and mediastinal disorders",0.461538,12.05042,0,0.066406,0.176471,34,0.395132,SIDER
73,Dopamine D3 receptor,TARDIVE DYSKINESIA,Nervous system disorders,0.384615,27.890625,0,0.021918,0.625,8,0.362698,SIDER


In [39]:
top10_novel = pd.concat([top10_novel_sider, top10_novel_faers])

In [40]:
top10_novel.rename(columns={'pref_name': 'Target', 'Likelihood Ratio':'LR','integrated_level': 'Target class', 'known_hlt': 'Previously reported', 'ae_prevalence': 'Prevalence', 'ae_hit_rate': 'Hit rate', 'nr compounds with AE': 'Drugs with AE (n)', 'improvement_over_prevalence': 'PPV - Prevalence'}, inplace=True)

In [41]:
for column in ['PPV', 'Prevalence', 'Hit rate', 'PPV - Prevalence']:
    top10_novel[column] = top10_novel[column].apply(lambda x: '{:.2f}'.format(x))
for column in ['LR']:
    top10_novel[column] = top10_novel[column].apply(lambda x: '{:.1f}'.format(x))


In [42]:
top10_novel[['Target', 'Adverse Event', 'SOC', 'PPV','Prevalence','PPV - Prevalence', 'LR',
       'Previously reported', 'Hit rate', 'Drugs with AE (n)',
       'dataset']].to_csv(basedir + '/analysis/results/unbound_margin_pred_faers_vs_sider/top10_novel_PPVminPrev.txt', sep='\t', index=False)

In [43]:
top10_novel.shape

(19, 11)

#### Novel targets, top 3 associations each

In [None]:
novel_targets = list(unbound_combined.loc[~unbound_combined['accession'].isin(known_targets),'accession'].drop_duplicates())
novel_targets

In [None]:
novel_top_df = pd.DataFrame()
for target in novel_targets:
    target_df = unbound_combined.loc[(unbound_combined['accession']==target)&(unbound_combined['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders'])),:].sort_values(by='improvement_over_prevalence', ascending=False)[:2]
    novel_top_df = pd.concat([novel_top_df, target_df])

In [None]:
novel_top_df.sort_values(by='improvement_over_prevalence', ascending=False)

In [None]:
novel_top_df.rename(columns={'pref_name': 'Target', 'Likelihood Ratio':'LR','integrated_level': 'Target class', 'known_hlt': 'Previously reported', 'ae_prevalence': 'Prevalence', 'ae_hit_rate': 'Hit rate', 'nr compounds with AE': 'Drugs with AE (n)', 'improvement_over_prevalence': 'PPV - Prevalence'}, inplace=True)

In [None]:
for column in ['PPV', 'Prevalence', 'Hit rate', 'PPV - Prevalence']:
    novel_top_df[column] = novel_top_df[column].apply(lambda x: '{:.2f}'.format(x))
for column in ['LR']:
    novel_top_df[column] = novel_top_df[column].apply(lambda x: '{:.1f}'.format(x))


In [None]:
novel_top_df[['Target', 'Adverse Event', 'SOC', 'PPV', 'Prevalence', 'PPV - Prevalence', 'LR', 'Hit rate', 'Drugs with AE (n)',
       'dataset']].to_csv(basedir + '/analysis/results/unbound_margin_pred_faers_vs_sider/top2perTarget_novel_PPVminPrev.txt', sep='\t', index=False)

In [None]:
novel_top_df[['Target', 'Adverse Event', 'SOC', 'PPV', 'LR', 'Prevalence', 'Hit rate', 'Drugs with AE (n)',
       'dataset']].shape

### Novel target relationships - are they informative or other targets more predictive for same event?

In [44]:
faers_novel_target_AEs = list(faers_perf_unbound_merged.loc[(faers_perf_unbound_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(~faers_perf_unbound_merged['accession'].isin(known_targets))]['Adverse Event'])
faers_novel_target_AEs

['SIMPLE PARTIAL SEIZURES',
 'CEREBROVASCULAR DISORDER',
 'CARDIOTOXICITY',
 'HYPERAMMONAEMIC ENCEPHALOPATHY',
 'NEUROLEPTIC MALIGNANT SYNDROME',
 'NEUROLEPTIC MALIGNANT SYNDROME',
 'CONDUCTION DISORDER',
 'LIVER INJURY',
 'PLEUROTHOTONUS']

In [47]:
faers_novel_plus_others = faers_perf_unbound_merged.loc[faers_perf_unbound_merged['Adverse Event'].isin(faers_novel_target_AEs)].sort_values(by=['Adverse Event','improvement_over_prevalence','pref_name'])[['pref_name', 'Adverse Event', 'SOC', 'improvement_over_prevalence','PPV', 'Likelihood Ratio','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'corrected p-value','accession']]

In [48]:
faers_novel_plus_others['novel target'] = faers_novel_plus_others['accession'].apply(lambda x: 0 if x in known_targets else 1)

In [51]:
faers_novel_plus_others.sort_values(by=['Adverse Event','PPV'])

Unnamed: 0,pref_name,Adverse Event,SOC,improvement_over_prevalence,PPV,Likelihood Ratio,ae_prevalence,ae_hit_rate,nr compounds with AE,corrected p-value,accession,novel target
98,Serotonin 2a (5-HT2a) receptor,CARDIOTOXICITY,Cardiac disorders,0.129143,0.162162,5.668203,0.033019,0.285714,21,0.041887,P28223,0
99,Serotonin 2b (5-HT2b) receptor,CARDIOTOXICITY,Cardiac disorders,0.16461,0.212121,5.397436,0.047511,0.333333,21,0.048521,P41595,0
97,Serotonin transporter,CARDIOTOXICITY,Cardiac disorders,0.192524,0.225806,8.471591,0.033283,0.318182,22,0.010683,P31645,0
101,Alpha-1d adrenergic receptor,CARDIOTOXICITY,Cardiac disorders,0.205079,0.24,8.727273,0.034921,0.272727,22,0.044693,P25100,1
100,Serotonin 2c (5-HT2c) receptor,CARDIOTOXICITY,Cardiac disorders,0.218002,0.259259,8.133333,0.041257,0.333333,21,0.01633,P28335,0
60,Dopamine D4 receptor,CEREBROVASCULAR DISORDER,Nervous system disorders,0.330824,0.363636,16.843537,0.032813,0.190476,21,0.030253,P21917,1
59,Alpha-1a adrenergic receptor,CEREBROVASCULAR DISORDER,Nervous system disorders,0.334303,0.363636,18.909091,0.029333,0.363636,11,0.01196,P35348,0
165,Serotonin 2a (5-HT2a) receptor,CONDUCTION DISORDER,Cardiac disorders,0.13386,0.162162,6.645161,0.028302,0.333333,18,0.023492,P28223,0
164,Dopamine D3 receptor,CONDUCTION DISORDER,Cardiac disorders,0.177727,0.214286,7.187166,0.036559,0.352941,17,0.043396,P35462,1
166,Serotonin 2c (5-HT2c) receptor,CONDUCTION DISORDER,Cardiac disorders,0.186859,0.222222,7.793651,0.035363,0.333333,18,0.036161,P28335,0


In [52]:
len(faers_novel_target_AEs)

9

In [None]:
# 9, take away 3 effects (no family members) > left with 6 unique AEs. Higher LR or PPV in 2 cases (pleurothotonus, NMS)

In [270]:
sider_perf_unbound_merged.loc[sider_perf_unbound_merged['Adverse Event']=='PLEUROTHOTONUS']

Unnamed: 0,accession,nr compounds,nr compounds with AE,ae_hit_rate,nr compounds without AE,nae_hit_rate,nr compounds active,nr compounds inactive,Adverse Event,Likelihood Ratio,p-value,activity_vector,ae_vector,molregnos,active_molregnos,predicted_vector,corrected p-value,tid,pref_name,target_type,accession_organism,target_organism,PPV,ae_prevalence,specificity,PRU,improvement_over_prevalence,level_1,level_2,level_3,level_4,level_5,level_6,integrated_level,Term,HLT,SOC,PT,dataset,known_pt,known_hlt


In [55]:
sider_novel_target_AEs = list(sider_perf_unbound_merged.loc[(sider_perf_unbound_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(~sider_perf_unbound_merged['accession'].isin(known_targets))]['Adverse Event'])
sider_novel_target_AEs

['TREMOR',
 'NASAL CONGESTION',
 'NEUROLEPTIC MALIGNANT SYNDROME',
 'NEUROLEPTIC MALIGNANT SYNDROME',
 'NEUROLEPTIC MALIGNANT SYNDROME',
 'EXTRAPYRAMIDAL DISORDER',
 'TARDIVE DYSKINESIA',
 'TARDIVE DYSKINESIA',
 'ASPHYXIA',
 'BRAIN OEDEMA',
 'THROAT TIGHTNESS',
 'THROAT TIGHTNESS',
 'PARKINSONISM',
 'LARYNGEAL OEDEMA',
 'HEPATIC NECROSIS',
 'HEPATIC NECROSIS',
 'JAUNDICE CHOLESTATIC',
 'JAUNDICE CHOLESTATIC',
 'JAUNDICE CHOLESTATIC',
 'JAUNDICE CHOLESTATIC',
 'PULMONARY OEDEMA',
 'PARAESTHESIA',
 'PARAESTHESIA',
 'OROPHARYNGEAL PAIN',
 'OROPHARYNGEAL DISCOMFORT',
 'LOSS OF CONSCIOUSNESS']

In [56]:
pd.set_option('display.max_rows',100)

In [57]:
sider_novel_plus_others = sider_perf_unbound_merged.loc[sider_perf_unbound_merged['Adverse Event'].isin(sider_novel_target_AEs)].sort_values(by=['Adverse Event','improvement_over_prevalence','pref_name'])[['pref_name', 'Adverse Event', 'SOC', 'improvement_over_prevalence','PPV', 'Likelihood Ratio','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'corrected p-value', 'known_hlt', 'accession']]

In [58]:
sider_novel_plus_others['novel target'] = sider_novel_plus_others['accession'].apply(lambda x: 0 if x in known_targets else 1)

In [59]:
sider_novel_plus_others

Unnamed: 0,pref_name,Adverse Event,SOC,improvement_over_prevalence,PPV,Likelihood Ratio,ae_prevalence,ae_hit_rate,nr compounds with AE,corrected p-value,known_hlt,accession,novel target
118,Dopamine D2 receptor,ASPHYXIA,"Respiratory, thoracic and mediastinal disorders",0.186631,0.2,18.45,0.013369,0.6,5,0.005620594,0,P14416,0
114,Muscarinic acetylcholine receptor M1,ASPHYXIA,"Respiratory, thoracic and mediastinal disorders",0.235915,0.25,23.333333,0.014085,0.4,5,0.04293683,0,P11229,0
119,Muscarinic acetylcholine receptor M5,ASPHYXIA,"Respiratory, thoracic and mediastinal disorders",0.235994,0.25,23.466667,0.014006,0.4,5,0.04424855,0,P08912,0
117,Muscarinic acetylcholine receptor M3,ASPHYXIA,"Respiratory, thoracic and mediastinal disorders",0.24006,0.25,33.2,0.00994,0.4,5,0.03160968,0,P20309,0
116,Serotonin 6 (5-HT6) receptor,ASPHYXIA,"Respiratory, thoracic and mediastinal disorders",0.276557,0.285714,43.28,0.009158,0.4,5,0.04140547,0,P50406,1
115,Dopamine D1 receptor,ASPHYXIA,"Respiratory, thoracic and mediastinal disorders",0.386631,0.4,49.2,0.013369,0.4,5,0.02288373,0,P21728,0
206,Serotonin 2a (5-HT2a) receptor,BRAIN OEDEMA,Nervous system disorders,0.175092,0.190476,15.058824,0.015385,0.5,8,0.009862467,0,P28223,0
205,Dopamine D3 receptor,BRAIN OEDEMA,Nervous system disorders,0.208851,0.230769,13.3875,0.021918,0.375,8,0.03506849,0,P35462,1
207,Dopamine D2 receptor,BRAIN OEDEMA,Nervous system disorders,0.245276,0.266667,16.636364,0.02139,0.5,8,0.002216152,0,P14416,0
204,Dopamine D1 receptor,BRAIN OEDEMA,Nervous system disorders,0.37861,0.4,30.5,0.02139,0.25,8,0.04052638,0,P21728,0


In [60]:
len(set(sider_novel_target_AEs))

17

In [None]:
# 17, take away 5 (HEPATIC NECROSIS, JAUNDICE CHOLESTATIC, PARAESTHESIA, PULMONARY OEDEMA, LOSS OF CONSCIOUSNESS), higher PPV or LR in 0 cases ()

In [63]:
1 - (2 / (12 + 6))

0.8888888888888888

In [288]:
known_merged.loc[(~known_merged['PT'].isnull())&(known_merged['PT'].str.contains('SEDATION'))]

Unnamed: 0,Accession,ChEMBL target name,Original effect,Formatted effect,PubMed ID,Reference,Annotated MedDRA PT,Annotated MedDRA PT Code,PT,HLT,Term,Annotated MedDRA HLT Code
646,P08913,Alpha-2a adrenergic receptor,sedation,Sedation,PMID23197038,"Bowes J, Brown AJ, Hamon J, Jarolimek W, Sridh...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
647,P08913,Alpha-2a adrenergic receptor,sedation,Sedation,PMID28216264,"Lynch JJ 3rd, Van Vleet TR, Mittelstadt SW, Bl...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
648,P18089,Alpha-2b adrenergic receptor,sedation,Sedation,PMID28216264,"Lynch JJ 3rd, Van Vleet TR, Mittelstadt SW, Bl...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
649,P14867,GABA receptor alpha-1 subunit,sedation,Sedation,PMID23197038,"Bowes J, Brown AJ, Hamon J, Jarolimek W, Sridh...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
650,P35367,Histamine H1 receptor,Sedation,Sedation,PMID23197038,"Bowes J, Brown AJ, Hamon J, Jarolimek W, Sridh...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
651,P35367,Histamine H1 receptor,sedation,Sedation,PMID28216264,"Lynch JJ 3rd, Van Vleet TR, Mittelstadt SW, Bl...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
652,Q9Y5N1,Histamine H3 receptor,causes sedation,Causes sedation,PMID16243262,"Whitebread S, Hamon J, Bojanic D, Urban L. Key...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
653,P41145,Kappa opioid receptor,sedation,Sedation,PMID23197038,"Bowes J, Brown AJ, Hamon J, Jarolimek W, Sridh...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
654,P35372,Mu opioid receptor,Sedation,Sedation,PMID23197038,"Bowes J, Brown AJ, Hamon J, Jarolimek W, Sridh...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,
655,P35372,Mu opioid receptor,sedation,Sedation,PMID28216264,"Lynch JJ 3rd, Van Vleet TR, Mittelstadt SW, Bl...",SEDATION,10039897.0,SEDATION,DISTURBANCES IN CONSCIOUSNESS NEC,SEDATION,


### Examples of known associations lowest ranking /not significant (of positive ones tested).

In [16]:
faers_perf_unbound_pos = pd.read_csv(basedir + '/ae_target_links/output/' + faers_data_unbound['dir'] + '/pos_assoc_performance.txt', sep='\t')
sider_perf_unbound_pos = pd.read_csv(basedir + '/ae_target_links/output/' + sider_data_unbound['dir'] + '/pos_assoc_performance.txt', sep='\t')
faers_perf_cutoff_pos = pd.read_csv(basedir + '/ae_target_links/output/' + faers_data_cutoff['dir'] + '/pos_assoc_performance.txt', sep='\t')
sider_perf_cutoff_pos = pd.read_csv(basedir + '/ae_target_links/output/' + sider_data_cutoff['dir'] + '/pos_assoc_performance.txt', sep='\t')

In [17]:
faers_perf_unbound_pos_merged = faers_perf_unbound_pos.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')
sider_perf_unbound_pos_merged = sider_perf_unbound_pos.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')
faers_perf_cutoff_pos_merged = faers_perf_cutoff_pos.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')
sider_perf_cutoff_pos_merged = sider_perf_cutoff_pos.merge(meddra_hier_selection, left_on='Adverse Event', right_on=' Term')

In [18]:
for df in [faers_perf_unbound_pos_merged, sider_perf_unbound_pos_merged, faers_perf_cutoff_pos_merged, sider_perf_cutoff_pos_merged]:
    df['known_pt'] = df.apply(find_pt_known, axis=1)
    df['known_hlt'] = df.apply(find_hlt_known, axis=1)

In [52]:
faers_perf_unbound_pos_merged.loc[(faers_perf_unbound_pos_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(faers_perf_unbound_pos_merged['known_pt']==1)].sort_values(by='PPV', ascending=True).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'corrected p-value', 'known_pt']]

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,corrected p-value,known_pt
38154,Muscarinic acetylcholine receptor M2,CARDIAC FAILURE,Cardiac disorders,0.0,1.469298,1,0.034582,0.041667,24,0.600691,1
17251,Acetylcholinesterase,COMA,Nervous system disorders,0.0,2.629252,1,0.059611,0.020408,49,0.366785,1
58860,Serotonin transporter,TREMOR,Nervous system disorders,0.032258,1.969697,1,0.016641,0.090909,11,0.596211,1
38160,Dopamine D2 receptor,CARDIAC FAILURE,Cardiac disorders,0.037037,1.174359,1,0.031712,0.066667,15,0.670959,1
26154,Alpha-2a adrenergic receptor,SEDATION,Nervous system disorders,0.037037,1.012146,1,0.036609,0.052632,19,1.0,1
24811,Muscarinic acetylcholine receptor M3,INCREASED BRONCHIAL SECRETION,"Respiratory, thoracic and mediastinal disorders",0.045455,5.857143,1,0.008065,0.2,5,0.6372,1
38159,Alpha-2b adrenergic receptor,CARDIAC FAILURE,Cardiac disorders,0.045455,1.56044,1,0.029613,0.076923,13,0.645424,1
26153,Alpha-2b adrenergic receptor,SEDATION,Nervous system disorders,0.045455,1.445578,1,0.031891,0.071429,14,0.645424,1
25047,Muscarinic acetylcholine receptor M3,COUGH,"Respiratory, thoracic and mediastinal disorders",0.045455,2.904762,1,0.016129,0.1,10,0.6372,1
25037,Muscarinic acetylcholine receptor M1,COUGH,"Respiratory, thoracic and mediastinal disorders",0.047619,4.46,1,0.011086,0.2,5,0.641945,1


In [251]:
sider_perf_unbound_pos_merged.loc[(sider_perf_unbound_pos_merged['SOC'].isin(['Nervous system disorders', 'Hepatobiliary disorders','Cardiac disorders', 'Respiratory, thoracic and mediastinal disorders']))&(sider_perf_unbound_pos_merged['known_pt']==1)].sort_values(by='PPV', ascending=True).head(10)[['pref_name', 'Adverse Event', 'SOC', 'PPV', 'Likelihood Ratio', 'known_hlt','ae_prevalence','ae_hit_rate', 'nr compounds with AE', 'corrected p-value', 'known_pt']]

Unnamed: 0,pref_name,Adverse Event,SOC,PPV,Likelihood Ratio,known_hlt,ae_prevalence,ae_hit_rate,nr compounds with AE,corrected p-value,known_pt
5792,Dopamine D1 receptor,CARDIAC FAILURE,Cardiac disorders,0.0,2.974138,1,0.07754,0.034483,29,0.430987,1
10759,Serotonin 2a (5-HT2a) receptor,MYOCLONUS,Nervous system disorders,0.047619,3.2,1,0.015385,0.125,8,0.467008,1
11389,Serotonin 2a (5-HT2a) receptor,SEROTONIN SYNDROME,Nervous system disorders,0.047619,5.15,1,0.009615,0.2,5,0.42181,1
5809,Alpha-2b adrenergic receptor,CARDIAC FAILURE,Cardiac disorders,0.083333,1.016234,1,0.082111,0.035714,28,1.0,1
4412,Serotonin 2a (5-HT2a) receptor,HYPERREFLEXIA,Nervous system disorders,0.095238,5.976608,1,0.017308,0.222222,9,0.253689,1
3407,Delta opioid receptor,RESPIRATORY DEPRESSION,"Respiratory, thoracic and mediastinal disorders",0.111111,1.940217,1,0.060526,0.043478,23,0.619775,1
8988,Serotonin 1b (5-HT1b) receptor,MEMORY IMPAIRMENT,Nervous system disorders,0.125,3.805195,1,0.036184,0.090909,11,0.495199,1
11417,Muscarinic acetylcholine receptor M2,PULMONARY CONGESTION,"Respiratory, thoracic and mediastinal disorders",0.142857,15.583333,1,0.010582,0.166667,6,0.255513,1
7915,Acetylcholinesterase,INCREASED BRONCHIAL SECRETION,"Respiratory, thoracic and mediastinal disorders",0.142857,21.333333,1,0.007752,0.2,5,0.398461,1
6242,Alpha-1a adrenergic receptor,ARRHYTHMIA,Cardiac disorders,0.142857,1.022222,1,0.140187,0.022222,45,1.0,1


In [296]:
sider_perf_unbound_merged.loc[sider_perf_unbound_merged['pref_name']=='Muscarinic acetylcholine receptor M2']

Unnamed: 0,accession,nr compounds,nr compounds with AE,ae_hit_rate,nr compounds without AE,nae_hit_rate,nr compounds active,nr compounds inactive,Adverse Event,Likelihood Ratio,p-value,activity_vector,ae_vector,molregnos,active_molregnos,predicted_vector,corrected p-value,tid,pref_name,target_type,accession_organism,target_organism,PPV,ae_prevalence,specificity,PRU,improvement_over_prevalence,level_1,level_2,level_3,level_4,level_5,level_6,integrated_level,Term,HLT,SOC,PT,dataset,known_pt,known_hlt
5,P08172,567,114,0.052632,453,0.002208,7.0,560.0,TREMOR,23.842105,0.000347,"[1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, ...","[1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, ...","[573, 3965, 6216, 6643, 6968, 8917, 27368, 276...","[573, 3965, 6216, 27368, 27629, 77779, 429737]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.033645,47,Muscarinic acetylcholine receptor M2,SINGLE PROTEIN,Homo sapiens,Homo sapiens,0.857143,0.201058,0.997792,0.821192,0.656085,Membrane receptor,Family A G protein-coupled receptor,Small molecule receptor (family A GPCR),Monoamine receptor,Acetylcholine receptor,,Family A G protein-coupled receptor,TREMOR,TREMOR (EXCL CONGENITAL),Nervous system disorders,Tremor,SIDER,1,1
29,P08172,567,40,0.125,527,0.003795,7.0,560.0,NASAL CONGESTION,32.9375,2.6e-05,"[1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, ...","[1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, ...","[573, 3965, 6216, 6643, 6968, 8917, 27368, 276...","[573, 3965, 6216, 27368, 27629, 77779, 429737]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.007534,47,Muscarinic acetylcholine receptor M2,SINGLE PROTEIN,Homo sapiens,Homo sapiens,0.714286,0.070547,0.996205,0.6926,0.643739,Membrane receptor,Family A G protein-coupled receptor,Small molecule receptor (family A GPCR),Monoamine receptor,Acetylcholine receptor,,Family A G protein-coupled receptor,NASAL CONGESTION,NASAL CONGESTION AND INFLAMMATIONS,"Respiratory, thoracic and mediastinal disorders",Nasal congestion,SIDER,0,0
282,P08172,567,180,0.038889,387,0.0,7.0,560.0,SOMNOLENCE,inf,0.0003,"[1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, ...","[1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, ...","[573, 3965, 6216, 6643, 6968, 8917, 27368, 276...","[573, 3965, 6216, 27368, 27629, 77779, 429737]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.033645,47,Muscarinic acetylcholine receptor M2,SINGLE PROTEIN,Homo sapiens,Homo sapiens,1.0,0.31746,1.0,1.0,0.68254,Membrane receptor,Family A G protein-coupled receptor,Small molecule receptor (family A GPCR),Monoamine receptor,Acetylcholine receptor,,Family A G protein-coupled receptor,SOMNOLENCE,DISTURBANCES IN CONSCIOUSNESS NEC,Nervous system disorders,Somnolence,SIDER,0,0
