In [27]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
import pandas as pd
import numpy as np
import re
from src.config import *

# Filter LINCS coumpound matedata
- Read in metadata and ligand-receptor interaction resource
- Set sign to activators and inhibitors and filter items where direction of interaction is not known
- Save modified metadata for LINCS coupounds

## Read in data

In [29]:
cp_info = pd.read_table(f'{LINCS_DATA_DIR}/compoundinfo_beta.txt', low_memory=False)
cp_info = cp_info.dropna(subset = 'moa')
cp_info_rec = cp_info[cp_info.moa.str.contains('receptor', case = False)]

In [30]:
# omnipath signed ligand-receptor interactions
lr_interactions = pd.read_csv('data/receptor_ligand_association/liana_omni_receptor_ligand_interactions_curated.csv', index_col =0)


In [31]:
targets = set(lr_interactions.target_genesymbol)
moas = set(cp_info_rec.moa)

In [32]:
data = cp_info[cp_info.target.isin(targets) | cp_info.moa.isin(moas)]

## Set sign

In [33]:
activators=['agonist', 'activator', 'stimulant', 'enhancer', 'reactivator', 'inducer', 'positive']
inhibitors=['inhibitor', 'antagonist', 'blocker', 'downregulator', 'destabilizer', 'negative']

In [34]:
def add_sign(s):
    if s['activator'] ==1:
        return 1
    elif s['inhibitor'] ==1:
        return -1
    else: 
        return 0

In [35]:
data["activator"] = data['moa'].apply(lambda x: 1 if any(i in activators for i in re.split('-| ', x.lower())) else 0)
data["inhibitor"] = data['moa'].apply(lambda x: 1 if any(i in inhibitors for i in re.split('-| ', x.lower())) else 0)
data['sign'] = data.apply(add_sign, axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["activator"] = data['moa'].apply(lambda x: 1 if any(i in activators for i in re.split('-| ', x.lower())) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["inhibitor"] = data['moa'].apply(lambda x: 1 if any(i in inhibitors for i in re.split('-| ', x.lower())) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#re

In [36]:
# delete rows with sign = 0 - inhibitor or activator synonyms not in rows
data[(data['cmap_name'] == 'AMN-082') & (data['target'] == 'GRM7')]
# be careful with it, because in some cases on the same receptor is in modulator (sign not given) and also activator... eg. 

Unnamed: 0,pert_id,cmap_name,target,moa,canonical_smiles,inchi_key,compound_aliases,activator,inhibitor,sign
36840,BRD-K23335153,AMN-082,GRM7,Glutamate receptor modulator,C(CNC(c1ccccc1)c1ccccc1)NC(c1ccccc1)c1ccccc1,DTZDSNQYNPNCPK-UHFFFAOYSA-N,,0,0,0
39285,BRD-K23335153,AMN-082,GRM7,Glutamate receptor positive allosteric modulator,C(CNC(c1ccccc1)c1ccccc1)NC(c1ccccc1)c1ccccc1,DTZDSNQYNPNCPK-UHFFFAOYSA-N,,1,0,1


In [37]:
# delete rows with sign = 0 - inhibitor or activator synonyms not in rows
data = data[data['sign'] != 0]

In [38]:
# There are some drugs that are agonists and antagonists on the same receptor target wg. melatonin/tramadol/TFMPP (20)
data[(data['cmap_name'] == 'TFMPP') & (data['target'] == 'HTR1D')]


Unnamed: 0,pert_id,cmap_name,target,moa,canonical_smiles,inchi_key,compound_aliases,activator,inhibitor,sign
774,BRD-K94887716,TFMPP,HTR1D,Serotonin receptor agonist,FC(F)(F)c1cccc(c1)N2CCNCC2,KKIMDKMETPPURN-UHFFFAOYSA-N,tfmpp,1,0,1
825,BRD-K94887716,TFMPP,HTR1D,Serotonin receptor antagonist,FC(F)(F)c1cccc(c1)N2CCNCC2,KKIMDKMETPPURN-UHFFFAOYSA-N,tfmpp,0,1,-1


Filter out drugs with multiple sign on the same receptor

In [39]:
drugs_to_delete = data[['cmap_name', 'target', 'sign']].groupby(['cmap_name', 'target']).nunique('sign')
drugs_to_delete = list(drugs_to_delete[drugs_to_delete.sign > 1].index.get_level_values(0))

In [40]:
data = data[~data.cmap_name.isin(drugs_to_delete)]

In [41]:
data.head(3)

Unnamed: 0,pert_id,cmap_name,target,moa,canonical_smiles,inchi_key,compound_aliases,activator,inhibitor,sign
604,BRD-K39381259,DMH1,ACVR1,ALK inhibitor,CC(C)Oc1ccc(cc1)-c1cnc2c(cnn2c1)-c1ccnc2ccccc12,JMIFGARJSWXZSH-UHFFFAOYSA-N,DMH-1,0,1,-1
627,BRD-K42828737,sunitinib,FLT1,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
628,BRD-K42828737,sunitinib,FLT3,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1


## Save data

In [43]:
data

Unnamed: 0,pert_id,cmap_name,target,moa,canonical_smiles,inchi_key,compound_aliases,activator,inhibitor,sign
604,BRD-K39381259,DMH1,ACVR1,ALK inhibitor,CC(C)Oc1ccc(cc1)-c1cnc2c(cnn2c1)-c1ccnc2ccccc12,JMIFGARJSWXZSH-UHFFFAOYSA-N,DMH-1,0,1,-1
627,BRD-K42828737,sunitinib,FLT1,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
628,BRD-K42828737,sunitinib,FLT3,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
629,BRD-K42828737,sunitinib,FLT4,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
630,BRD-K42828737,sunitinib,KDR,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
...,...,...,...,...,...,...,...,...,...,...
39309,BRD-A61599461,BRD-A61599461,TSHR,Thyroid-stimulating hormone receptor inverse a...,COc1ccc(cc1COc1c(C)cccc1C)C1Nc2ccccc2C(=O)N1Cc...,ODFGSMOTQLYMHU-UHFFFAOYSA-N,,1,0,1
39310,BRD-A81177136,KN-62,P2RX7,Calcium/calmodulin dependent protein kinase in...,CN(C(Cc1ccc(OS(=O)(=O)c2cccc3cnccc23)cc1)C(=O)...,RJVLFQBBRSMWHX-UHFFFAOYSA-N,,0,1,-1
39315,BRD-K99504665,goserelin-acetate,GNRHR,Gonadotropin releasing factor hormone receptor...,CC(C)C[C@H](NC(=O)[C@@H](COC(C)(C)C)NC(=O)[C@H...,BLCLNMBMMGCOAS-URPVMXJPSA-N,,1,0,1
39316,BRD-K62685538,triptorelin,GNRHR,Gonadotropin releasing factor hormone receptor...,CC(C)C[C@H](NC(=O)[C@@H](Cc1c[nH]c2ccccc12)NC(...,VXKHXGOKWPXYNA-PGBVPBMZSA-N,,1,0,1


In [44]:
data.to_csv('data/filtered_lincs_meta/filtered_coumpound_info_to_receptor_perturbation_signatures_signed.csv')

In [None]:
data.head()

Unnamed: 0,pert_id,cmap_name,target,moa,canonical_smiles,inchi_key,compound_aliases,activator,inhibitor,sign
604,BRD-K39381259,DMH1,ACVR1,ALK inhibitor,CC(C)Oc1ccc(cc1)-c1cnc2c(cnn2c1)-c1ccnc2ccccc12,JMIFGARJSWXZSH-UHFFFAOYSA-N,DMH-1,0,1,-1
627,BRD-K42828737,sunitinib,FLT1,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
628,BRD-K42828737,sunitinib,FLT3,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
629,BRD-K42828737,sunitinib,FLT4,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
630,BRD-K42828737,sunitinib,KDR,KIT inhibitor,CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2/C(=O)Nc3ccc(F)...,WINHZLLDWRZWRT-ATVHPVEESA-N,sunitinib-malate,0,1,-1
