# Tissues

Assign tissue from assay descriptions, where possible.

In [1]:
import re
from itertools import chain, combinations

In [2]:
def insert_last_after(df, col):
    """Return `df` with its last column moved to immediately after column `col`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are to be reordered; it is not modified.
    col : column label
        Existing column after which the last column is placed.

    Returns
    -------
    pandas.DataFrame
        `df` selected with the reordered column list.

    Raises
    ------
    ValueError
        If `col` is not one of the columns of `df`.
    """
    # Read the columns from the argument, not from the notebook-global `data`
    cols = df.columns.values.tolist()

    position = cols.index(col) + 1

    # `col` is already the last column: nothing to move (plain slicing would select it twice)
    if position == len(cols):
        return df[cols]

    return df[cols[:position] + [cols[-1]] + cols[position:-1]]

In [3]:
# Load the curve data (IC50, Ki, Kd etc.)...
data = pd.read_pickle('data.pkl')

# Keep every column except 'tissue', so a previously-assigned 'tissue' column is not carried over
data = data.loc[:, data.columns != 'tissue']

data.shape

(1317, 29)

In [4]:
# Distinct assay descriptions (one entry per unique description text)
description = data['description'].drop_duplicates()

len(description)

135

In [5]:
# Search terms for each tissue; a description is assigned to a tissue when it
# contains any of that tissue's terms as a whole word (case-insensitive).
terms = {
    'Brain':           ['brain', 'cortex', 'cortical', 'cerebral', 'cerebrum', 'neuron', 'neuronal', 'ganglion'],
    'Heart':           ['heart', 'cardiac', 'myocardium', 'myocardial', 'myocytes', 'ventricle', 'atrium', 'atrial'],  # 'verapamil', 'diltiazem'
    'Smooth Muscle':   ['smooth muscle', 'artery', 'aortic', 'aorta', 'ileum'],  # 'nitrendipine', 'dihydropyridine', 'DHP'
    'Skeletal Muscle': ['skeletal muscle', 'striated muscle'],
    'Secretory':       ['pituitary', 'GH4C1'],
}

# Tissue name -> list of matching descriptions.
# Terms are escaped so they are always matched literally, and na=False treats a
# missing description as unmatched rather than producing an invalid boolean mask.
hits = {
    name: description[
        description.str.contains(
            r'\b(?:' + '|'.join(re.escape(word) for word in words) + r')\b',
            case=False,
            na=False,
        )
    ].tolist()
    for name, words in terms.items()
}

In [6]:
# Number of descriptions not matched by any tissue's terms...

len(description) - len(set().union(*hits.values()))

54

In [7]:
# Descriptions that did not match any tissue's terms
description[~description.isin(set().union(*hits.values()))]

336                                                 Reduction in amplitude of calcium-dependent slow-response action potential by blocking voltage sensitive calcium channel
144     Inhibition of human Cav1.3 channel in human SH-SY5Y cells assessed as 70 mM K+ induced calcium elevation compound treated 15 mins before stimulus by Fluo-4/AM assay
810                                                                                                   Displacement of [3H]nitrendipine from calcium channel receptors (CCRs)
811                                                                                                             Inhibitory constant against calcium channel receptors (CCRs)
813                                                                                                           Displacement of [3H]nitrendipine from calcium channel receptor
469                                           Antagonist activity at rabbit voltage-dependent L-type calcium channel Cav1.2alpha-1C exp

In [8]:
# Overlaps: pairs of tissues sharing at least one description, with the number shared...

[
    (first, second, len(shared))
    for first, second, shared in (
        (first, second, set(hits[first]) & set(hits[second]))
        for first, second in combinations(terms.keys(), 2)
    )
    if shared
]

[]

In [9]:
# One row per matched description, indexed by description, giving its assigned tissue
tissue = pd.DataFrame(
    [(text, name) for name, texts in hits.items() for text in texts],
    columns=[u'description', u'tissue']
).set_index(u'description')

tissue.head()

Unnamed: 0_level_0,tissue
description,Unnamed: 1_level_1
Calcium antagonistic activity by measuring [3H]nitrendipine displacement at L-type [Ca2+] channel in rat cortex homogenate,Brain
Inhibition of [3H]nitrendipine binding at L-type [Ca2+] channel in rat cortex homogenate by 50%.,Brain
Ability to inhibit [3H]nitrendipine binding to the calcium channel receptor(CCR) in rat cerebral cortex homogenate.,Brain
Displacement of [3H]desmethoxy-verapamil from L-type calcium channel of bovine frontal cortex membranes,Brain
Displacement of [3H]nitrendipine from L-type calcium channel of bovine frontal cortex membranes.,Brain


In [10]:
# (number of distinct descriptions, number of descriptions assigned a tissue)
len(description), len(tissue)

(135, 81)

In [11]:
# Save the description -> tissue table to 'tissues.pkl'...

tissue.to_pickle('tissues.pkl')

### Merging with data

Investigate merging the tissue assignments with the main data table.

In [12]:
# A description listed under more than one tissue would duplicate rows in the left merge below
assert tissue.index.is_unique, 'a description is assigned to more than one tissue'

# Drop any 'tissue' column left by a previous run of this cell, so that re-running it
# does not produce 'tissue_x'/'tissue_y' columns
data = data.loc[:, data.columns != 'tissue'].merge(tissue, how='left', left_on='description', right_index=True)

# The merged 'tissue' column is appended last; move it next to 'species'
data = insert_last_after(data, 'species')

data.shape

(1317, 30)

In [15]:
# Render the first two rows of the merged table as HTML
HTML(data.iloc[:2].to_html())

Unnamed: 0,target_chemblid,pref_name,target_type,organism,species,tissue,relationship_type,assay_chemblid,description,assay_organism,parent_cmpd_chemblid,compound_class,standard_type,standard_relation,standard_value,standard_units,pchembl_value,activity_comment,data_validity_comment,potential_duplicate,cmpd_chemblid,compound_key,published_type,published_relation,published_value,published_units,doc_chemblid,pubmed_id,reference,active
0,CHEMBL1940,Voltage-gated L-type calcium channel alpha-1C subunit,SINGLE PROTEIN,Homo sapiens,Cat,Heart,H,CHEMBL656260,Inhibition of (-)-[3H]- D-888 binding to L-type calcium channels in kitten heart ventricle membranes,Felis catus,CHEMBL138302,PAA,IC50,=,260,nM,6.59,,,,CHEMBL138302,2d,IC50,=,0.26,uM,CHEMBL1127038,8474099,"J. Med. Chem., v. 36, p. 439 (1993)",1
1,CHEMBL1940,Voltage-gated L-type calcium channel alpha-1C subunit,SINGLE PROTEIN,Homo sapiens,Cat,Heart,H,CHEMBL656260,Inhibition of (-)-[3H]- D-888 binding to L-type calcium channels in kitten heart ventricle membranes,Felis catus,CHEMBL138302,PAA,IC50,=,390,nM,6.41,,,,CHEMBL138302,2a,IC50,=,0.39,uM,CHEMBL1127038,8474099,"J. Med. Chem., v. 36, p. 439 (1993)",1


Note that those assays for which a tissue cannot be determined end up with NaN in the `tissue` column...

In [16]:
# Number of rows with no tissue assigned
pd.isnull(data['tissue']).sum()

649

In [17]:
# First two rows with no tissue assigned
data.loc[data['tissue'].isnull()].head(2)

Unnamed: 0,target_chemblid,pref_name,target_type,organism,species,tissue,relationship_type,assay_chemblid,description,assay_organism,parent_cmpd_chemblid,compound_class,standard_type,standard_relation,standard_value,standard_units,pchembl_value,activity_comment,data_validity_comment,potential_duplicate,cmpd_chemblid,compound_key,published_type,published_relation,published_value,published_units,doc_chemblid,pubmed_id,reference,active
336,CHEMBL2830,Voltage-gated L-type calcium channel alpha-1C subunit,SINGLE PROTEIN,Oryctolagus cuniculus,Rabbit,,D,CHEMBL653771,Reduction in amplitude of calcium-dependent slow-response action potential by blocking voltage sensitive calcium channel,Oryctolagus cuniculus,CHEMBL158853,BTZ,IC50,=,140,nM,6.85,,,,CHEMBL158853,8,IC50,=,0.14,uM,CHEMBL1123712,2435903,"J. Med. Chem., v. 30, p. 635 (1987)",1
144,CHEMBL4138,Voltage-gated L-type calcium channel alpha-1D subunit,SINGLE PROTEIN,Homo sapiens,Human,,D,CHEMBL3270056,Inhibition of human Cav1.3 channel in human SH-SY5Y cells assessed as 70 mM K+ induced calcium elevation compound treated 15 mins before stimulus by Fluo-4/AM assay,Homo sapiens,CHEMBL193,DHP,IC50,=,1350,nM,5.87,,,,CHEMBL193,nifedipine,IC50,=,1.35,uM,CHEMBL3259712,24754640,"J. Med. Chem., v. 57, p. 4313 (2014)",1


This can lead to problems on rendering or export of the data.

However, these can be fixed by replacing the NaNs with the empty string...

In [18]:
# Assign the filled column back rather than calling fillna(inplace=True) on the
# selected column: the in-place form is a chained assignment, which recent pandas
# versions warn about and which does not update `data` under Copy-on-Write.
data['tissue'] = data['tissue'].fillna('')

In [19]:
# Confirm that no missing values remain in the 'tissue' column
pd.isnull(data['tissue']).sum()

0