# Multi-target symbols

Symbols corresponding to multiple ChEMBL targets.

In [1]:
from itertools import product

import pandas as pd
from IPython.display import HTML

In [2]:
# Load the per-target table (one row per symbol / ChEMBL target pairing).
# NOTE(review): pickle files execute arbitrary code on load -- only read
# 'chembl_targets.pkl' if it was produced by a trusted upstream step.
targets = pd.read_pickle('chembl_targets.pkl')

# Preview the first five rows.
targets.head(n=5)

Unnamed: 0,symbol,approved_name,targets,n_target,chembl_id,target_type,pref_name,species
0,ABCB1,"ATP-binding cassette, sub-family B (MDR/TAP), member 1","MDR1;7|MDR1;8|ATP-binding cassette, sub-family B (MDR/TAP), member 1;11",1,CHEMBL4302,SINGLE PROTEIN,P-glycoprotein 1,Human
1,ABCB1,"ATP-binding cassette, sub-family B (MDR/TAP), member 1","MDR1;7|MDR1;8|ATP-binding cassette, sub-family B (MDR/TAP), member 1;11",1,CHEMBL1075229,SINGLE PROTEIN,Multidrug resistance protein 1,Rat
2,ABCB11,"ATP-binding cassette, sub-family B (MDR/TAP), member 11",BSEP;6|BSEP;7|BSEP;8,1,CHEMBL6020,SINGLE PROTEIN,Bile salt export pump,Human
3,ABCB11,"ATP-binding cassette, sub-family B (MDR/TAP), member 11",BSEP;6|BSEP;7|BSEP;8,1,CHEMBL2073674,SINGLE PROTEIN,Bile salt export pump,Rat
4,ABCB4,"ATP-binding cassette, sub-family B (MDR/TAP), member 4",MDR3;7|MDR3;8,1,CHEMBL1743129,SINGLE PROTEIN,Multidrug resistance protein 3,Human


In [3]:
def f(df):
    """Collapse the rows for one symbol into a single per-species summary record.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows sharing one symbol. Must contain the columns 'symbol',
        'species', 'pref_name', 'target_type' and 'chembl_id'.

    Returns
    -------
    dict
        Keys 'symbol', 'Human_targets', 'Human_N', 'Rat_targets', 'Rat_N'.
        Each '<species>_targets' value is a tuple of
        (pref_name, target_type, chembl_id) tuples, or the empty string ''
        when the symbol has no rows for that species; '<species>_N' is the
        number of targets for that species (0 when there are none).
    """
    record = {'symbol': df.iloc[0]['symbol']}

    for species in ['Human', 'Rat']:

        rows = df.loc[df['species'] == species, ['pref_name', 'target_type', 'chembl_id']].values

        # Decide emptiness from the number of rows, not from `.any()`:
        # `.any()` reduces over element truthiness, so a species whose rows
        # contain only falsy values (e.g. empty strings) would be wrongly
        # reported as having no targets.
        record[species + '_targets'] = tuple(tuple(x) for x in rows) if len(rows) else ''

        # len('') == 0, so the missing-species sentinel yields a count of 0.
        record[species + '_N'] = len(record[species + '_targets'])

    return record

# Build one summary record per symbol.
# Iterate over the groups directly instead of `groupby(...).apply(f)`: `f`
# reads the 'symbol' column of the sub-frame it receives, and newer pandas
# releases deprecate / stop passing the grouping column to `apply` callbacks.
# Iteration always yields the full sub-frame, in the same sorted key order.
records = [f(group) for _, group in targets.groupby('symbol')]

# Column order: symbol, Human_targets, Human_N, Rat_targets, Rat_N.
cols = ['symbol'] + ['{}_{}'.format(x, y) for x, y in product(['Human', 'Rat'], ['targets', 'N'])]

# NOTE: this rebinds `targets` from the raw per-target table to the
# per-symbol summary (indexed by symbol); re-run the load cell before
# re-running this one.
targets = pd.DataFrame(records, columns=cols).set_index('symbol')

del records

In [4]:
# Render the complete per-symbol summary as an HTML table (no row truncation).
HTML(data=targets.to_html())

Unnamed: 0_level_0,Human_targets,Human_N,Rat_targets,Rat_N
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABCB1,"((P-glycoprotein 1, SINGLE PROTEIN, CHEMBL4302),)",1,"((Multidrug resistance protein 1, SINGLE PROTEIN, CHEMBL1075229),)",1
ABCB11,"((Bile salt export pump, SINGLE PROTEIN, CHEMBL6020),)",1,"((Bile salt export pump, SINGLE PROTEIN, CHEMBL2073674),)",1
ABCB4,"((Multidrug resistance protein 3, SINGLE PROTEIN, CHEMBL1743129),)",1,"((Multidrug resistance protein 3, SINGLE PROTEIN, CHEMBL2073706),)",1
ABCC2,"((Canalicular multispecific organic anion transporter 1, SINGLE PROTEIN, CHEMBL5748),)",1,"((Canalicular multispecific organic anion transporter 1, SINGLE PROTEIN, CHEMBL2073676),)",1
ABCC3,"((Canalicular multispecific organic anion transporter 2, SINGLE PROTEIN, CHEMBL5918),)",1,"((Canalicular multispecific organic anion transporter 2, SINGLE PROTEIN, CHEMBL2073682),)",1
ABCC4,"((Multidrug resistance-associated protein 4, SINGLE PROTEIN, CHEMBL1743128),)",1,"((Multidrug resistance-associated protein 4, SINGLE PROTEIN, CHEMBL2073711),)",1
ABCC6,"((Multidrug resistance-associated protein 6, SINGLE PROTEIN, CHEMBL2073661),)",1,"((Multidrug resistance-associated protein 6, SINGLE PROTEIN, CHEMBL2073712),)",1
ABCG2,"((ATP-binding cassette sub-family G member 2, SINGLE PROTEIN, CHEMBL5393),)",1,,0
ABL1,"((Tyrosine-protein kinase ABL, SINGLE PROTEIN, CHEMBL1862),)",1,,0
ACE,"((Angiotensin-converting enzyme, SINGLE PROTEIN, CHEMBL1808),)",1,"((Angiotensin-converting enzyme, SINGLE PROTEIN, CHEMBL2625),)",1


In [5]:
# Rows where one or the other species has more than one target for the symbol.
multi_target_symbols = targets[(targets['Human_N'] > 1) | (targets['Rat_N'] > 1)]

multi_target_symbols

Unnamed: 0_level_0,Human_targets,Human_N,Rat_targets,Rat_N
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AKT1,"((AKT/p21CIP1, PROTEIN COMPLEX, CHEMBL3038463), (Serine/threonine-protein kinase AKT, SINGLE PROTEIN, CHEMBL4282))",2,"((RAC-alpha serine/threonine-protein kinase, SINGLE PROTEIN, CHEMBL1075215),)",1
CALCR,"((Amylin receptor AMY1, CALCR/RAMP1, PROTEIN COMPLEX, CHEMBL2111189), (Amylin receptor AMY2; CALCR/RAMP2, PROTEIN COMPLEX, CHEMBL2364173), (Amylin receptor AMY3; CALCR/RAMP3, PROTEIN COMPLEX, CHEMBL2111190), (Calcitonin receptor, SINGLE PROTEIN, CHEMBL1832))",4,"((Amylin receptor AMY1, CALCR/RAMP1, PROTEIN COMPLEX, CHEMBL2107837), (Amylin receptor AMY2; CALCR/RAMP2, PROTEIN COMPLEX, CHEMBL2109231), (Amylin receptor AMY3; CALCR/RAMP3, PROTEIN COMPLEX, CHEMBL2109235), (Calcitonin receptor, SINGLE PROTEIN, CHEMBL2204))",4
CALCRL,"((Adrenomedullin receptor AM1; CALCRL/RAMP2, PROTEIN COMPLEX, CHEMBL2109232), (Adrenomedullin receptor, AM2; CALCRL/RAMP3, PROTEIN COMPLEX, CHEMBL2111191), (Calcitonin-gene-related peptide receptor, CALCRL/RAMP1, PROTEIN COMPLEX, CHEMBL2107838), (Calcitonin gene-related peptide type 1 receptor, SINGLE PROTEIN, CHEMBL3798))",4,"((Adrenomedullin receptor AM1; CALCRL/RAMP2, PROTEIN COMPLEX, CHEMBL2103731), (Adrenomedullin receptor, AM2; CALCRL/RAMP3, PROTEIN COMPLEX, CHEMBL2109239), (Calcitonin-gene-related peptide receptor; CALCRL/RAMP1, PROTEIN COMPLEX, CHEMBL2109240), (Calcitonin gene-related peptide type 1 receptor, SINGLE PROTEIN, CHEMBL4755))",4
CDK2,"((CDK2/Cyclin A, PROTEIN COMPLEX, CHEMBL3038469), (CDK2/Cyclin A1, PROTEIN COMPLEX, CHEMBL3038470), (Cyclin-dependent kinase 2/cyclin A, PROTEIN COMPLEX, CHEMBL2094128), (Cyclin-dependent kinase 2/cyclin E, PROTEIN COMPLEX, CHEMBL2094126), (Cyclin-dependent kinase 2/cyclin E1, PROTEIN COMPLEX, CHEMBL1907605), (Cyclin-dependent kinase 2, SINGLE PROTEIN, CHEMBL301))",6,,0
CDK4,"((CDK4/Cyclin D3, PROTEIN COMPLEX, CHEMBL3038472), (Cyclin-dependent kinase 4/cyclin D, PROTEIN COMPLEX, CHEMBL2095942), (Cyclin-dependent kinase 4/cyclin D1, PROTEIN COMPLEX, CHEMBL1907601), (Cyclin-dependent kinase 4/cyclin D2, PROTEIN COMPLEX, CHEMBL3301385), (Cyclin-dependent kinase 4, SINGLE PROTEIN, CHEMBL331))",5,,0
CHRNA1,"((Acetylcholine receptor; alpha1/beta1/delta/gamma, PROTEIN COMPLEX, CHEMBL1907588), (Acetylcholine receptor protein alpha chain, SINGLE PROTEIN, CHEMBL4808))",2,,0
CHRNA4,"((Neuronal acetylcholine receptor; alpha4/beta2, PROTEIN COMPLEX, CHEMBL1907589), (Neuronal acetylcholine receptor; alpha4/beta4, PROTEIN COMPLEX, CHEMBL1907591), (Nicotinic acetylcholine receptor alpha4/beta2/alpha5, PROTEIN COMPLEX, CHEMBL3038461), (Neuronal acetylcholine receptor protein alpha-4 subunit, SINGLE PROTEIN, CHEMBL1882))",4,"((Neuronal acetylcholine receptor; alpha4/beta2, PROTEIN COMPLEX, CHEMBL1907596), (Neuronal acetylcholine receptor; alpha4/beta4, PROTEIN COMPLEX, CHEMBL1907595), (Neuronal acetylcholine receptor protein alpha-4 subunit, SINGLE PROTEIN, CHEMBL307))",3
DRD2,"((Dopamine receptor D2L/neurotensin receptor NTS1, PROTEIN COMPLEX, CHEMBL3038478), (Dopamine D2 receptor, SINGLE PROTEIN, CHEMBL217))",2,"((Dopamine D2 receptor, SINGLE PROTEIN, CHEMBL339),)",1
GRIN1,"((Glutamate NMDA receptor; GRIN1/GRIN2A, PROTEIN COMPLEX, CHEMBL1907604), (Glutamate NMDA receptor; GRIN1/GRIN2B, PROTEIN COMPLEX, CHEMBL1907603), (Ionotropic glutamate receptor NMDA 1/2C, PROTEIN COMPLEX, CHEMBL3038504), (Ionotropic glutamate receptor NMDA 1/2D, PROTEIN COMPLEX, CHEMBL3038505), (Glutamate (NMDA) receptor subunit zeta 1, SINGLE PROTEIN, CHEMBL2015))",5,"((Glutamate NMDA receptor; Grin1/Grin2a, PROTEIN COMPLEX, CHEMBL2096680), (Glutamate NMDA receptor; Grin1/Grin2b, PROTEIN COMPLEX, CHEMBL2096666), (Glutamate NMDA receptor; Grin1/Grin2c, PROTEIN COMPLEX, CHEMBL2096911), (Ionotropic glutamate receptor NMDA 1/2D, PROTEIN COMPLEX, CHEMBL3038506), (Glutamate (NMDA) receptor subunit zeta 1, SINGLE PROTEIN, CHEMBL330))",5
JAK2,"((JAK1/JAK2/TYK2, PROTEIN COMPLEX, CHEMBL3301390), (JAK2/JAK1, PROTEIN COMPLEX, CHEMBL3038492), (JAK2/TYK2, PROTEIN COMPLEX, CHEMBL3301392), (Tyrosine-protein kinase JAK2, SINGLE PROTEIN, CHEMBL2971))",4,"((Tyrosine-protein kinase JAK2, SINGLE PROTEIN, CHEMBL1075225),)",1


In [6]:
# multi_target_symbols.to_pickle('multi_target_symbols.pkl')

In [7]:
# Print, for each species, every symbol that maps to two or more targets,
# followed by a numbered list of those targets.
for species in ['Human', 'Rat']:

    print("Species: {}\n".format(species))

    # Filter before numbering so the printed counter is contiguous; numbering
    # the unfiltered rows and then skipping some leaves gaps in the listing.
    has_multiple = multi_target_symbols[species + '_N'] >= 2
    species_rows = multi_target_symbols.loc[has_multiple, species + '_targets']

    # `species_targets` is deliberately not called `targets`: rebinding that
    # name here would replace the per-symbol summary DataFrame with a tuple
    # and break any cell that uses `targets` afterwards.
    for n, (symbol, species_targets) in enumerate(species_rows.items(), 1):

        print("{:2d}) {}".format(n, symbol))

        for m, (pref_name, target_type, target_chemblid) in enumerate(species_targets, 1):

            print("  {:2d}) {:75s} {:20s} {:12s}".format(m, pref_name, target_type, target_chemblid))

        print('-' * 100)

    print('=' * 200)

Species: Human

 1) AKT1
   1) AKT/p21CIP1                                                                 PROTEIN COMPLEX      CHEMBL3038463
   2) Serine/threonine-protein kinase AKT                                         SINGLE PROTEIN       CHEMBL4282  
----------------------------------------------------------------------------------------------------
 2) CALCR
   1) Amylin receptor AMY1, CALCR/RAMP1                                           PROTEIN COMPLEX      CHEMBL2111189
   2) Amylin receptor AMY2; CALCR/RAMP2                                           PROTEIN COMPLEX      CHEMBL2364173
   3) Amylin receptor AMY3; CALCR/RAMP3                                           PROTEIN COMPLEX      CHEMBL2111190
   4) Calcitonin receptor                                                         SINGLE PROTEIN       CHEMBL1832  
----------------------------------------------------------------------------------------------------
 3) CALCRL
   1) Adrenomedullin receptor AM1; CALCRL/RAMP2     