In [None]:
"""Combine Schulz et al. plasma concentration dataset with Cmax from ChEMBL, convert various plasma concentration units, take medians per compound and save for further analysis"""

In [2]:
import pandas as pd
import numpy as np
import datetime
import seaborn as sns

In [3]:
pd.set_option('display.max_rows',2000)
pd.set_option('display.max_colwidth', 200)

In [4]:
basedir = '/scratch/ias41/ae_code/plasma_concentrations'

In [5]:
plasma = pd.read_csv(basedir + '/results/interim/Schulz_ea_mapped_upper_plasma_concentrations.txt', sep='\t')
plasma['Type'] = 'Normal/therapeutic blood-plasma concentration'
plasma.head()

Unnamed: 0,Original source Substance,pref_name,chembl_id,molregno,mw_freebase,"Blood-plasma concentration, therapeutic (normal) upper limit",Unit,Type
0,ABACAVIR,ABACAVIR,CHEMBL1380,321707,286.34,3.9,mg/L,Normal/therapeutic blood-plasma concentration
1,ACAMPROSATE,ACAMPROSATE,CHEMBL1201293,675244,181.21,0.7,mg/L,Normal/therapeutic blood-plasma concentration
2,ACEBUTOLOL,ACEBUTOLOL,CHEMBL642,27347,336.43,2.0,mg/L,Normal/therapeutic blood-plasma concentration
3,ACENOCOUMAROL,ACENOCOUMAROL,CHEMBL397420,394206,353.33,0.5,mg/L,Normal/therapeutic blood-plasma concentration
4,PARACETAMOL,ACETAMINOPHEN,CHEMBL112,16450,151.16,25.0,mg/L,Normal/therapeutic blood-plasma concentration


In [11]:
plasma_select = plasma.copy()
plasma_select.columns = ['Original source Substance','pref_name', 'chembl_id', 'molregno','mw_freebase', 'value', 'source_unit', 'type']

In [12]:
plasma_select.head()

Unnamed: 0,Original source Substance,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type
0,ABACAVIR,ABACAVIR,CHEMBL1380,321707,286.34,3.9,mg/L,Normal/therapeutic blood-plasma concentration
1,ACAMPROSATE,ACAMPROSATE,CHEMBL1201293,675244,181.21,0.7,mg/L,Normal/therapeutic blood-plasma concentration
2,ACEBUTOLOL,ACEBUTOLOL,CHEMBL642,27347,336.43,2.0,mg/L,Normal/therapeutic blood-plasma concentration
3,ACENOCOUMAROL,ACENOCOUMAROL,CHEMBL397420,394206,353.33,0.5,mg/L,Normal/therapeutic blood-plasma concentration
4,PARACETAMOL,ACETAMINOPHEN,CHEMBL112,16450,151.16,25.0,mg/L,Normal/therapeutic blood-plasma concentration


In [13]:
cmax_initial = pd.read_csv(basedir + '/data/chembl_cmax_approved.txt', sep='\t')
cmax = cmax_initial.loc[(cmax_initial['assay_organism']=='Homo sapiens')&(cmax_initial['assay_tissue'].isin(['Plasma', 'Blood', 'Serum',np.nan]))&(cmax_initial['assay_cell_type'].isnull())&(cmax_initial['data_validity_comment']!='Outside typical range'),:]
cmax.head()

Unnamed: 0,activity_id,parent_molregno,parent_chembl_id,mw_freebase,parent_pref_name,version_molregno,version_chembl_id,version_pref_name,published_value,published_units,...,data_validity_comment,activity_comment,description,assay_organism,assay_tissue,assay_cell_type,assay_chembl_id,src_description,pubmed_id,title
46,765248,17224,CHEMBL584,567.8,NELFINAVIR,213505,CHEMBL1205,NELFINAVIR MESYLATE,313.0,ug ml-1,...,,,The maximum plasma concentration (100 mg/kg) administered orally in human,Homo sapiens,Plasma,,CHEMBL626235,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease."
47,765249,17224,CHEMBL584,567.8,NELFINAVIR,213505,CHEMBL1205,NELFINAVIR MESYLATE,439.0,ug ml-1,...,,,The maximum plasma concentration (200 mg/kg) administered orally in human,Homo sapiens,Plasma,,CHEMBL622412,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease."
48,765250,17224,CHEMBL584,567.8,NELFINAVIR,213505,CHEMBL1205,NELFINAVIR MESYLATE,1577.0,ug ml-1,...,,,The maximum plasma concentration (400 mg/kg) administered orally in human,Homo sapiens,Plasma,,CHEMBL623118,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease."
49,765251,17224,CHEMBL584,567.8,NELFINAVIR,213505,CHEMBL1205,NELFINAVIR MESYLATE,3163.0,ug ml-1,...,,,The maximum plasma concentration (800 mg/kg) administered orally in human,Homo sapiens,Plasma,,CHEMBL623286,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease."
119,1828306,143284,CHEMBL1082,365.41,AMOXICILLIN,143284,CHEMBL1082,AMOXICILLIN,19.0,ug ml-1,...,,,"Cmax in human at 1 g, po after 0.5 hrs",Homo sapiens,,,CHEMBL855217,Scientific Literature,17060515.0,Amoxicillin is effective against penicillin-resistant Streptococcus pneumoniae strains in a mouse pneumonia model simulating human pharmacokinetics.


In [14]:
cmax['ines_comment0'] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [15]:
# By inspecting drugs with more than 1 log unit SD, I noticed a mistakes in various assays so correcting that here (emailed ChEMBL about these)
cmax.loc[cmax['assay_chembl_id']=='CHEMBL3889380',['standard_value', 'ines_comment0']] = 4.2, 'corrected from ChEMBL'
cmax.loc[cmax['assay_chembl_id']=='CHEMBL3889380',['published_units', 'ines_comment0']] = 'ug/mL', 'corrected from ChEMBL'
cmax.loc[cmax['assay_chembl_id']=='CHEMBL1656401',['published_value', 'ines_comment0']] = 1185, 'corrected from ChEMBL'
cmax.loc[cmax['assay_chembl_id']=='CHEMBL1656401',['standard_value', 'ines_comment0']] = 1.185, 'corrected from ChEMBL'

# In these assays the units should have been per mg of dose. So I'm multiplying by the dose so that we can still use the values.
cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681301',['published_value', 'ines_comment0']] = 200*cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681301','published_value'], 'corrected from ChEMBL'
cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681301',['standard_value', 'ines_comment0']] = 200*cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681301','standard_value'], 'corrected from ChEMBL'

cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681307',['published_value', 'ines_comment0']] = 300*cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681307','published_value'], 'corrected from ChEMBL'
cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681307',['standard_value', 'ines_comment0']] = 300*cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681307','standard_value'], 'corrected from ChEMBL'

# Highest dose in this assay was 300 (n=4 in total, 2 had 200, 2 had 300, this is average of all)
cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681295',['published_value', 'ines_comment0']] = 300*cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681295','published_value'], 'corrected from ChEMBL'
cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681295',['standard_value', 'ines_comment0']] = 300*cmax.loc[cmax['assay_chembl_id']=='CHEMBL1681295','standard_value'], 'corrected from ChEMBL'

# Assay CHEMBL1681318 is redundant (replication of previous value)
cmax = cmax[cmax.assay_chembl_id != 'CHEMBL1681318']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [16]:
cmax.loc[~cmax['ines_comment0'].isnull()]

Unnamed: 0,activity_id,parent_molregno,parent_chembl_id,mw_freebase,parent_pref_name,version_molregno,version_chembl_id,version_pref_name,published_value,published_units,...,activity_comment,description,assay_organism,assay_tissue,assay_cell_type,assay_chembl_id,src_description,pubmed_id,title,ines_comment0
688,5158104,29097,CHEMBL682,355.87,AMODIAQUINE,476314,CHEMBL1630,AMODIAQUINE HYDROCHLORIDE,1185.0,ng/ml,...,,"Cmax in children with uncomplicated malaria assessed as desethylamodiaquine at 10 mg/kg, po administered as single dose",Homo sapiens,,,CHEMBL1656401,Scientific Literature,18779360.0,Effect of concomitant artesunate administration and cytochrome P4502C8 polymorphisms on the pharmacokinetics of amodiaquine in Ghanaian children with uncomplicated malaria.,corrected from ChEMBL
829,5228330,27307,CHEMBL129,267.25,ZIDOVUDINE,27307,CHEMBL129,ZIDOVUDINE,2274.0,ng/ml,...,,Cmax in HIV-1 infected patient on day 10 by LC-MS/MS analysis,Homo sapiens,,,CHEMBL1681295,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,corrected from ChEMBL
830,5228336,27307,CHEMBL129,267.25,ZIDOVUDINE,27307,CHEMBL129,ZIDOVUDINE,868.0,ng/ml,...,,"Cmax in HIV-1 infected patient at 200 mg, po bid on day 10 coadministered with 500 mg, po bid amdoxovir by LC-MS/MS analysis",Homo sapiens,,,CHEMBL1681301,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,corrected from ChEMBL
831,5228342,27307,CHEMBL129,267.25,ZIDOVUDINE,27307,CHEMBL129,ZIDOVUDINE,2022.0,ng/ml,...,,"Cmax in HIV-1 infected patient at 300 mg, po bid on day 10 coadministered with 500 mg, po bid amdoxovir by LC-MS/MS analysis",Homo sapiens,,,CHEMBL1681307,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,corrected from ChEMBL
1559,17805888,16450,CHEMBL112,151.16,ACETAMINOPHEN,16450,CHEMBL112,ACETAMINOPHEN,4.2,ug/mL,...,,Evalaution of pharmacokinetic parameters of Acetaminophen following single oral co-administration of 325 mg Acetaminophen tablets with 37.5 mg Tramadol.,Homo sapiens,,,CHEMBL3889380,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=39945812-f776-4b13-8199-e846e37aa101 | TRAMADOL HYDROCHLORIDE AND ACETAMINOPHEN - tramadol hydrochloride and acetaminophen tablet, film coated|Cadila Healthcare Limited",corrected from ChEMBL


In [17]:
cmax.columns

Index(['activity_id', 'parent_molregno', 'parent_chembl_id', 'mw_freebase',
       'parent_pref_name', 'version_molregno', 'version_chembl_id',
       'version_pref_name', 'published_value', 'published_units',
       'standard_type', 'standard_value', 'standard_upper_value',
       'standard_units', 'standard_text_value', 'data_validity_comment',
       'activity_comment', 'description', 'assay_organism', 'assay_tissue',
       'assay_cell_type', 'assay_chembl_id', 'src_description', 'pubmed_id',
       'title', 'ines_comment0'],
      dtype='object')

In [18]:
# Select and rename columns
cmax_select = cmax[
    ['activity_id'
     , 'parent_pref_name'
     , 'parent_chembl_id'
     , 'parent_molregno'
     , 'mw_freebase'
     , 'published_value'
     , 'published_units'
     , 'standard_value'
     , 'standard_units'
     , 'standard_type'
     , 'activity_comment'
     , 'assay_chembl_id'
     , 'src_description'
     , 'pubmed_id'
     , 'title'
     , 'description'
    , 'ines_comment0']].drop_duplicates()
cmax_select.columns = [
    'activity_id'
    , 'pref_name'
    , 'chembl_id'
    , 'molregno'
    , 'mw_freebase'
    , 'published_value'
    , 'published_units'
    , 'value'
    , 'source_unit'
    , 'type'
    , 'activity_comment'
    , 'assay_chembl_id'
    , 'src_description'
    , 'pubmed_id'
    , 'title'
    , 'description'
    , 'ines_comment0']

In [19]:
cmax_select.head()

Unnamed: 0,activity_id,pref_name,chembl_id,molregno,mw_freebase,published_value,published_units,value,source_unit,type,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,ines_comment0
46,765248,NELFINAVIR,CHEMBL584,17224,567.8,313.0,ug ml-1,313.0,ug.mL-1,Cmax,,CHEMBL626235,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease.",The maximum plasma concentration (100 mg/kg) administered orally in human,
47,765249,NELFINAVIR,CHEMBL584,17224,567.8,439.0,ug ml-1,439.0,ug.mL-1,Cmax,,CHEMBL622412,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease.",The maximum plasma concentration (200 mg/kg) administered orally in human,
48,765250,NELFINAVIR,CHEMBL584,17224,567.8,1577.0,ug ml-1,1577.0,ug.mL-1,Cmax,,CHEMBL623118,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease.",The maximum plasma concentration (400 mg/kg) administered orally in human,
49,765251,NELFINAVIR,CHEMBL584,17224,567.8,3163.0,ug ml-1,3163.0,ug.mL-1,Cmax,,CHEMBL623286,Scientific Literature,9397180.0,"Viracept (nelfinavir mesylate, AG1343): a potent, orally bioavailable inhibitor of HIV-1 protease.",The maximum plasma concentration (800 mg/kg) administered orally in human,
119,1828306,AMOXICILLIN,CHEMBL1082,143284,365.41,19.0,ug ml-1,19.0,ug.mL-1,Cmax,,CHEMBL855217,Scientific Literature,17060515.0,Amoxicillin is effective against penicillin-resistant Streptococcus pneumoniae strains in a mouse pneumonia model simulating human pharmacokinetics.,"Cmax in human at 1 g, po after 0.5 hrs",


In [22]:
combined = pd.concat([plasma_select, cmax_select], sort=False, ignore_index=True)

In [23]:
combined[['type', 'source_unit']].drop_duplicates()

Unnamed: 0,type,source_unit
0,Normal/therapeutic blood-plasma concentration,mg/L
180,Normal/therapeutic blood-plasma concentration,umol/L
681,Cmax,ug.mL-1
700,Cmax,nM


In [24]:
def mgperL_to_M(x, mw):
    in_gperL = x * (10**-3)
    in_M = in_gperL/mw
    return in_M

In [25]:
def umolperL_to_M(x):
    in_M = x * (10**-6)
    return in_M

In [26]:
def nM_to_M(x):
    return (x * (10**-9))

In [27]:
combined.head()

Unnamed: 0,Original source Substance,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type,activity_id,published_value,published_units,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,ines_comment0
0,ABACAVIR,ABACAVIR,CHEMBL1380,321707,286.34,3.9,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,
1,ACAMPROSATE,ACAMPROSATE,CHEMBL1201293,675244,181.21,0.7,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,
2,ACEBUTOLOL,ACEBUTOLOL,CHEMBL642,27347,336.43,2.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,
3,ACENOCOUMAROL,ACENOCOUMAROL,CHEMBL397420,394206,353.33,0.5,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,
4,PARACETAMOL,ACETAMINOPHEN,CHEMBL112,16450,151.16,25.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,


In [29]:
def row_convert(x):
    if x['source_unit'] in ['mg/L','ug.mL-1']:
        return(mgperL_to_M(x["value"], x["mw_freebase"]))
    elif x["source_unit"] == "umol/L":
        return(umolperL_to_M(x["value"]))
    elif x["source_unit"] == "nM":
        return(nM_to_M(x["value"]))
    else:
        print("ERROR HAS OCCURED")

In [30]:
combined['Molar_value'] = combined.apply(row_convert, axis =1)

In [31]:
combined['pMolar_value'] = combined['Molar_value'].apply(lambda x: -np.log10(x))

In [32]:
combined.head()

Unnamed: 0,Original source Substance,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type,activity_id,published_value,published_units,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,ines_comment0,Molar_value,pMolar_value
0,ABACAVIR,ABACAVIR,CHEMBL1380,321707,286.34,3.9,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,,1.4e-05,4.865817
1,ACAMPROSATE,ACAMPROSATE,CHEMBL1201293,675244,181.21,0.7,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,,4e-06,5.413084
2,ACEBUTOLOL,ACEBUTOLOL,CHEMBL642,27347,336.43,2.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,,6e-06,5.225865
3,ACENOCOUMAROL,ACENOCOUMAROL,CHEMBL397420,394206,353.33,0.5,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,,1e-06,5.849211
4,PARACETAMOL,ACETAMINOPHEN,CHEMBL112,16450,151.16,25.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,,,,,0.000165,3.781497


In [34]:
combined.tail()

Unnamed: 0,Original source Substance,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type,activity_id,published_value,published_units,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,ines_comment0,Molar_value,pMolar_value
1581,,MYCOPHENOLIC ACID,CHEMBL866,68396,320.34,26.2,ug.mL-1,Cmax,17806486.0,26.2,mcg/mL,,CHEMBL3889455,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=d7fbf301-0b08-4b19-bfa0-e0c0067cb5ab | MYCOPHENOLIC ACID - mycophenolic acid tablet, delayed release|Mylan Instituinal Inc.","Mean pharmacokinetic parameters of Mycophenolic acid following multiple dose administration of 720 mg Mycophenolic acid delayed-release tablets to adult renal transplant patients on Cyclosporine, ...",,8.2e-05,4.08731
1582,,MYCOPHENOLIC ACID,CHEMBL866,68396,320.34,24.1,ug.mL-1,Cmax,17806489.0,24.1,mcg/mL,,CHEMBL3889455,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=d7fbf301-0b08-4b19-bfa0-e0c0067cb5ab | MYCOPHENOLIC ACID - mycophenolic acid tablet, delayed release|Mylan Instituinal Inc.","Mean pharmacokinetic parameters of Mycophenolic acid following multiple dose administration of 720 mg Mycophenolic acid delayed-release tablets to adult renal transplant patients on Cyclosporine, ...",,7.5e-05,4.123594
1583,,MYCOPHENOLIC ACID,CHEMBL866,68396,320.34,18.9,ug.mL-1,Cmax,17806492.0,18.9,mcg/mL,,CHEMBL3889455,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=d7fbf301-0b08-4b19-bfa0-e0c0067cb5ab | MYCOPHENOLIC ACID - mycophenolic acid tablet, delayed release|Mylan Instituinal Inc.","Mean pharmacokinetic parameters of Mycophenolic acid following multiple dose administration of 720 mg Mycophenolic acid delayed-release tablets to adult renal transplant patients on Cyclosporine, ...",,5.9e-05,4.229149
1584,,MYCOPHENOLIC ACID,CHEMBL866,68396,320.34,31.2,ug.mL-1,Cmax,17806495.0,31.2,mcg/mL,,CHEMBL3889455,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=d7fbf301-0b08-4b19-bfa0-e0c0067cb5ab | MYCOPHENOLIC ACID - mycophenolic acid tablet, delayed release|Mylan Instituinal Inc.","Mean pharmacokinetic parameters of Mycophenolic acid following multiple dose administration of 720 mg Mycophenolic acid delayed-release tablets to adult renal transplant patients on Cyclosporine, ...",,9.7e-05,4.011457
1585,,MYCOPHENOLIC ACID,CHEMBL866,68396,320.34,15.0,ug.mL-1,Cmax,17806499.0,15.0,mcg/mL,Two Weeks Post-Transplant Administration.,CHEMBL3889455,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=d7fbf301-0b08-4b19-bfa0-e0c0067cb5ab | MYCOPHENOLIC ACID - mycophenolic acid tablet, delayed release|Mylan Instituinal Inc.","Mean pharmacokinetic parameters of Mycophenolic acid following multiple dose administration of 720 mg Mycophenolic acid delayed-release tablets to adult renal transplant patients on Cyclosporine, ...",,4.7e-05,4.32952


In [35]:
# Insert PMID for other source
def insert_pmid(x):
    if x['type'] == 'Normal/therapeutic blood-plasma concentration':
        return 22835221
    else:
        return x['pubmed_id']
def insert_title(x):
    if x['type'] == 'Normal/therapeutic blood-plasma concentration':
        return 'Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics'
    else:
        return x['title']

In [36]:
combined['pubmed_id'] = combined.apply(insert_pmid,axis=1)
combined['title'] = combined.apply(insert_title, axis=1)

In [37]:
combined.head()

Unnamed: 0,Original source Substance,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type,activity_id,published_value,published_units,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,ines_comment0,Molar_value,pMolar_value
0,ABACAVIR,ABACAVIR,CHEMBL1380,321707,286.34,3.9,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,,1.4e-05,4.865817
1,ACAMPROSATE,ACAMPROSATE,CHEMBL1201293,675244,181.21,0.7,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,,4e-06,5.413084
2,ACEBUTOLOL,ACEBUTOLOL,CHEMBL642,27347,336.43,2.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,,6e-06,5.225865
3,ACENOCOUMAROL,ACENOCOUMAROL,CHEMBL397420,394206,353.33,0.5,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,,1e-06,5.849211
4,PARACETAMOL,ACETAMINOPHEN,CHEMBL112,16450,151.16,25.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,,0.000165,3.781497


#### Inspect activity comments

In [38]:
combined.loc[(~combined['activity_comment'].isnull()) & combined['activity_comment'].str.contains('Dose', regex=False),:]

Unnamed: 0,Original source Substance,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type,activity_id,published_value,published_units,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,ines_comment0,Molar_value,pMolar_value
1435,,ISOTRETINOIN,CHEMBL547,13928,300.44,0.5733,ug.mL-1,Cmax,17805724.0,573.25,ng/mL,Dose-Normalised,CHEMBL3889357,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=517810a4-76bb-4739-893f-1f2ba94a5d0c | ZENATANE -isotretinoin capsule, gelatine coated",Mean pharmacokinetics parameters for Isotretinoin following single oral administration of Isotretinoin to healthy subjects.,,1.908201e-06,5.719376
1436,,ISOTRETINOIN,CHEMBL547,13928,300.44,0.732,ug.mL-1,Cmax,17805729.0,731.98,ng/mL,Dose-Normalised,CHEMBL3889358,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=517810a4-76bb-4739-893f-1f2ba94a5d0c | ZENATANE -isotretinoin capsule, gelatine coated","Steady-state pharmacokinetic parameters of Isotretinoin following multiple oral administration of Isotretinoin to 38 pediatric patients, age ramge (0 to 12 years), after a non-standardised meal.",,2.436427e-06,5.613247
1526,,OMEPRAZOLE,CHEMBL1503,419601,345.42,0.288,ug.mL-1,Cmax,17806280.0,288.0,ng/mL,Plasma Concentration Adjusted To An Oral Dose Of 1Mg/Kg,CHEMBL3889418,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=93ec1327-22b1-4a5d-a793-e014f6dffd33 | OMEPRAZOLE - omeprazole capsule, delayed release|STAT Rx USA LLC",Pharmacokinetic parameters of Omeprazole following single oral administration in pediatric population compared with adult subjects.,,8.337676e-07,6.078955
1527,,OMEPRAZOLE,CHEMBL1503,419601,345.42,0.495,ug.mL-1,Cmax,17806282.0,495.0,ng/mL,Plasma Concentration Adjusted To An Oral Dose Of 1Mg/Kg,CHEMBL3889418,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=93ec1327-22b1-4a5d-a793-e014f6dffd33 | OMEPRAZOLE - omeprazole capsule, delayed release|STAT Rx USA LLC",Pharmacokinetic parameters of Omeprazole following single oral administration in pediatric population compared with adult subjects.,,1.433038e-06,5.843742
1528,,OMEPRAZOLE,CHEMBL1503,419601,345.42,0.668,ug.mL-1,Cmax,17806284.0,668.0,ng/mL,Plasma Concentration Adjusted To An Oral Dose Of 1Mg/Kg,CHEMBL3889418,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=93ec1327-22b1-4a5d-a793-e014f6dffd33 | OMEPRAZOLE - omeprazole capsule, delayed release|STAT Rx USA LLC",Pharmacokinetic parameters of Omeprazole following single oral administration in pediatric population compared with adult subjects.,,1.933878e-06,5.713571
1529,,OMEPRAZOLE,CHEMBL1503,419601,345.42,1.458,ug.mL-1,Cmax,17806285.0,1458.0,ng/mL,Plasma Concentration Adjusted To An Oral Dose Of 1Mg/Kg,CHEMBL3889420,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=93ec1327-22b1-4a5d-a793-e014f6dffd33 | OMEPRAZOLE - omeprazole capsule, delayed release|STAT Rx USA LLC",Pharmacokinetic parameters for Omeprazole measured after repeated dose administration of 20 mg of Omeprazole to adults of mean body weight 76 kg and children > 20 kg.,,4.220948e-06,5.37459
1530,,OMEPRAZOLE,CHEMBL1503,419601,345.42,0.539,ug.mL-1,Cmax,17806287.0,539.0,ng/mL,Plasma Concentration Adjusted To An Oral Dose Of 1Mg/Kg,CHEMBL3889420,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=93ec1327-22b1-4a5d-a793-e014f6dffd33 | OMEPRAZOLE - omeprazole capsule, delayed release|STAT Rx USA LLC",Pharmacokinetic parameters for Omeprazole measured after repeated dose administration of 20 mg of Omeprazole to adults of mean body weight 76 kg and children > 20 kg.,,1.560419e-06,5.806759
1531,,OMEPRAZOLE,CHEMBL1503,419601,345.42,0.851,ug.mL-1,Cmax,17806290.0,851.0,ng/mL,Plasma Concentration Adjusted To An Oral Dose Of 1Mg/Kg,CHEMBL3889420,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=93ec1327-22b1-4a5d-a793-e014f6dffd33 | OMEPRAZOLE - omeprazole capsule, delayed release|STAT Rx USA LLC",Pharmacokinetic parameters for Omeprazole measured after repeated dose administration of 20 mg of Omeprazole to adults of mean body weight 76 kg and children > 20 kg.,,2.463667e-06,5.608418
1566,,MYCOPHENOLIC ACID,CHEMBL866,68396,320.34,23.8,ug.mL-1,Cmax,17806435.0,23.8,mcg/mL,Dose Adjusted To 600 Mg/M2,CHEMBL3889390,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=56428c8f-8a4b-4cbf-93f1-28d8b5cfb51a | MYCOPHENOLATE MOFETIL- mycophenolate mofetil capsule|MYCOPHENOLATE MOFETIL- mycophenolate mofetil tablet, film coated | Cadila Healthcare Limited","Pharmacokinetic parameters for Mycophenolic acid following multiple administration of Mycophenolate Mofetil to hepatic, cardiac and renal transplant patients, at least 5 days after transplant.",,7.429606e-05,4.129034
1567,,MYCOPHENOLIC ACID,CHEMBL866,68396,320.34,25.6,ug.mL-1,Cmax,17806438.0,25.6,mcg/mL,Dose Adjusted To 600 Mg/M2,CHEMBL3889390,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=56428c8f-8a4b-4cbf-93f1-28d8b5cfb51a | MYCOPHENOLATE MOFETIL- mycophenolate mofetil capsule|MYCOPHENOLATE MOFETIL- mycophenolate mofetil tablet, film coated | Cadila Healthcare Limited","Pharmacokinetic parameters for Mycophenolic acid following multiple administration of Mycophenolate Mofetil to hepatic, cardiac and renal transplant patients, at least 5 days after transplant.",,7.991509e-05,4.097371


- Dose Adjusted To 600 Mg/M2 - these are in babies but sufficient other data available so don't need to change anything, it's about the dose given
- mychophenolic acid, dose is adjusted, not concentration normalised, no need to change anything
- CHEMBL547, even though it says dose-normalised in the comment and text of original source, the table does not indicate dose-normatlise, and the values are similar to other earlier table that is not dose-normalised, so I would guess this is not actually dose-normalised

#### Make a selection of columns

In [39]:
slim_columns = ['Original source Substance', 'activity_id','pref_name', 'chembl_id','molregno', 'mw_freebase', 'value', 'source_unit', 'type', 'activity_comment', 'assay_chembl_id', 'src_description', 'pubmed_id', 'title', 'description', 'Molar_value', 'pMolar_value', 'ines_comment0']

In [40]:
combined_slim = combined[slim_columns]

#### Add notes about metabolites

In [41]:
combined_slim['ines_comment1'] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [42]:
combined_slim.loc[(combined_slim['chembl_id']=='CHEMBL1068')&(combined_slim['pubmed_id']==22835221),'ines_comment1'] = 'This is the Cmax for the main metabolite'

In [43]:
combined_slim.loc[(combined_slim['chembl_id']=='CHEMBL682')&(combined_slim['pubmed_id']==22835221),'ines_comment1'] = 'This is the Cmax for the main metabolite'

In [44]:
combined_slim.loc[combined_slim['assay_chembl_id']=='CHEMBL1220048','ines_comment1'] = 'From looking at original source this also looks like Cmax of main metabolite'

In [45]:
combined_slim.loc[(~combined_slim['ines_comment1'].isnull()|(~combined_slim['ines_comment0'].isnull())),:]

Unnamed: 0,Original source Substance,activity_id,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,Molar_value,pMolar_value,ines_comment0,ines_comment1
33,AMODIAQUINE,,AMODIAQUINE,CHEMBL682,29097,355.87,0.05,mg/L,Normal/therapeutic blood-plasma concentration,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,1.405007e-07,6.852321,,This is the Cmax for the main metabolite
469,OXCARBAZEPINE,,OXCARBAZEPINE,CHEMBL1068,139014,252.27,35.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,0.0001387402,3.857798,,This is the Cmax for the main metabolite
813,,3385221.0,AMODIAQUINE,CHEMBL682,29097,355.87,0.235,ug.mL-1,Cmax,,CHEMBL1220048,Scientific Literature,19841149.0,"Pharmacokinetics of artemether-lumefantrine and artesunate-amodiaquine in children in Kampala, Uganda.","Cmax in Ugandan children patient with uncomplicated malaria assessed as desethylamodiaquine level at 10 mg/kg, perorally qd on first 2 days and then 5 mg/kg, perorally on third day co-administered...",6.603535e-07,6.180224,,From looking at original source this also looks like Cmax of main metabolite
969,,5158104.0,AMODIAQUINE,CHEMBL682,29097,355.87,1.185,ug.mL-1,Cmax,,CHEMBL1656401,Scientific Literature,18779360.0,Effect of concomitant artesunate administration and cytochrome P4502C8 polymorphisms on the pharmacokinetics of amodiaquine in Ghanaian children with uncomplicated malaria.,"Cmax in children with uncomplicated malaria assessed as desethylamodiaquine at 10 mg/kg, po administered as single dose",3.329868e-06,5.477573,corrected from ChEMBL,
1071,,5228330.0,ZIDOVUDINE,CHEMBL129,27307,267.25,2.274,ug.mL-1,Cmax,,CHEMBL1681295,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,Cmax in HIV-1 infected patient on day 10 by LC-MS/MS analysis,8.508887e-06,5.070127,corrected from ChEMBL,
1072,,5228336.0,ZIDOVUDINE,CHEMBL129,27307,267.25,0.868,ug.mL-1,Cmax,,CHEMBL1681301,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,"Cmax in HIV-1 infected patient at 200 mg, po bid on day 10 coadministered with 500 mg, po bid amdoxovir by LC-MS/MS analysis",3.247895e-06,5.488398,corrected from ChEMBL,
1073,,5228342.0,ZIDOVUDINE,CHEMBL129,27307,267.25,2.022,ug.mL-1,Cmax,,CHEMBL1681307,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,"Cmax in HIV-1 infected patient at 300 mg, po bid on day 10 coadministered with 500 mg, po bid amdoxovir by LC-MS/MS analysis",7.565949e-06,5.121137,corrected from ChEMBL,
1458,,17805888.0,ACETAMINOPHEN,CHEMBL112,16450,151.16,4.2,ug.mL-1,Cmax,,CHEMBL3889380,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=39945812-f776-4b13-8199-e846e37aa101 | TRAMADOL HYDROCHLORIDE AND ACETAMINOPHEN - tramadol hydrochloride and acetaminophen tablet, film coated|Cadila Healthcare Limited",Evalaution of pharmacokinetic parameters of Acetaminophen following single oral co-administration of 325 mg Acetaminophen tablets with 37.5 mg Tramadol.,2.778513e-05,4.556188,corrected from ChEMBL,


In [46]:
# Combine comments columns
combined_slim['comments'] = combined_slim['ines_comment0']
combined_slim['comments'].fillna(combined_slim['ines_comment1'], inplace=True)

In [47]:
combined_slim.loc[~combined_slim['comments'].isnull()]

Unnamed: 0,Original source Substance,activity_id,pref_name,chembl_id,molregno,mw_freebase,value,source_unit,type,activity_comment,assay_chembl_id,src_description,pubmed_id,title,description,Molar_value,pMolar_value,ines_comment0,ines_comment1,comments
33,AMODIAQUINE,,AMODIAQUINE,CHEMBL682,29097,355.87,0.05,mg/L,Normal/therapeutic blood-plasma concentration,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,1.405007e-07,6.852321,,This is the Cmax for the main metabolite,This is the Cmax for the main metabolite
469,OXCARBAZEPINE,,OXCARBAZEPINE,CHEMBL1068,139014,252.27,35.0,mg/L,Normal/therapeutic blood-plasma concentration,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics",,0.0001387402,3.857798,,This is the Cmax for the main metabolite,This is the Cmax for the main metabolite
813,,3385221.0,AMODIAQUINE,CHEMBL682,29097,355.87,0.235,ug.mL-1,Cmax,,CHEMBL1220048,Scientific Literature,19841149.0,"Pharmacokinetics of artemether-lumefantrine and artesunate-amodiaquine in children in Kampala, Uganda.","Cmax in Ugandan children patient with uncomplicated malaria assessed as desethylamodiaquine level at 10 mg/kg, perorally qd on first 2 days and then 5 mg/kg, perorally on third day co-administered...",6.603535e-07,6.180224,,From looking at original source this also looks like Cmax of main metabolite,From looking at original source this also looks like Cmax of main metabolite
969,,5158104.0,AMODIAQUINE,CHEMBL682,29097,355.87,1.185,ug.mL-1,Cmax,,CHEMBL1656401,Scientific Literature,18779360.0,Effect of concomitant artesunate administration and cytochrome P4502C8 polymorphisms on the pharmacokinetics of amodiaquine in Ghanaian children with uncomplicated malaria.,"Cmax in children with uncomplicated malaria assessed as desethylamodiaquine at 10 mg/kg, po administered as single dose",3.329868e-06,5.477573,corrected from ChEMBL,,corrected from ChEMBL
1071,,5228330.0,ZIDOVUDINE,CHEMBL129,27307,267.25,2.274,ug.mL-1,Cmax,,CHEMBL1681295,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,Cmax in HIV-1 infected patient on day 10 by LC-MS/MS analysis,8.508887e-06,5.070127,corrected from ChEMBL,,corrected from ChEMBL
1072,,5228336.0,ZIDOVUDINE,CHEMBL129,27307,267.25,0.868,ug.mL-1,Cmax,,CHEMBL1681301,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,"Cmax in HIV-1 infected patient at 200 mg, po bid on day 10 coadministered with 500 mg, po bid amdoxovir by LC-MS/MS analysis",3.247895e-06,5.488398,corrected from ChEMBL,,corrected from ChEMBL
1073,,5228342.0,ZIDOVUDINE,CHEMBL129,27307,267.25,2.022,ug.mL-1,Cmax,,CHEMBL1681307,Scientific Literature,20038617.0,Lack of pharmacokinetic interaction between amdoxovir and reduced- and standard-dose zidovudine in HIV-1-infected individuals.,"Cmax in HIV-1 infected patient at 300 mg, po bid on day 10 coadministered with 500 mg, po bid amdoxovir by LC-MS/MS analysis",7.565949e-06,5.121137,corrected from ChEMBL,,corrected from ChEMBL
1458,,17805888.0,ACETAMINOPHEN,CHEMBL112,16450,151.16,4.2,ug.mL-1,Cmax,,CHEMBL3889380,Curated Drug Pharmacokinetic Data,,"DailyMed|setid=39945812-f776-4b13-8199-e846e37aa101 | TRAMADOL HYDROCHLORIDE AND ACETAMINOPHEN - tramadol hydrochloride and acetaminophen tablet, film coated|Cadila Healthcare Limited",Evalaution of pharmacokinetic parameters of Acetaminophen following single oral co-administration of 325 mg Acetaminophen tablets with 37.5 mg Tramadol.,2.778513e-05,4.556188,corrected from ChEMBL,,corrected from ChEMBL


#### Rename and reorder columns

In [48]:
# Drop
combined_slim.drop(labels=['ines_comment1', 'ines_comment0'], axis=1, inplace=True)

In [49]:
combined_slim.columns = ['Original source Substance','activity_id','pref_name', 'parent_chembl_id','molregno', 'mw_freebase', 'value', 'source_unit', 'type', 'activity_comment', 'assay_chembl_id', 'source_description', 'pubmed_id', 'doc_title', 'description', 'Molar_value', 'pMolar_value', 'comments']

In [50]:
len(combined_slim.columns)

18

In [51]:
combined_slim_selected = combined_slim[['Original source Substance', 'pref_name', 'parent_chembl_id','molregno', 'mw_freebase', 'type', 'value', 'source_unit', 'Molar_value', 'pMolar_value', 'assay_chembl_id', 'activity_id', 'description', 'activity_comment', 'comments', 'source_description', 'pubmed_id', 'doc_title']].sort_values(by='pref_name')

In [52]:
combined_slim_selected.loc[combined_slim_selected['Original source Substance']!=combined_slim_selected['pref_name'],:]

Unnamed: 0,Original source Substance,pref_name,parent_chembl_id,molregno,mw_freebase,type,value,source_unit,Molar_value,pMolar_value,assay_chembl_id,activity_id,description,activity_comment,comments,source_description,pubmed_id,doc_title
945,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,3.43,ug.mL-1,1.197877e-05,4.921588,CHEMBL1661927,5154216.0,Cmax in HIV infected male patient at 600 mg QD by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
946,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,1.59,ug.mL-1,5.552839e-06,5.255485,CHEMBL1661924,5154217.0,Cmax in HIV infected male patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
947,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,2.09,ug.mL-1,7.299015e-06,5.136736,CHEMBL1661071,5154218.0,Ratio of weight normalized Cmax in HIV infected patient at 600 mg QD to weight normalized Cmax in HIV infected patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
948,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,2.09,ug.mL-1,7.299015e-06,5.136736,CHEMBL1660921,5154219.0,Ratio of Cmax in HIV infected patient at 600 mg QD to Cmax in HIV infected patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
950,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,1.84,ug.mL-1,6.425927e-06,5.192064,CHEMBL1660927,5154221.0,Weight normalized Cmax in HIV infected patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
951,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,3.85,ug.mL-1,1.344555e-05,4.871421,CHEMBL1660929,5154222.0,Cmax in HIV infected patient at 600 mg QD by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
952,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,1.84,ug.mL-1,6.425927e-06,5.192064,CHEMBL1660934,5154223.0,Cmax in HIV infected patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
953,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,1.66,ug.mL-1,5.797304e-06,5.236774,CHEMBL1661930,5154287.0,Weight normalized Cmax in HIV infected male patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
954,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,3.58,ug.mL-1,1.250262e-05,4.902999,CHEMBL1661933,5154288.0,Weight normalized Cmax in HIV infected male patient at 600 mg QD by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
955,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,4.83,ug.mL-1,1.686806e-05,4.772935,CHEMBL1661939,5154289.0,Cmax in HIV infected female patient at 600 mg QD by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...


In [54]:
combined_slim_selected.columns

Index(['Original source Substance', 'pref_name', 'parent_chembl_id',
       'molregno', 'mw_freebase', 'type', 'value', 'source_unit',
       'Molar_value', 'pMolar_value', 'assay_chembl_id', 'activity_id',
       'description', 'activity_comment', 'comments', 'source_description',
       'pubmed_id', 'doc_title'],
      dtype='object')

In [55]:
combined_slim_selected.loc[combined_slim_selected['molregno'].isnull()]

Unnamed: 0,Original source Substance,pref_name,parent_chembl_id,molregno,mw_freebase,type,value,source_unit,Molar_value,pMolar_value,assay_chembl_id,activity_id,description,activity_comment,comments,source_description,pubmed_id,doc_title


In [56]:
# Some values not available if molecular weight was not available
combined_slim_selected.loc[combined_slim_selected['pMolar_value'].isnull()]

Unnamed: 0,Original source Substance,pref_name,parent_chembl_id,molregno,mw_freebase,type,value,source_unit,Molar_value,pMolar_value,assay_chembl_id,activity_id,description,activity_comment,comments,source_description,pubmed_id,doc_title
10,ADALIMUMAB,ADALIMUMAB,CHEMBL1201580,675482,,Normal/therapeutic blood-plasma concentration,9.0,mg/L,,,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics"
231,DROTRECOGIN ALFA,DROTRECOGIN ALFA (ACTIVATED),CHEMBL2109065,1381331,,Normal/therapeutic blood-plasma concentration,0.072,mg/L,,,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics"
429,MUROMONAB-CD3,MUROMONAB-CD3,CHEMBL1201608,675588,,Normal/therapeutic blood-plasma concentration,1.3,mg/L,,,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics"


In [57]:
# Remove columns where Molar value is null
combined_slim_selected = combined_slim_selected.loc[~combined_slim_selected['pMolar_value'].isnull()]

In [58]:
# Save file
combined_slim_selected.sort_values(by='pref_name').reset_index(drop=True).to_csv(basedir + '/results/interim/total_plasma_concentrations_approved_drugs_with_refs.txt', sep='\t', index=None)

In [59]:
len(combined_slim_selected['parent_chembl_id'].drop_duplicates())

757

In [60]:
combined_slim_selected.head()

Unnamed: 0,Original source Substance,pref_name,parent_chembl_id,molregno,mw_freebase,type,value,source_unit,Molar_value,pMolar_value,assay_chembl_id,activity_id,description,activity_comment,comments,source_description,pubmed_id,doc_title
0,ABACAVIR,ABACAVIR,CHEMBL1380,321707,286.34,Normal/therapeutic blood-plasma concentration,3.9,mg/L,1.4e-05,4.865817,,,,,,,22835221.0,"Therapeutic and toxic blood concentrations of nearly 1,000 drugs and other xenobiotics"
945,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,3.43,ug.mL-1,1.2e-05,4.921588,CHEMBL1661927,5154216.0,Cmax in HIV infected male patient at 600 mg QD by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
946,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,1.59,ug.mL-1,6e-06,5.255485,CHEMBL1661924,5154217.0,Cmax in HIV infected male patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
947,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,2.09,ug.mL-1,7e-06,5.136736,CHEMBL1661071,5154218.0,Ratio of weight normalized Cmax in HIV infected patient at 600 mg QD to weight normalized Cmax in HIV infected patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...
948,,ABACAVIR,CHEMBL1380,321707,286.34,Cmax,2.09,ug.mL-1,7e-06,5.136736,CHEMBL1660921,5154219.0,Ratio of Cmax in HIV infected patient at 600 mg QD to Cmax in HIV infected patient at 300 mg BID by HPLC/MS/MS analysis,,,Scientific Literature,19188387.0,Steady-state pharmacokinetics of abacavir in plasma and intracellular carbovir triphosphate following administration of abacavir at 600 milligrams once daily and 300 milligrams twice daily in huma...


## Take medians for further analysis

In [61]:
len(combined_slim_selected)

1583

In [62]:
# exclude metabolites
no_metabolites = combined_slim_selected.loc[(combined_slim_selected['comments'].isnull()) | ((~combined_slim_selected['comments'].isnull())&(combined_slim_selected['comments'].str.contains('corrected')))] 

In [63]:
len(no_metabolites)

1580

In [64]:
medians_selection = no_metabolites.groupby('molregno').agg({'pref_name': 'first', 'parent_chembl_id': 'first', 'Molar_value': 'median'})

In [65]:
medians_selection.reset_index(drop=False, inplace=True)
medians_selection.rename(columns={'Molar_value': 'median_Molar_value'}, inplace=True)

In [66]:
medians_selection.head()

Unnamed: 0,molregno,pref_name,parent_chembl_id,median_Molar_value
0,97,PRAZOSIN,CHEMBL2,5.216348e-08
1,115,NICOTINE,CHEMBL3,1.849112e-07
2,146,OFLOXACIN,CHEMBL4,8.025016e-06
3,147,NALIDIXIC ACID,CHEMBL5,0.0001291767
4,173,INDOMETHACIN,CHEMBL6,5.589871e-06


In [67]:
medians_selection['median_pMolar_value'] = medians_selection['median_Molar_value'].apply(lambda x: -np.log10(x))

In [68]:
medians_selection.to_csv(basedir + '/results/interim/molregno2median_total_plasma_conc.txt', sep='\t', index=False)

## Data inspection

Notes from inspection

- OXCARBAZEPINE / CHEMBL1068: the blood-plasma concentration is for main metabolite, whereas the CHEMBL data is for parent compound. Main metabolite is 70% of plasma compounds, whereas parent only 2%
- AMODIAQUINE / CHEMBL682: one error corrected (see above). The other values also differ because of difference in parent vs active metabolite measured.
The plasma-conc is for the main metabolite. the 'CHEMBL1220048' assay is also for main metabolite (from original source) but not annotated as such in ChEMBL, which is wrong.
- METHOTHREXATE / CHEMBL34259: couldn't see any errors. Checked ChEMBL original source. Can't check original source plasma-conc because is a book.
- AMIKACIN / CHEMBL177: The lower concentration of 0.2 is mentioned in the original paper as plasma concentration in neonates (not Cmax) from another study (minimum).
- NELFINAVIR / CHEMBL584: the lower one, plasma-conc is aim for minimum trough concentration. other sources for Cmax 
- ATAZANAVIR / CHEMBL1163: same as NELFINAVIR
- METRONIDAZOLE / CHEMBL137: low concentrations are for topical 
- ISOTRETINOIN / CHEMBL547: low concentration for topical
- BICALUTAMIDE / CHEMBL409: don't see any errors. Strangely the 'normal' concentration is higher. From wikipedia refs this seems more correct.
- INDINAVIR / 'CHEMBL115': low concentration is trough target
- BELINOSTAT / CHEMBL408513: see no errors
- ZIDOVUDINE / CHEMBL129: the ChEMBL assay units are wrong, they are actually per mg of dose. Dose is 200 or 300 mg so value should be multiplied. Will do this in corrections above.

In [40]:
checked = ['CHEMBL1068', 'CHEMBL34259', 'CHEMBL177', 'CHEMBL584', 'CHEMBL1163', 'CHEMBL137','CHEMBL547', 'CHEMBL409', 'CHEMBL115', 'CHEMBL408513', 'CHEMBL682', 'CHEMBL129']

In [41]:
means = combined.groupby('chembl_id')['pMolar_value'].describe()

In [48]:
means.loc[(means['count']>1)&(~means.index.isin(checked)),:].sort_values(by='std', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
chembl_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CHEMBL79,7.0,6.409557,0.884371,4.670876,6.106677,6.871536,6.979385,7.152362
CHEMBL376488,5.0,5.719502,0.872104,4.790457,5.004179,5.662633,6.240637,6.899602
CHEMBL1542,15.0,6.887574,0.870611,5.141873,6.688028,7.069991,7.318365,8.071835
CHEMBL25,4.0,4.164063,0.835052,2.954628,3.972039,4.429632,4.621656,4.842359
CHEMBL483254,3.0,6.692795,0.829239,5.850781,6.284874,6.718967,7.113802,7.508638
CHEMBL444633,6.0,6.513606,0.812947,5.065166,6.449202,6.625781,6.77661,7.547682
CHEMBL1194,3.0,5.912302,0.768565,5.042024,5.619475,6.196926,6.347441,6.497956
CHEMBL1535,5.0,6.131643,0.763305,5.154902,5.91364,5.924124,6.443697,7.221849
CHEMBL148,2.0,4.128877,0.746363,3.601118,3.864997,4.128877,4.392756,4.656635
CHEMBL806,10.0,6.134944,0.714533,5.411856,5.489457,6.017487,6.776765,7.121093
