# Map MeSH conditions to the Disease Ontology and MeSH interventions to DrugBank

In [1]:
import pandas

## Map MeSH conditions to the Disease Ontology

In [2]:
# Load the tab-separated table of MeSH intervention/condition pairs; each row
# carries the trial's nct_id alongside the two MeSH identifiers.
mesh_path = 'data/mesh-intervention-to-condition.tsv'
mesh_df = pandas.read_table(filepath_or_buffer=mesh_path)
mesh_df.head(n=2)

Unnamed: 0,nct_id,intervention,condition
0,NCT00000114,D014801,D012173
1,NCT00000114,D014801,D012174


In [3]:
# Disease Ontology cross-reference table, read from the dhimmel/disease-ontology
# repository at a pinned commit so the download does not change over time.
url = 'https://github.com/dhimmel/disease-ontology/blob/75050ea2d4f60e745d3f3578ae03560a2cc0e444/data/xrefs.tsv?raw=true'
disease_map_df = pandas.read_table(filepath_or_buffer=url)

In [4]:
# Keep only the cross-references whose resource is 'MSH', discard the now
# constant `resource` column, and expose the identifier as `condition` so it
# shares a column name with the MeSH trial table.
disease_map_df = (
    disease_map_df[disease_map_df['resource'] == 'MSH']
    .drop('resource', axis='columns')
    .rename(columns={'resource_id': 'condition'})
)
disease_map_df.head(n=2)

Unnamed: 0,doid_code,doid_name,condition
0,DOID:4,disease,D004194
5,DOID:863,nervous system disease,D009422


## Map MeSH interventions to DrugBank

In [5]:
# Map from DrugBank to MeSH using DrugCentral
# The identifiers table is read from a pinned commit of olegursu/drugtarget.
url = 'https://github.com/olegursu/drugtarget/blob/9a6d84bed8650c6c507a2d3d786814c774568610/identifiers.tsv?raw=true'
identifier_df = pandas.read_table(url)

# Rows whose ID_TYPE contains 'MESH': the identifier becomes `intervention`.
mesh_id_df = identifier_df.loc[
    identifier_df['ID_TYPE'].str.contains('MESH'),
    ['DRUG_ID', 'IDENTIFIER'],
].rename(columns={'IDENTIFIER': 'intervention'})

# Rows whose ID_TYPE is exactly 'DRUGBANK_ID': keep the identifier and the name.
drugbank_id_df = identifier_df.loc[
    identifier_df['ID_TYPE'] == 'DRUGBANK_ID',
    ['DRUG_ID', 'IDENTIFIER', 'DRUG_NAME'],
].rename(columns={'IDENTIFIER': 'drugbank_id', 'DRUG_NAME': 'drug_name'})

# DRUG_ID is the only column the two selections share, so it is the join key;
# once the identifiers are paired up it is no longer needed.
drug_map_df = mesh_id_df.merge(drugbank_id_df, on='DRUG_ID')
drug_map_df = drug_map_df.drop('DRUG_ID', axis='columns')
drug_map_df.head(n=2)

Unnamed: 0,intervention,drugbank_id,drug_name
0,C016671,DB00067,vasopressin
1,C031183,DB03193,stearic acid


## Map ClinicalTrials.gov intervention-condition pairs to DrugBank and the Disease Ontology

In [6]:
# Translate each MeSH intervention/condition pair into a DrugBank/Disease
# Ontology pair. Both merges join on whatever columns the frames share
# (presumably `intervention`, then `condition` -- see the previews above).
mapped_df = (
    mesh_df
    .merge(drug_map_df)
    .merge(disease_map_df)
    # The MeSH identifiers have served their purpose as join keys.
    .drop(['condition', 'intervention'], axis='columns')
    # Several MeSH terms may resolve to the same drug/disease pair for a trial.
    .drop_duplicates()
    .sort_values(['doid_code', 'drugbank_id', 'nct_id'])
)

In [7]:
# Preview the last two rows of the sorted mapping.
mapped_df.iloc[-2:]

Unnamed: 0,nct_id,drugbank_id,drug_name,doid_code,doid_name
145365,NCT00793741,DB09099,somatostatin,DOID:9993,hypoglycemia
145324,NCT02109315,DB09541,ascorbic acid,DOID:9993,hypoglycemia


In [8]:
# Number of rows in the mapping.
mapped_df.shape[0]

175620

In [9]:
# Distinct trials, drugs and diseases present in the mapping, in that order.
tuple(mapped_df[column].nunique() for column in ('nct_id', 'drugbank_id', 'doid_code'))

(42826, 1181, 1617)

In [10]:
# Optional spot check, left disabled: tally drug_name values among the rows
# whose doid_name is 'multiple sclerosis'.
#mapped_df.query("doid_name == 'multiple sclerosis'").drug_name.value_counts()

In [11]:
# Save the mapping as a tab-separated file, leaving out the DataFrame index.
output_path = 'data/DrugBank-DO.tsv'
mapped_df.to_csv(output_path, sep='\t', index=False)