In [1]:
# Download the Uberon ext.obo file into the download/ directory.
# --timestamping makes wget skip the transfer when the server copy is no
# newer than the file already present locally, so re-running is cheap.
! wget --timestamping --directory-prefix download/ http://purl.obolibrary.org/obo/uberon/ext.obo

--2015-05-19 16:08:42--  http://purl.obolibrary.org/obo/uberon/ext.obo
Resolving purl.obolibrary.org (purl.obolibrary.org)... 132.174.1.35
Connecting to purl.obolibrary.org (purl.obolibrary.org)|132.174.1.35|:80... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: http://berkeleybop.org/ontologies/uberon/ext.obo [following]
--2015-05-19 16:08:42--  http://berkeleybop.org/ontologies/uberon/ext.obo
Resolving berkeleybop.org (berkeleybop.org)... 131.243.192.99
Connecting to berkeleybop.org (berkeleybop.org)|131.243.192.99|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12821492 (12M) [text/plain]
Server file no newer than local file ‘download/ext.obo’ -- not retrieving.



In [2]:
import json

import pandas
import requests

import obo

In [3]:
# Read the ontology into a networkx MultiDiGraph
# NOTE(review): the graph type is whatever obo.read_obo returns -- presumably
# a MultiDiGraph as stated above; confirm against the obo module.
# NOTE(review): open() is called without an explicit encoding, so the platform
# default applies -- confirm the .obo file decodes correctly on this machine.
with open('download/ext.obo') as obo_file:
    ontology = obo.read_obo(obo_file)

In [4]:
# Extract MeSH cross-references: keep only UBERON terms that carry exactly one
# MESH xref, so every retained term maps to a single MeSH value.
rows = []
# `nodes(data=True)` is iterable on both networkx 1.x and 2.x+, whereas
# `nodes_iter` was removed in networkx 2.0.
for node, data in ontology.nodes(data=True):
    if not node.startswith('UBERON:'):
        continue
    mesh_refs = []
    for xref in data.get('xref', []):
        # partition never raises on an xref that lacks a ':' separator;
        # such entries are simply not treated as MESH references.
        prefix, separator, value = xref.partition(':')
        if separator and prefix == 'MESH':
            mesh_refs.append(value)
    # Terms with zero or several MESH xrefs are ambiguous and are skipped.
    if len(mesh_refs) != 1:
        continue
    rows.append([node, data['name'], mesh_refs[0]])

uberon_mesh_df = pandas.DataFrame(rows, columns=['uberon_id', 'uberon_name', 'mesh_tree_number'])

In [5]:
# Read MeSH json (the URL embeds a commit hash, so the input is a fixed revision)
url = 'https://raw.githubusercontent.com/dhimmel/mesh/1a1ab7f1d30cb8a21618b5239370863ccb6de547/data/mesh.json'
# Bound the wait so a stalled connection cannot hang the notebook indefinitely.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of parsing an error page as JSON.
response.raise_for_status()
mesh = json.loads(response.text)

In [6]:
# Flatten the MeSH terms into one row per (term, tree number) combination
rows = []
for term in mesh:
    # Read both identifying fields up front, before walking the tree numbers.
    identifier, label = term['mesh_id'], term['mesh_name']
    rows.extend([identifier, label, number] for number in term['tree_numbers'])

mesh_df = pandas.DataFrame(
    rows,
    columns=['mesh_id', 'mesh_name', 'mesh_tree_number'],
)

In [7]:
# Inner-join the UBERON rows to the MeSH rows. No `on` is given, so pandas
# joins on the columns the two frames have in common.
uberon_mesh_df = pandas.merge(uberon_mesh_df, mesh_df, how='inner')
uberon_mesh_df.head()

Unnamed: 0,uberon_id,uberon_name,mesh_tree_number,mesh_id,mesh_name
0,UBERON:0001577,facial muscle,A02.633.567.400,D005152,Facial Muscles
1,UBERON:0001681,nasal bone,A02.835.232.781.324.665,D009295,Nasal Bone
2,UBERON:0001264,pancreas,A03.734,D010179,Pancreas
3,UBERON:0002356,perineum,A01.719,D010502,Perineum
4,UBERON:0001688,incus bone,A09.246.397.247.362,D007188,Incus


In [9]:
# Write the merged mapping to disk as a tab-separated file, omitting the index
uberon_mesh_df.to_csv(
    'data/mesh-map.tsv',
    sep='\t',
    index=False,
)