### Anatomy - Anatomy
- MeSH Term -[is]- MeSH Tree Number
- MeSH Tree Number -[subclass of]-> MeSH Tree Number

In [1]:
import pandas as pd, csv, json, requests as req
import xml.etree.ElementTree as ET
import biomedkg_utils
from biomedkg_utils import *

### Anatomy: MeSH -[is]- MeSH
- Name -[is]- ID
- ID -[is]- Tree Number
- Tree Number -[is]- Name

In [None]:
# Fetch the 2023 MeSH descriptor XML into input/ via wget, then parse it.
url = 'https://nlmpubs.nlm.nih.gov/projects/mesh/MESH_FILES/xmlmesh/desc2023.xml'
download_cmd = f'wget -N -P input/ {url}'
os.system(download_cmd)

# `root` is the top-level element; later cells iterate over its children.
tree = ET.parse('input/desc2023.xml')
root = tree.getroot()

In [4]:
# Build the anatomy lookup tables from the parsed MeSH descriptor file.
#
#   name2id : descriptor name -> sorted list of MeSH IDs (JSON-serialisable)
#   id2name : MeSH ID         -> set of descriptor names
#   id2tree : MeSH ID         -> set of tree numbers
#   tree2id : tree number     -> set of MeSH IDs (dict ordered by tree number)
#   all_tree_numbers : sorted list of every anatomy tree number encountered
name2id, id2name, id2tree, tree2id = dict(), dict(), dict(), dict()
all_tree_numbers = list()

# Tree-number prefixes that are treated as anatomy.
anatomy_mesh_tree_prefix = ('A01','A02','A03','A04',
                            'A05','A06','A07','A08',
                            'A09','A10','A11','A12',
                            'A14','A15','A16','A17.815',
                            'A17.360.710','A17.360.421',
                            'A17.360.296')

for ele in root:
    # Records without a TreeNumberList simply yield no matches here, so no
    # blanket try/except is needed to skip them.
    anatomy_trees = [
        node.text
        for node in ele.findall('TreeNumberList/TreeNumber')
        if node.text and node.text.startswith(anatomy_mesh_tree_prefix)
    ]
    if not anatomy_trees:
        continue

    ### Tree
    # Recorded even when the ID is missing below, as before.
    all_tree_numbers.extend(anatomy_trees)

    ### ID to Tree
    # The ID is a property of the record, so look it up once per record.
    ID = ele.findtext('DescriptorUI')
    if not ID:
        continue
    for tree_number in anatomy_trees:
        # MeSH ID -[is]- MeSH Tree
        id2tree.setdefault(ID, set()).add(tree_number)
        tree2id.setdefault(tree_number, set()).add(ID)

    ### ID to Name
    name = ele.findtext('DescriptorName/String')
    if name:
        # MeSH Term -[is]- MeSH ID
        name2id.setdefault(name, set()).add(ID)
        id2name.setdefault(ID, set()).add(name)

all_tree_numbers = sorted(all_tree_numbers)
tree2id = dict(sorted(tree2id.items()))

# Sets are not JSON-serialisable; sorting makes the dumped file reproducible.
name2id = {term: sorted(ids) for term, ids in name2id.items()}

### Tree-is-ID

In [5]:
# Edge list: MeSH ID -[is]- MeSH Tree Number, one row per (ID, tree number)
# pair in id2tree.
file = 'Anatomy_(MeSH)_2_Anatomy_(MeSH_Tree).csv'
outpath = os.path.join('output/anatomy2anatomy/', file)
edge_columns = ['Anatomy (MeSH)', 'Anatomy (MeSH Tree)', 'Relationship']
output_edgefile_onerel_noweight(outpath=outpath,
                                columns=edge_columns,
                                dictionary=id2tree,
                                rel='-is-',
                                prefix_col1='MeSH_Anatomy:',
                                prefix_col2='MeSH_Tree_Anatomy:')

# Read the CSV at outpath (presumably just written by the helper above),
# drop duplicate rows and publish the result to both edge folders.
tree_to_id = pd.read_csv(outpath)
tree_to_id = tree_to_id.drop_duplicates()
for edge_dir in ('output/edges/', 'output/edges to use/'):
    tree_to_id.to_csv(os.path.join(edge_dir, file), index=False)

tree_to_id.tail()

Unnamed: 0,Anatomy (MeSH),Anatomy (MeSH Tree),Relationship
3160,MeSH_Anatomy:D066293,MeSH_Tree_Anatomy:A08.675.358.650,-is-
3161,MeSH_Anatomy:D066293,MeSH_Tree_Anatomy:A11.671.358.375,-is-
3162,MeSH_Anatomy:D066294,MeSH_Tree_Anatomy:A08.675.358.350,-is-
3163,MeSH_Anatomy:D066294,MeSH_Tree_Anatomy:A11.671.358.212,-is-
3164,MeSH_Anatomy:D066328,MeSH_Tree_Anatomy:A08.186.211.200.885.287.249....,-is-


In [6]:
# Persist the MeSH term name -> MeSH ID lookup (name2id) as JSON.
term2id_path = 'output/anatomy2anatomy/meshterm-IS-meshid.json'
with open(term2id_path, 'w') as fout:
    fout.write(json.dumps(name2id))

### MeSH Tree Number -[subclass_of]-> MeSH Tree Number

In [11]:
# Map every non-root tree number to a one-element list holding its parent,
# i.e. the same tree number with its last dot-separated segment removed.
tree2tree = {}

for tree_num in all_tree_numbers:
    # Top-level tree numbers (no '.') have no parent to link to.
    if '.' not in tree_num:
        continue

    # Everything before the final segment is the parent's tree number.
    ancestor_segments = tree_num.split('.')[:-1]
    parent = '.'.join(ancestor_segments).strip('.')

    # Tree Number -[subclass of]-> Tree Number
    tree2tree[tree_num] = [parent]

In [16]:
# Edge list: child MeSH Tree Number -[subclass of]-> parent MeSH Tree Number,
# taken from tree2tree.
file = 'Anatomy_(MeSH_Tree)_2_Anatomy_(MeSH_Tree).csv'
outpath = os.path.join('output/anatomy2anatomy/', file)
edge_columns = ['Anatomy (MeSH Tree)', 'Anatomy (MeSH Tree)', 'Relationship']
output_edgefile_onerel_noweight(outpath=outpath,
                                columns=edge_columns,
                                dictionary=tree2tree,
                                rel='-subclass_of->',
                                prefix_col1='MeSH_Tree_Anatomy:',
                                prefix_col2='MeSH_Tree_Anatomy:')

# Read the CSV at outpath (presumably just written by the helper above),
# drop duplicate rows and publish the result to both edge folders.
tree_df = pd.read_csv(outpath)
tree_df = tree_df.drop_duplicates()
for edge_dir in ('output/edges/', 'output/edges to use/'):
    tree_df.to_csv(os.path.join(edge_dir, file), index=False)
tree_df.tail()

Unnamed: 0,Anatomy (MeSH Tree),Anatomy (MeSH Tree).1,Relationship
3145,MeSH_Tree_Anatomy:A17.815.250.500,MeSH_Tree_Anatomy:A17.815.250,-subclass_of->
3146,MeSH_Tree_Anatomy:A17.815.805,MeSH_Tree_Anatomy:A17.815,-subclass_of->
3147,MeSH_Tree_Anatomy:A17.815.830,MeSH_Tree_Anatomy:A17.815,-subclass_of->
3148,MeSH_Tree_Anatomy:A17.815.830.206,MeSH_Tree_Anatomy:A17.815.830,-subclass_of->
3149,MeSH_Tree_Anatomy:A17.815.830.480,MeSH_Tree_Anatomy:A17.815.830,-subclass_of->


# Uberon-[is]-MeSH
Maps UBERON anatomy ontology terms to their MeSH anatomy cross-references. Used in gene2anatomy.

In [9]:
# Download the UBERON edit OBO file into input/ via wget; the call stays the
# cell's last expression, so its exit status is what the cell displays.
uberon_obo_url = 'https://raw.githubusercontent.com/obophenotype/uberon/master/src/ontology/uberon-edit.obo'
os.system(f'wget -N -P input/ {uberon_obo_url}')

--2023-06-13 05:32:27--  https://raw.githubusercontent.com/obophenotype/uberon/master/src/ontology/uberon-edit.obo
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12200207 (12M) [text/plain]
Saving to: ‘input/uberon-edit.obo’

     0K .......... .......... .......... .......... ..........  0% 6.43M 2s
    50K .......... .......... .......... .......... ..........  0% 7.31M 2s
   100K .......... .......... .......... .......... ..........  1% 5.89M 2s
   150K .......... .......... .......... .......... ..........  1% 9.44M 2s
   200K .......... .......... .......... .......... ..........  2% 7.34M 2s
   250K .......... .......... .......... .......... ..........  2% 51.1M 1s
   300K .......... .......... .......... .......... ..........  2% 36.6M 1s
   350K .

0

In [10]:
# Build uberon2mesh: UBERON term ID -> MeSH ID, read from the `xref: MESH:`
# lines of each UBERON stanza in the OBO file. When a stanza carries several
# MeSH xrefs, the last one wins (unchanged from the previous behaviour).
uberon2mesh = dict()
mesh_id, uberon_id = '', ''

with open('input/uberon-edit.obo', 'r', encoding='utf-8') as fin:
    for line in fin:

        # End of stanza: any blank line, including '\r\n' or whitespace-only
        if not line.strip():
            if mesh_id and uberon_id:
                uberon2mesh[uberon_id] = mesh_id
            # Reset both IDs so a later stanza without a UBERON id can never
            # inherit the id of an earlier UBERON term that had no MeSH xref.
            mesh_id, uberon_id = '', ''
            continue

        # Start of term
        if line.startswith('id: UBERON:'):
            uberon_id = line.split('id: ')[1].strip()
            mesh_id = ''

        # MeSH xref
        # NOTE(review): everything after 'xref: MESH:' is kept verbatim, so a
        # trailing qualifier on the xref line would end up in mesh_id -
        # confirm the input never carries one.
        elif line.startswith('xref: MESH:'):
            mesh_id = line.split('xref: MESH:')[1].strip()

# Flush the final stanza when the file does not end with a blank line.
if mesh_id and uberon_id:
    uberon2mesh[uberon_id] = mesh_id

with open('output/anatomy2anatomy/uberon2mesh.json','w') as fout:
    json.dump(uberon2mesh, fout)