In [1]:
from owlready2 import *
import csv
from elasticsearch import Elasticsearch
# import keys
import json






In [3]:
# Add the local ontologies directory to owlready2's search path, then load
# the BRENDA Tissue Ontology (BTO) identified by its OBO PURL.
onto_path.append('../../data/ontologies/')
bto_ontology = get_ontology('http://purl.obolibrary.org/obo/bto.owl')
onto = bto_ontology.load()

In [3]:
'''
Creates a csv file containing data from the BTO ontology.
- csv file is used to create an index in Elasticsearch
- 3 BTO terms are deprecated and have no label
    - BTO_0000441: adipose tissue
    - BTO_0001414: umbilical vein endothelium
    - BTO_0003973: prostatic intraepithelial neoplasia cell
- csv file contains the label, definition, and comment for each BTO term
    - allows for a more detailed search
'''

# newline='' is what the csv module expects of files it writes to; without it
# rows end in '\r\r\n' (blank lines) on platforms that translate '\n'.
# The encoding is pinned so non-ASCII ontology text does not depend on the
# machine's locale.
with open('../../data/elastic_docs/bto.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['bto_id', 'label', 'definition', 'comment'])
    for c in onto.classes():
        biosample_id = c.name
        # Each annotation may be empty; keep its first value or fall back to ''.
        label = c.label[0] if c.label else ''
        definition = c.IAO_0000115[0] if c.IAO_0000115 else ''
        comment = c.comment[0] if c.comment else ''

        writer.writerow([biosample_id, label, definition, comment])
    

In [8]:
'''
Creates a text file of synonyms in the BTO ontology
- each row in the text file contains comma separated synonyms for a BTO term
    - the row starts with the term's label (when it has one), followed by its
      related synonyms and then its exact synonyms, all lower-cased
- terms without any synonym are skipped
- text file loaded into the Elasticsearch docker container
    - used to create a synonym filter in the Elasticsearch index
'''

# NOTE(review): labels/synonyms that themselves contain a comma (e.g.
# "1,4-dichlorobenzene-grown cell") are written as-is and would be split by a
# comma-delimited reader -- confirm whether they need escaping.
with open('../../data/elastic_docs/bto_synonyms.txt', 'w', encoding='utf-8') as f:
    for c in onto.classes():
        synonyms = list(c.hasRelatedSynonym) + list(c.hasExactSynonym)
        if not synonyms:
            # Nothing to contribute to the synonym filter for this term.
            continue
        # Some (deprecated) terms have no label, so only lead the row with
        # the label when one exists instead of indexing label[0] blindly.
        terms = ([c.label[0]] if c.label else []) + synonyms
        # A single join over all terms guarantees a separator between the
        # last related synonym and the first exact synonym.
        f.write((','.join(terms) + '\n').lower())

In [38]:
'''
Prints the BTO terms as line-delimited JSON for adding them to the
Elasticsearch index.
- every term produces two lines: an {"index": {}} action line followed by a
  document line with the BTO id, label, definition, and comment
- an annotation that is missing on a term is emitted as an empty string
'''

# The action line is identical for every document, so serialize it once.
BULK_ACTION_LINE = json.dumps({'index': {}})

for bto_class in onto.classes():
    # Annotation values, in the order they must appear in the document.
    annotations = {
        'label': bto_class.label,
        'definition': bto_class.IAO_0000115,
        'comment': bto_class.comment,
    }

    document = {'bto_id': bto_class.name}
    for field, values in annotations.items():
        # Keep only the first value of each annotation, or '' when absent.
        document[field] = values[0] if values else ''

    print(BULK_ACTION_LINE)
    print(json.dumps(document))

{"index": {}}
{"bto_id": "BTO_0000000", "label": "tissues, cell types and enzyme sources", "definition": "A structured controlled vocabulary for the source of an enzyme. It comprises terms of tissues, cell lines, cell types and cell cultures from uni- and multicellular organisms.", "comment": ""}
{"index": {}}
{"bto_id": "BTO_0000001", "label": "culture condition:-induced cell", "definition": "", "comment": ""}
{"index": {}}
{"bto_id": "BTO_0000002", "label": "culture condition:1,4-dichlorobenzene-grown cell", "definition": "", "comment": ""}
{"index": {}}
{"bto_id": "BTO_0000003", "label": "intestinal cell line", "definition": "", "comment": ""}
{"index": {}}
{"bto_id": "BTO_0000004", "label": "culture condition:2,5-dihydroxybenzoate-grown cell", "definition": "", "comment": ""}
{"index": {}}
{"bto_id": "BTO_0000005", "label": "culture condition:2-aminobenzenesulfonate-grown cell", "definition": "", "comment": ""}
{"index": {}}
{"bto_id": "BTO_0000006", "label": "osteoblastoma cell", 