In [33]:
import gograph
import importlib
import re
import time

In [34]:
%%time
# Reload gograph so that edits to the module are picked up without restarting the kernel.
importlib.reload(gograph)
# Parse the GO OBO file into `ontology` using gograph's OBOParser.
ontology = gograph.OBOParser('data/go-basic.obo').parse_ontology()

CPU times: user 1.76 s, sys: 31.6 ms, total: 1.79 s
Wall time: 1.82 s


In [35]:
# NOTE(review): presumably links each term to its parent/child GoTerm objects (the
# `ancestors` / `children` entries visible in the __dict__ dump further down) — confirm in gograph.
ontology.draw_connections()

In [36]:
# Peek at the first key of ontology.nodes to get an example term id.
list(ontology.nodes)[0]

'GO:0000001'

In [37]:
# Dump every attribute stored on one term object to see what the parser captured.
ontology.nodes['GO:0000001'].__dict__

{'id': 'GO:0000001',
 'namespace': 'biological_process',
 'definition': '"The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton." [GOC:mcc, PMID:10873824, PMID:11389764]',
 'is_a': ['GO:0048308 ! organelle inheritance',
  'GO:0048311 ! mitochondrion distribution'],
 'ancestor_ids': ['GO:0048308', 'GO:0048311'],
 'child_ids': [],
 'ancestors': [<gograph.GoTerm at 0x7f9f4dcc2e48>,
  <gograph.GoTerm at 0x7f9f4dcc2eb8>],
 'children': [],
 'name': 'mitochondrion inheritance'}

In [38]:
# Ancestor ids of the example term as reported by the ontology object.
ontology.get_full_ancestry('GO:0000001')

['GO:0009987',
 'GO:0051179',
 'GO:0051640',
 'GO:0008150',
 'GO:0051646',
 'GO:0048308',
 'GO:0051641',
 'GO:0048311',
 'GO:0007005',
 'GO:0071840',
 'GO:0006996',
 'GO:0016043']

In [9]:
%%time 
# Reload gograph so that edits to the module are picked up without restarting the kernel.
importlib.reload(gograph)

# Path of the GO OBO file, relative to the notebook's working directory.
obo_fp = 'data/go-basic.obo'
# Opened for reading here; the handle is closed once parsing finishes at the end of this cell.
obo_file = open(obo_fp, 'r')

def parse_ancestor_id(is_a):
    """
    Extract the GO term id from the content of an 'is_a' or 'relationship' field.

    Args:
        is_a : str
            Field content, e.g. 'GO:0048308 ! organelle inheritance' or
            'part_of GO:0005829 ! cytosol'.
    Returns:
        str or None
            The first 'GO:' id (seven digits) that is followed by ' ! ',
            or None when the text contains no such id.
    """
    # Raw string: '\d' and '\s' are invalid escape sequences in a normal literal.
    capture = re.search(r'(GO:\d{7})\s!\s', is_a)
    return capture.group(1) if capture else None

# Leftovers from an earlier dict-based parser. Nothing in this cell reads them;
# they are kept only so the names this cell defines stay the same.
go_list = {}
blank_template = {'id': None, 'name': None, 'namespace': None,
                  'def': None, 'is_a': []}

go_dag = gograph.GoGraph()
term_flag = False  # True while the current line is inside a [Term] stanza

# Split a 'tag: value' line at the FIRST ': '. The lazy first group matters:
# a greedy one splits at the last ': ' on the line, so values that themselves
# contain ': ' (e.g. 'def: "Catalysis of the reaction: ..."') would be dropped.
tag_value_re = re.compile(r'(.+?):\s(.+)')

# `with` closes the handle even if parsing raises part-way through the file.
with obo_file:
    for line in obo_file:
        if line == '[Term]\n':
            term_flag = True
            continue
        if line == '\n':
            # A blank line ends the current stanza.
            term_flag = False
            continue
        if not term_flag:
            # Header lines and non-[Term] stanzas are ignored.
            continue

        capture = tag_value_re.match(line)
        if not capture:
            continue
        name = capture.group(1)
        content = capture.group(2)

        if name == 'id':
            # Every later tag of this stanza is stored on this term object.
            go_term = gograph.GoTerm(id_=content)
            go_dag.add_node(go_term)  # push term into tree
        elif name == 'is_a' or name == 'relationship':
            go_term.is_a.append(content)
            ancestor_id = parse_ancestor_id(content)
            # Skip lines without a recognisable GO id instead of storing None.
            if ancestor_id is not None:
                go_term.ancestor_ids.append(ancestor_id)
        elif name == 'name':
            go_term.name = content
        elif name == 'namespace':
            go_term.namespace = content
        elif name == 'def':
            go_term.definition = content

CPU times: user 1.9 s, sys: 15.6 ms, total: 1.91 s
Wall time: 1.91 s


In [None]:
# Dump the attributes of one parsed term to sanity-check the hand-written parser above.
go_dag.nodes['GO:0000015'].__dict__

In [None]:
# NOTE(review): presumably fills each term's `ancestors` list, which the traversal
# cells below read — confirm in gograph.
go_dag.draw_connections()

In [None]:
# Time the computation of the full ancestry of every term with traverse3.
# NOTE: traverse3 is defined in a later cell; that cell has to be run first.
t0 = time.time()

# GO id -> distinct ids on the paths to the root (traverse3 includes the term
# itself). Sorted so the lists are reproducible; set order is arbitrary.
h = {node.id: sorted(set(traverse3(node))) for node in go_dag.nodes.values()}

print()
print('Wall time: ', time.time()-t0)

In [None]:
# Time the computation of the full ancestry of every term with traverse4.
# NOTE: traverse4 is defined in a later cell; that cell has to be run first.
t0 = time.time()

# GO id -> distinct ancestor ids (traverse4 does not include the term itself).
# Sorted so the lists are reproducible; set order is arbitrary.
h = {node_id: sorted(set(traverse4(node_id))) for node_id in go_dag.nodes}

print()
print('Wall time: ', time.time()-t0)

In [None]:
t0 = time.time()  # started before the definition, so the time printed below also covers it
def traverse2(term):
    """
    Gather the id of ``term`` together with the ids of everything above it.

    Args:
        term : object
            Node exposing ``id`` and ``ancestors`` (a list of nodes of the
            same kind).
    Returns:
        list
            ``term.id`` followed by the result of walking each entry of
            ``term.ancestors`` in order, depth first. A node that can be
            reached along several paths is listed once per path.
    """
    lineage = [term.id]
    for parent in term.ancestors:
        lineage += traverse2(parent)
    return lineage
        
    
# Walk up from the example term, then report the elapsed time and the path statistics.
a = traverse2(go_dag.nodes['GO:0000001'])
print(time.time() - t0)
# One value per line: the visited ids, their count with duplicates, the count of distinct ids.
print(a, len(a), len(set(a)), sep='\n')

In [None]:
t0 = time.time()  # started before the definition, so the time printed below also covers it
def traverse3(term):
    """
    Return the id of ``term`` followed by the ids found by walking up from it.

    Args:
        term : object
            Node exposing ``id`` and ``ancestors`` (a list of nodes of the
            same kind).
    Returns:
        list
            ``term.id`` first, then the traversal of each entry of
            ``term.ancestors`` in order (depth first). Ids reachable along
            more than one path are repeated.
    """
    return [term.id] + [
        go_id
        for parent in term.ancestors
        for go_id in traverse3(parent)
    ]
        
# Walk up from the example term, then report the elapsed time and the path statistics.
b = traverse3(go_dag.nodes['GO:0000001'])
print(time.time() - t0)

# One value per line: the visited ids, their count with duplicates, the count of distinct ids.
print(b, len(b), len(set(b)), sep='\n')

In [None]:
def traverse4(node_id):
    r"""
    Traverse GO graph from given node to root

    The node is looked up in the notebook-global ``go_dag`` and each entry of
    its ``ancestors`` list is followed recursively, depth first.

    Args:
        node_id : str
            GO term id of node (ex: 'GO:0000001')
    Returns:
        ancestors : list
            list of ancestor GO term ids; the starting node itself is
            not included

    Note:
        The list of ancestor ids will have duplicates, because
        in the GO ontology children can have multiple parents and
        so there are multiple paths leading to the root.

        For example:

                /C\
            A--B   F--root
                \D/

        Calling traverse4(A) will return
            [B, C, F, root, D, F, root]

    """
    ancestors = []

    # An empty `ancestors` list (the root) simply skips the loop.
    for ancestor in go_dag.nodes[node_id].ancestors:
        ancestors.append(ancestor.id)
        ancestors.extend(traverse4(ancestor.id))

    return ancestors

# Time a single traversal from the example term and report the path statistics.
t0 = time.time()
b = traverse4('GO:0000001')
print(time.time() - t0)
# One value per line: the ancestor ids, their count with duplicates, the count of distinct ids.
print(b, len(b), len(set(b)), sep='\n')

In [None]:
from progress.bar import Bar

In [None]:
# Try out the third-party `progress` package: a bar labelled 'Processing' with 20 steps.
bar = Bar('Processing', max=20)
for i in range(20):
    # Do some work
    bar.next()  # called once per iteration
# NOTE(review): presumably finalises the bar's output — confirm in the progress docs.
bar.finish()

In [None]:
# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    r"""
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int); 0 is rendered as a
                                  complete bar instead of raising ZeroDivisionError
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)

    Prints one line of the form '<prefix> |<bar>| <percent>% <suffix>' terminated
    by printEnd, followed by a newline once iteration equals total.

    author : https://stackoverflow.com/a/34325723
    """
    if total:
        fraction = iteration / float(total)
        filledLength = int(length * iteration // total)
    else:
        # Nothing to iterate over: show the bar as finished rather than dividing by zero.
        fraction = 1.0
        filledLength = length
    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    print('%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = printEnd)
    # Print New Line on Complete
    if iteration == total:
        print()

In [None]:
import time

# Demo of printProgressBar: 57 dummy items, each taking 0.1 s to "process".
items = list(range(0, 57))
l = len(items)

# Initial call to print 0% progress
printProgressBar(0, l, prefix = 'Progress:', suffix = 'Complete', length = 50)
for i, item in enumerate(items):
    # Do stuff...
    time.sleep(0.1)
    # Update Progress Bar (i is zero-based, so i + 1 items are done at this point)
    printProgressBar(i + 1, l, prefix = 'Progress:', suffix = 'Complete', length = 50)

In [None]:
# Scratch check: `/` is true division in Python 3, so this evaluates to the float 2.0.
10/5

In [None]:
# Scratch cell: trivial expression, not used by any other cell shown here.
1+1

In [None]:
import numpy as np

In [None]:
# Broadcasting check: the (2, 1) column is stretched across the columns of the (2, 2)
# array, so row 0 is multiplied by 10 and row 1 by 2.
np.array([[1,1],[1,1]]) * np.array([[10],[2]])

In [None]:
# Scratch cell: trivial expression, not used by any other cell shown here.
2+2