In [95]:
from goatools import obo_parser
# Parse the GO-basic ontology file; "relationship" is requested as an
# optional attribute so each loaded term also carries its relationship data.
GO_OBO_PATH = "./data/geneontology/go-basic.obo"
obodag = obo_parser.GODag(GO_OBO_PATH, optional_attrs=["relationship"])

./data/geneontology/go-basic.obo: fmt(1.2) rel(2018-07-02) 47,231 GO Terms; optional_attrs(relationship)


In [78]:
def get_all_parents(goterm):
    """Return every ancestor of ``goterm``, shallowest first.

    The ``parents`` links are followed transitively. Each ancestor is
    visited once, so terms reachable through several paths are neither
    re-traversed nor duplicated, and the result is sorted a single time.

    Args:
        goterm: A hashable term object exposing ``level`` (used as the sort
            key) and ``parents`` (an iterable of term objects of the same
            kind).

    Returns:
        list: The distinct ancestors ordered by ascending ``level``. A term
        without parents yields an empty list, so the return type is always
        a list. The relative order of ancestors sharing a level is
        unspecified.
    """
    ancestors = set()
    pending = list(goterm.parents)
    while pending:
        term = pending.pop()
        if term in ancestors:
            # Already reached through another path; its parents are queued.
            continue
        ancestors.add(term)
        pending.extend(term.parents)
    return sorted(ancestors, key=lambda gtrm: gtrm.level)

def get_all_children(goterm):
    """Return every descendant of ``goterm``, deepest first.

    The ``children`` links are followed transitively. Each descendant is
    visited once, so terms reachable through several paths are neither
    re-traversed nor duplicated, and the result is sorted a single time.

    Args:
        goterm: A hashable term object exposing ``level`` (used as the sort
            key) and ``children`` (an iterable of term objects of the same
            kind).

    Returns:
        list: The distinct descendants ordered by descending ``level``. A
        term without children yields an empty list, so the return type is
        always a list. The relative order of descendants sharing a level is
        unspecified.
    """
    descendants = set()
    pending = list(goterm.children)
    while pending:
        term = pending.pop()
        if term in descendants:
            # Already reached through another path; its children are queued.
            continue
        descendants.add(term)
        pending.extend(term.children)
    return sorted(descendants, key=lambda gtrm: -gtrm.level)

# Show the names of all ancestors of one term, in the order that
# get_all_parents returns them.
goterm_id = "GO:0048527"
ancestor_terms = get_all_parents(obodag[goterm_id])
[ancestor.name for ancestor in ancestor_terms]

['biological_process',
 'multicellular organismal process',
 'developmental process',
 'anatomical structure development',
 'post-embryonic development',
 'plant organ development',
 'post-embryonic plant organ development',
 'post-embryonic root development',
 'root development']

In [72]:
from pprint import pprint
# Pretty-print the attribute and method names available on the loaded DAG object.
pprint(dir(obodag))

['__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_populate_relationships',
 '_populate_terms',
 '_set_level_depth',
 '_str_desc',
 'clear',
 'copy',
 'draw_lineage',
 'fromkeys',
 'get',
 'id2int',
 'items',
 'keys',
 'label_wrap',
 'load_obo_file',
 'make_graph_pydot',
 'make_graph_pygraphviz',
 'paths_to_top',
 'pop',
 'popitem',
 'query_term',
 'setdefault',
 'typedefs',
 'update',
 'update_association',
 'values',
 'version',
 'write_dag',
 'write_hier',
 'write_hier_all']


In [93]:
# Count the entries of the DAG whose term name contains the substring "growth".
sum(1 for goterm in obodag.values() if "growth" in goterm.name)

632

In [99]:
from goatools.semantic import deepest_common_ancestor
# Ask goatools for the deepest common ancestor of the two GO IDs, then look
# the returned ID up in the DAG so the full term record is displayed.
go_ids_to_compare = ["GO:0048527", "GO:0007124"]
common_ancestor_id = deepest_common_ancestor(go_ids_to_compare, obodag)
obodag[common_ancestor_id]

GOTerm('GO:0008150'):
  children: 29 items
  parents: 0 items
  namespace:biological_process
  reldepth:0
  name:biological_process
  relationship_rev: 3 items
    negatively_regulates: 1 items
      GO:0048519	level-03	depth-03	negative regulation of biological process [biological_process]
    positively_regulates: 1 items
      GO:0048518	level-03	depth-03	positive regulation of biological process [biological_process]
    regulates: 1 items
      GO:0050789	level-02	depth-02	regulation of biological process [biological_process]
  relationship: 0 items
  is_obsolete:False
  alt_ids: 3 items
    GO:0007582
    GO:0044699
    GO:0000004
  depth:0
  id:GO:0008150
  _parents: 0 items
  level:0

In [112]:
# Term whose regulators are collected in the following cells.
terms_of_interest = obodag["GO:0048527"]
# Names of the relationship typedefs that contain the word "regulates".
regulates = frozenset(
    filter(
        lambda relation_name: "regulates" in relation_name,
        obodag.typedefs.keys(),
    )
)
print(regulates)

frozenset({'negatively_regulates', 'positively_regulates', 'regulates'})


In [113]:
from collections import defaultdict

# Group every term that has a "regulates"-type relationship pointing at
# terms_of_interest, keyed by the passive form of the relationship name
# (e.g. "regulates" -> "regulated_by").
regulating_terms = defaultdict(list)
for candidate in obodag.values():
    if not hasattr(candidate, 'relationship'):
        continue
    relations = candidate.relationship
    for relation_name in regulates.intersection(relations.keys()):
        regulated_targets = relations[relation_name]
        if terms_of_interest in regulated_targets:
            # Drop the trailing "s" and append "d_by" to build the key.
            label = '{:s}d_by'.format(relation_name[:-1])
            regulating_terms[label].append(candidate)

In [114]:
# Print a small indented report: the term of interest, then each regulation
# kind with its regulating terms and those terms' direct children.
header = "{:s} ({:s}) is:".format(terms_of_interest.name, terms_of_interest.id)
print(header)
for relation_label, regulators in regulating_terms.items():
    print('\n - {:s}:'.format(relation_label))
    for regulator in regulators:
        print('\t-- {:s} {:s}'.format(regulator.id, regulator.name))
        for child_term in regulator.children:
            print('\t\t-- {:s} {:s}'.format(child_term.id, child_term.name))

lateral root development (GO:0048527) is:

 - positively_regulated_by:
	-- GO:1901333 positive regulation of lateral root development

 - regulated_by:
	-- GO:2000023 regulation of lateral root development
		-- GO:1901333 positive regulation of lateral root development
		-- GO:1901332 negative regulation of lateral root development

 - negatively_regulated_by:
	-- GO:1901332 negative regulation of lateral root development


In [133]:
from goatools import godag_plot
# Write a plot for the two listed GO IDs, drawn from the loaded DAG, to a PNG file.
plot_path = "wtf.png"
gos_to_plot = ['GO:0097191', 'GO:0038034']
godag_plot.plot_gos(plot_path, gos_to_plot, obodag)

    2 usr  10 GOs  WROTE: wtf.png


In [205]:
from Bio.UniProt.GOA import gafiterator
from collections import Counter
import tqdm

# Collect the gene-product symbol of every annotation whose GO term has
# between 10 and 15 entries (inclusive) in the collection returned by
# get_all_children().
to_test = []
filename = './data/geneontology/goa_yeast.gaf'
# Many annotation rows share a GO ID, so the size is computed once per ID
# and reused instead of calling get_all_children() for every row.
subtree_sizes = {}
with open(filename) as fp:
    for annot in tqdm.tqdm(gafiterator(fp)):
        go_id = annot["GO_ID"]
        if go_id not in subtree_sizes:
            subtree_sizes[go_id] = len(obodag[go_id].get_all_children())
        subtree_sz = subtree_sizes[go_id]
        if 10 <= subtree_sz <= 15:
            to_test.append(annot["DB_Object_Symbol"])
print(len(to_test))

96139it [11:42, 136.84it/s]

2951





In [222]:
# Cache of subtree sizes keyed by term ID. Counter has to be instantiated:
# the bare class object cannot hold per-term values.
sbtr_sz = Counter()
def calc_subtree_size(go_term):
    """Return the size of ``go_term``'s subtree, cached in ``sbtr_sz``.

    NOTE(review): the original cell ended right after the ``def`` line, so
    this body is a reconstruction of the apparent intent (a cached
    ``len(get_all_children())``) -- confirm before relying on it.

    Args:
        go_term: A term object exposing ``id`` and ``get_all_children()``.
            Assumed to be a term object rather than an ID string -- TODO
            confirm against the intended callers.

    Returns:
        int: ``len(go_term.get_all_children())``, computed at most once per
        distinct ``go_term.id``.
    """
    term_id = go_term.id
    if term_id not in sbtr_sz:
        sbtr_sz[term_id] = len(go_term.get_all_children())
    return sbtr_sz[term_id]

['GO:0000004',
 'GO:0003674',
 'GO:0008150',
 'GO:0008372',
 'GO:0044699',
 'GO:0005575',
 'GO:0005554',
 'GO:0007582']

In [234]:
# Re-read the GAF file and keep every annotation whose gene-product symbol
# was selected into to_test.
# to_test is a list built by repeated appends; a frozenset built once gives
# constant-time membership checks instead of scanning that list for every row.
symbols_to_keep = frozenset(to_test)
entries = []
with open(filename) as fp:
    for annot in tqdm.tqdm(gafiterator(fp)):
        if annot["DB_Object_Symbol"] in symbols_to_keep:
            entries.append(annot)

[entry["GO_ID"] for entry in entries]

96139it [00:04, 22117.00it/s]


['GO:0003674',
 'GO:0004360',
 'GO:0005575',
 'GO:0006002',
 'GO:0006047',
 'GO:0006048',
 'GO:0006487',
 'GO:0006541',
 'GO:0008150',
 'GO:0034221',
 'GO:0000183',
 'GO:0000183',
 'GO:0001302',
 'GO:0005730',
 'GO:0006310',
 'GO:0006325',
 'GO:0007059',
 'GO:0007580',
 'GO:0008156',
 'GO:0031582',
 'GO:0033553',
 'GO:0034503',
 'GO:0043007',
 'GO:0043110',
 'GO:0043110',
 'GO:0045911',
 'GO:0046872',
 'GO:0070550',
 'GO:0090342',
 'GO:0090342',
 'GO:0090579',
 'GO:0000462',
 'GO:0002181',
 'GO:0003735',
 'GO:0005654',
 'GO:0005730',
 'GO:0019843',
 'GO:0019843',
 'GO:0022627',
 'GO:0031167',
 'GO:0032040',
 'GO:0045903',
 'GO:0000445',
 'GO:0000445',
 'GO:0000446',
 'GO:0000446',
 'GO:0000723',
 'GO:0003676',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:0005515',
 'GO:00055

In [236]:
# Display the term record stored in the DAG under GO:0001302.
obodag['GO:0001302']

GOTerm('GO:0001302'):
  children: 0 items
  parents: 1 items
    GO:0007569	level-03	depth-03	cell aging [biological_process]
  namespace:biological_process
  reldepth:4
  name:replicative cell aging
  relationship_rev: 2 items
    part_of: 3 items
      GO:0001321	level-03	depth-03	age-dependent general metabolic decline involved in replicative cell aging [biological_process]
      GO:0001304	level-05	depth-05	progressive alteration of chromatin involved in replicative cell aging [biological_process]
      GO:0001303	level-07	depth-07	nucleolar fragmentation involved in replicative aging [biological_process]
    regulates: 1 items
      GO:1900062	level-05	depth-05	regulation of replicative cell aging [biological_process]
  relationship: 0 items
  is_obsolete:False
  alt_ids: 0 items
  depth:4
  id:GO:0001302
  _parents: 1 items
    GO:0007569
  level:4

In [239]:
# Number of collected symbols (to_test is a list, so repeats are counted).
len(to_test)

2951

In [3]:
from scipy.spatial import distance 

# Squared Euclidean distance between two vectors; the third vector is
# passed as scipy's per-component weight argument ``w``.
vec_u = [1, 2, 3, 4]
vec_v = [-1, 2, 3, 7]
component_weights = [1, 2, 3, 0]
distance.sqeuclidean(vec_u, vec_v, w=component_weights)

4.0

In [1]:
import igraph as ig
import igraph.remote.gephi

# Build a Gephi connection and a graph streamer with their default
# settings, then post the graph produced by ig.Graph.Full(10) to it.
gephi = igraph.remote.gephi
connection = gephi.GephiConnection()
streamer = gephi.GephiGraphStreamer()

full_graph = ig.Graph.Full(10)
streamer.post(graph=full_graph, destination=connection)

In [14]:
# Clear the interactive namespace; -f skips the confirmation prompt.
%reset -f