## Extracting interaction data from GO associations

GO is not an interaction database. However, for some groups, GO provides additional information in protein binding annotations about the binding partner. This notebook provides an example of how to do this.

Please note that for PPI data you are likely better off using a dedicated PPI database. This notebook is primarily intended as an illustration of how to use the ontobio library to perform operations on gene product associations.

In [1]:
from ontobio import OntologyFactory
# Build the ontology object for the 'go' handle through ontobio's factory.
# NOTE(review): presumably this fetches GO from a remote source, so the cell
# may be slow and need network access — confirm against the ontobio docs.
ofa = OntologyFactory()
ont = ofa.create('go')




In [2]:
# Make a sub-ontology restricted to is-a ('subClassOf') and part-of
# ('BFO:0000050') relations. Later cells pass it to the association loader
# and use it when walking the term hierarchy.
gobasic = ont.subontology(relations=['subClassOf', 'BFO:0000050'])

In [3]:
# Search the ontology for 'protein binding'. The single-element unpacking
# `[pb] = ...` raises ValueError unless the search yields exactly one result,
# so `pb` is guaranteed to be a single term ID.
[pb] = ont.search('protein binding')
pb

'GO:0005515'

In [6]:
# Load the human GO annotations (GAF) into an association set, using the
# is-a/part-of sub-ontology built above as the ontology.
from ontobio.assoc_factory import AssociationSetFactory
afa = AssociationSetFactory()
url = "http://geneontology.org/gene-associations/goa_human.gaf.gz"

# NOTE(review): skim=False presumably keeps the full association records
# (later cells read a['evidence']['with_support_from']) — confirm against
# the ontobio documentation.
aset = afa.create_from_gaf(url, ontology=gobasic, skim=False)

In [8]:
# Query the association set with the protein binding term and count the hits.
# NOTE(review): presumably returns the gene products annotated to `pb`
# (possibly including its descendants in `gobasic`) — confirm against ontobio docs.
gps_to_pb = aset.query([pb])
len(gps_to_pb)

11060

In [17]:
# Collect (annotated gene product, with/from value) pairs from every
# association whose term is 'protein binding' or one of its descendants.
#
# The reflexive descendant closure of `pb` is computed once up front.
# "term in descendants(pb)" is the same test as "pb in ancestors(term)",
# but it avoids re-walking the ontology for every single association.
pb_terms = set(gobasic.descendants(pb, reflexive=True))

pairs = set()
for gp in aset.subjects:
    for a in aset.associations(gp):
        if a['object']['id'] in pb_terms:
            # NOTE(review): a with/from entry can be a comma-joined list of
            # IDs (e.g. 'UniProtKB:P52298,UniProtKB:Q6P2Q9'). It is kept
            # verbatim here, so such entries are not individual identifiers.
            for w in a['evidence']['with_support_from']:
                pairs.add( (a['subject']['id'],w) )

In [18]:
# Number of distinct (subject ID, with/from value) pairs collected above.
len(pairs)

66657

In [19]:
# Peek at 20 pairs. `pairs` is a set, so iteration order is arbitrary and the
# sample shown is not guaranteed to be the same from one run to the next.
list(pairs)[0:20]

[('UniProtKB:P40763', 'UniProtKB:P25791'),
 ('UniProtKB:P24863', 'UniProtKB:O75528'),
 ('UniProtKB:P0CG47', 'UniProtKB:P33993'),
 ('UniProtKB:Q86VI1', 'PANTHER:PTN000480155'),
 ('UniProtKB:P31785', 'UniProtKB:Q7Z3S9'),
 ('UniProtKB:Q7Z3S9', 'UniProtKB:Q9NQS3'),
 ('UniProtKB:Q8N9N5', 'UniProtKB:Q8IYR0'),
 ('UniProtKB:P48552', 'UniProtKB:P49116'),
 ('UniProtKB:P35625', 'UniProtKB:P50052'),
 ('UniProtKB:O15379', 'UniProtKB:P54253'),
 ('UniProtKB:P18206', 'UniProtKB:P12003'),
 ('UniProtKB:Q8TBB1', 'UniProtKB:Q719H9'),
 ('UniProtKB:Q9Y2W2', 'UniProtKB:Q99IB8:PRO_0000045603'),
 ('UniProtKB:Q96BZ8', 'UniProtKB:Q15323'),
 ('UniProtKB:P04155', 'UniProtKB:Q9UMX0'),
 ('UniProtKB:P01106', 'UniProtKB:Q969H0'),
 ('UniProtKB:Q8IXJ6', 'UniProtKB:O60729'),
 ('UniProtKB:Q9NX70', 'UniProtKB:Q9H944'),
 ('UniProtKB:Q9UQL6', 'UniProtKB:Q8TBE0'),
 ('UniProtKB:Q09161', 'UniProtKB:P52298,UniProtKB:Q6P2Q9')]

In [20]:
# Same 20-pair sample, with each ID followed by the label the association set
# has for it (None where no label is found).
[
    (subject_id, aset.label(subject_id), partner_id, aset.label(partner_id))
    for subject_id, partner_id in list(pairs)[:20]
]

[('UniProtKB:P40763', 'STAT3', 'UniProtKB:P25791', 'LMO2'),
 ('UniProtKB:P24863', 'CCNC', 'UniProtKB:O75528', 'TADA3'),
 ('UniProtKB:P0CG47', 'UBB', 'UniProtKB:P33993', 'MCM7'),
 ('UniProtKB:Q86VI1', 'EXOC3L1', 'PANTHER:PTN000480155', None),
 ('UniProtKB:P31785', 'IL2RG', 'UniProtKB:Q7Z3S9', 'NOTCH2NL'),
 ('UniProtKB:Q7Z3S9', 'NOTCH2NL', 'UniProtKB:Q9NQS3', 'NECTIN3'),
 ('UniProtKB:Q8N9N5', 'BANP', 'UniProtKB:Q8IYR0', 'CFAP206'),
 ('UniProtKB:P48552', 'NRIP1', 'UniProtKB:P49116', 'NR2C2'),
 ('UniProtKB:P35625', 'TIMP3', 'UniProtKB:P50052', 'AGTR2'),
 ('UniProtKB:O15379', 'HDAC3', 'UniProtKB:P54253', 'ATXN1'),
 ('UniProtKB:P18206', 'VCL', 'UniProtKB:P12003', None),
 ('UniProtKB:Q8TBB1', 'LNX1', 'UniProtKB:Q719H9', 'KCTD1'),
 ('UniProtKB:Q9Y2W2', 'WBP11', 'UniProtKB:Q99IB8:PRO_0000045603', None),
 ('UniProtKB:Q96BZ8', 'LENG1', 'UniProtKB:Q15323', 'KRT31'),
 ('UniProtKB:P04155', 'TFF1', 'UniProtKB:Q9UMX0', 'UBQLN1'),
 ('UniProtKB:P01106', 'MYC', 'UniProtKB:Q969H0', 'FBXW7'),
 ('UniProtKB: