In [708]:
import csv
import io
import itertools
import json
import urllib.parse
import urllib.request
from ast import literal_eval
#import time

In [709]:
#the stringdb API returns results as byte strings holding JSON text. Decode the bytes
#and parse them with the json module.
def get_json(results):
    """Parse a raw STRING API response body into Python objects.

    Parameters
    ----------
    results : bytes
        UTF-8 encoded JSON text, as returned by ``urlopen(...).read()``.

    Returns
    -------
    object
        When the payload is a JSON array with exactly one element, that
        element is returned on its own (so a single-record response can be
        indexed directly as a dict). Any other payload is returned as parsed,
        e.g. a list of dicts for a multi-record response.

    Raises
    ------
    UnicodeDecodeError
        If ``results`` is not valid UTF-8.
    json.JSONDecodeError
        If the decoded text is not valid JSON.
    """
    # Parse real JSON instead of deleting every bracket and calling
    # literal_eval: that approach corrupted nested arrays and string values
    # containing "[" or "]", and failed on null/true/false.
    data = json.loads(results.decode('utf8'))
    # Keep the historical contract: a lone record is handed back unwrapped.
    if isinstance(data, list) and len(data) == 1:
        return data[0]
    return data

#Build every unordered pair of entries from a dictionary whose keys are gene IDs
#and whose values are the lists of interacting genes.
def get_combinations(interactions):
    """Return all two-entry sub-dictionaries of ``interactions``.

    Parameters
    ----------
    interactions : dict
        Mapping of gene ID to its list of interacting genes.

    Returns
    -------
    list of dict
        One dict per pair of entries, each holding exactly those two
        key/value pairs, in the order produced by ``itertools.combinations``.
    """
    entry_pairs = itertools.combinations(interactions.items(), 2)
    return [dict(pair) for pair in entry_pairs]

def get_overlapping_assoc(a,b):
    """Fetch the functional annotations STRING reports for the pair ``a``, ``b``.

    Calls the STRING ``functional_annotation`` endpoint with both identifiers
    and keeps only the entries whose ``ratio_in_set`` equals 1.
    NOTE(review): presumably a ratio of 1 means the term annotates every
    queried identifier -- confirm against the STRING API documentation.

    Parameters
    ----------
    a, b : str
        Identifiers placed in the ``identifiers`` query parameter.

    Returns
    -------
    list of tuple
        ``(description, term, category)`` for each retained entry, in the
        order the API returned them.
    """
    # Percent-encode each identifier so reserved characters cannot break the
    # query string; '%0d' is the separator already used between identifiers.
    identifiers = urllib.parse.quote(a, safe='') + '%0d' + urllib.parse.quote(b, safe='')
    url = 'https://string-db.org/api/json/functional_annotation?identifiers=' + identifiers
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        results = response.read()    #get results in bytes
    entries = json.loads(results.decode('utf8'))
    return [
        (func["description"], func["term"], func["category"])
        for func in entries
        if func["ratio_in_set"] == 1
    ]

In [710]:
#This code looks up the stringdb identifier for each gene of interest
genes = ['vdrb','tbx2a','cryabb']
stringids = []
species = '7955'    #NCBI taxon ID sent as the `species` query parameter
string2gene = {}    #two-way lookup: gene name -> STRING id and STRING id -> gene name
for gene in genes:
    #percent-encode the gene name so reserved characters cannot break the query string
    url = ('https://string-db.org/api/json/get_string_ids?identifiers='
           + urllib.parse.quote(gene, safe='') + '&species=' + species)
    #the context manager closes the HTTP response even if read() raises
    with urllib.request.urlopen(url) as response:
        results = response.read()    #get results in bytes
    n = get_json(results)
    #get_json only unwraps single-record responses; a list means the API
    #returned zero or several matches for this name
    if isinstance(n, list):
        if not n:
            raise LookupError('No STRING identifier found for gene ' + repr(gene))
        #NOTE(review): keeps the first match; presumably the best one -- confirm
        n = n[0]
    stringids.append(n['stringId'])
    string2gene[gene] = n['stringId']
    string2gene[n['stringId']] = gene
    #time.sleep(1)
print(stringids)

{'vdrb': '7955.ENSDARP00000125655', '7955.ENSDARP00000125655': 'vdrb', 'tbx2a': '7955.ENSDARP00000027905', '7955.ENSDARP00000027905': 'tbx2a', 'cryabb': '7955.ENSDARP00000124531', '7955.ENSDARP00000124531': 'cryabb'}


In [711]:
#look up the 10 strongest interactions for each gene from stringdb
limit = '10'    #maximum number of partners requested per gene (sent as the `limit` parameter)
interactions = {}    #STRING id -> list of partner names (the `preferredName_B` field)
for stringid in stringids:
    web = []
    #percent-encode the identifier so reserved characters cannot break the query string
    url = ('https://string-db.org/api/json/interaction_partners?identifiers='
           + urllib.parse.quote(stringid, safe='') + '&limit=' + limit)
    #the context manager closes the HTTP response even if read() raises
    with urllib.request.urlopen(url) as response:
        results = response.read()    #get results in bytes
    p = get_json(results)
    #get_json hands back a bare dict when the response holds a single record;
    #wrap it so the loop below iterates over records rather than dict keys
    if isinstance(p, dict):
        p = [p]
    for g in p:
        web.append(g.get("preferredName_B"))
    interactions[stringid] = web
    #time.sleep(1)
print(interactions)

7955.ENSDARP00000125655
7955.ENSDARP00000027905
7955.ENSDARP00000124531
{'7955.ENSDARP00000125655': ['ube2i', 'ube2i2', 'vdr', 'CYP27B1', 'sumo2b', 'cyp27a1.4', 'pias4a', 'cyp27a1.2', 'med1', 'pias4b'], '7955.ENSDARP00000027905': ['ppm1db', 'dlx1a', 'nkx2.5', 'ube2i2', 'nkx2.3', 'bmp4', 'tfap2a', 'bmp7b', 'bmp2b', 'rasgef1ba'], '7955.ENSDARP00000124531': ['hsf1', 'btr25', 'tp53', 'LOC799840', 'LOC570613', 'cryaa', 'gba', 'mgea5', 'cdc5l', 'rasgef1ba']}


In [712]:
#find the overlapping genes: interaction partners shared by each pair of query genes
overlaps = []    #list of (STRING id A, STRING id B, [shared partner names])
r = get_combinations(interactions)
for q in r:
    #each q holds exactly two entries: {id_a: partners_a, id_b: partners_b}
    (id_a, partners_a), (id_b, partners_b) = q.items()
    in_b = set(partners_b)
    #keep the order of the first gene's partner list (and drop repeats) rather
    #than listing a set: set order varies with string hash randomisation, which
    #made this cell's output change between kernel restarts
    m = []
    for partner in partners_a:
        if partner in in_b and partner not in m:
            m.append(partner)
    if m:
        overlaps.append((id_a, id_b, m))
print(overlaps)

[('7955.ENSDARP00000125655', '7955.ENSDARP00000027905', ['ube2i2']), ('7955.ENSDARP00000027905', '7955.ENSDARP00000124531', ['rasgef1ba'])]


In [713]:
#Find evidence supporting the overlaps: fetch the STRING network record linking
#the first gene of each overlapping pair to every shared partner
evidence = []
for a,b,c in overlaps:
    for z in c:
        #NOTE(review): only the a-z link is queried; the b-z link is never
        #fetched here -- confirm that this is intended
        #percent-encode both identifiers so reserved characters cannot break the query string
        url = ('https://string-db.org/api/json/network?identifiers='
               + urllib.parse.quote(a, safe='') + '%0d' + urllib.parse.quote(z, safe=''))
        #the context manager closes the HTTP response even if read() raises
        with urllib.request.urlopen(url) as response:
            results = response.read()    #get results in bytes
        p = get_json(results)
        evidence.append(p)
print(evidence)

[{'preferredName_A': 'vdrb', 'fscore': 0, 'tscore': 0, 'score': 0.921, 'ascore': 0, 'ncbiTaxonId': '7955', 'pscore': 0, 'nscore': 0, 'dscore': 0.9, 'stringId_A': 'ENSDARP00000125655', 'escore': 0.249, 'preferredName_B': 'ube2i2', 'stringId_B': 'ENSDARP00000052745'}, {'preferredName_A': 'rasgef1ba', 'fscore': 0, 'tscore': 0.508, 'score': 0.519, 'ascore': 0.054, 'ncbiTaxonId': '7955', 'pscore': 0, 'nscore': 0, 'dscore': 0, 'stringId_A': 'ENSDARP00000116417', 'escore': 0.051, 'preferredName_B': 'tbx2a', 'stringId_B': 'ENSDARP00000027905'}]


In [716]:
#Find shared annotations in GO and UniProt
#Each entry of `annotations` is (gene name, other gene or partner name, annotation tuples).
annotations = []
for id_a, id_b, shared_partners in overlaps:
    #annotations common to the two query genes themselves
    pair_terms = get_overlapping_assoc(id_a, id_b)
    annotations.append((string2gene[id_a], string2gene[id_b], pair_terms))
    #then each query gene against every partner the two genes share
    for partner in shared_partners:
        for gene_id in (id_a, id_b):
            partner_terms = get_overlapping_assoc(gene_id, partner)
            annotations.append((string2gene[gene_id], partner, partner_terms))
print(annotations)

[('vdrb', 'tbx2a', [('DNA-binding', 'KW-0238', 'Keyword'), ('Nucleus', 'KW-0539', 'Keyword'), ('Transcription', 'KW-0804', 'Keyword'), ('Transcription regulation', 'KW-0805', 'Keyword')]), ('vdrb', 'ube2i2', [('intracellular', 'GO:0005622', 'Component'), ('cell', 'GO:0005623', 'Component'), ('nucleus', 'GO:0005634', 'Component'), ('cytoplasm', 'GO:0005737', 'Component'), ('organelle', 'GO:0043226', 'Component'), ('membrane-bounded organelle', 'GO:0043227', 'Component'), ('intracellular organelle', 'GO:0043229', 'Component'), ('intracellular membrane-bounded organelle', 'GO:0043231', 'Component'), ('intracellular part', 'GO:0044424', 'Component'), ('cell part', 'GO:0044464', 'Component'), ('binding', 'GO:0005488', 'Function'), ('ion binding', 'GO:0043167', 'Function'), ('organic cyclic compound binding', 'GO:0097159', 'Function'), ('heterocyclic compound binding', 'GO:1901363', 'Function'), ('Nucleus', 'KW-0539', 'Keyword'), ('heart looping', 'GO:0001947', 'Process'), ('morphogenesis of