In [None]:
#!/usr/bin/python
import requests, sys


def convert_and_map_human_gene_symbol(gene, target_taxon, timeout=30):
    """Look up the orthologues of a human gene symbol via the Ensembl REST API.

    Sends a GET request to ``/homology/symbol/human/<gene>`` on
    rest.ensembl.org, asking for the condensed format restricted to
    orthologues in ``target_taxon``.

    Args:
        gene: Human gene symbol; inserted verbatim into the request path.
        target_taxon: Taxon identifier sent as the ``target_taxon`` query
            value (converted with ``str``).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The first element of the ``data`` list in the decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    server = "https://rest.ensembl.org"
    ext = (
        "/homology/symbol/human/" + gene
        + "?target_taxon=" + str(target_taxon)
        + ";format=condensed;type=orthologues"
    )
    response = requests.get(
        server + ext,
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )

    # raise_for_status() does nothing for a successful response and raises
    # for an error status, so no separate exit path is needed afterwards.
    response.raise_for_status()

    return response.json()['data'][0]

def get_goterms(gene, timeout=30):
    """Fetch the GO cross-references of an identifier from the Ensembl REST API.

    Sends a GET request to ``/xrefs/id/<gene>`` on rest.ensembl.org with
    ``external_db=GO`` and ``all_levels=1``.

    Args:
        gene: Identifier inserted verbatim into the request path.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    server = "https://rest.ensembl.org"
    ext = "/xrefs/id/" + gene + "?external_db=GO;all_levels=1"
    response = requests.get(
        server + ext,
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )

    # raise_for_status() does nothing for a successful response and raises
    # for an error status, so no separate exit path is needed afterwards.
    response.raise_for_status()

    return response.json()

def parse_goterms(gotermjson):
    """Collect the distinct ``description`` values of the given entries.

    Args:
        gotermjson: Iterable of mappings, each with a ``description`` key.

    Returns:
        A list of the descriptions in first-seen order, without repeats.
    """
    unique_descriptions = []
    for description in (entry['description'] for entry in gotermjson):
        if description in unique_descriptions:
            continue
        unique_descriptions.append(description)
    return unique_descriptions
        
def convert_symbol_file(fname, target_taxon):
    """Map every gene symbol listed in a file to its Ensembl homology record.

    The first two lines of the file are treated as a header and skipped.
    Each remaining line is stripped of surrounding whitespace and looked up
    with ``convert_and_map_human_gene_symbol``.

    Args:
        fname: Path of the text file holding one gene symbol per line.
        target_taxon: Taxon identifier forwarded to the lookup.

    Returns:
        A dict from gene symbol to the lookup result, in file order.
    """
    mappings = {}
    with open(fname) as f:
        # The first two lines are header lines, not gene symbols.
        f.readline()
        f.readline()
        for line in f:
            gene = line.strip()
            # A blank line would send a request with no symbol, and a
            # repeated symbol would only repeat the same network lookup.
            if not gene or gene in mappings:
                continue
            mappings[gene] = convert_and_map_human_gene_symbol(gene, target_taxon)

    return mappings


def output_terms(fname, mappings, origin=True):
    """Write a tab-separated table of symbols, IDs and their GO terms.

    One row is written per entry of ``mappings``, after a
    ``SYMBOL / ID / GOterms`` header row. The GO terms are fetched with
    ``get_goterms``, reduced with ``parse_goterms`` and joined with ", ".

    Args:
        fname: Path of the output file; it is overwritten.
        mappings: Dict from symbol to a record with an ``id`` entry and a
            ``homologies`` list whose items carry an ``id`` entry.
        origin: If true, use each record's own ``id``. Otherwise use the
            ``id`` of its first homology, and write empty ID and GO-term
            columns when the record has no homologies.
    """
    # The context manager closes the file even if a lookup raises mid-loop.
    with open(fname, 'w') as outf:
        outf.write('\t'.join(["SYMBOL", "ID", "GOterms"]) + '\n')

        for gid, record in mappings.items():
            if origin:
                ensembl_id = record['id']
            elif record['homologies']:
                ensembl_id = record['homologies'][0]['id']
            else:
                ensembl_id = None

            if ensembl_id is None:
                row = [gid, "", ""]
            else:
                goterms = parse_goterms(get_goterms(ensembl_id))
                row = [gid, ensembl_id, ", ".join(goterms)]
            outf.write('\t'.join(row) + '\n')

def output_converted_ids(fname, mappings, target_label="MOUSE"):
    """Write a tab-separated table of symbols, their IDs and homologue IDs.

    One row is written per entry of ``mappings``, after a header row. The
    third column holds the ``id`` of the record's first homology, or is left
    empty when the record has no homologies.

    Args:
        fname: Path of the output file; it is overwritten.
        mappings: Dict from symbol to a record with an ``id`` entry and a
            ``homologies`` list whose items carry an ``id`` entry.
        target_label: Header text of the third column.
    """
    # The context manager closes the file even if a malformed record raises.
    with open(fname, 'w') as outf:
        outf.write('\t'.join(["SYMBOL", "ID", target_label]) + '\n')
        for gid, record in mappings.items():
            homologies = record['homologies']
            homologue_id = homologies[0]['id'] if homologies else ""
            outf.write('\t'.join([gid, record['id'], homologue_id]) + '\n')
    
    


In [None]:
# Taxon ID of the origin species: Homo sapiens (human).
origin_taxon = 9606
# Taxon ID of the target species: Mus musculus (mouse).
target_taxon = 10090

In [None]:
# Look up every symbol in List1 for the target taxon, then save the ID table.
mappings = convert_symbol_file("../shared/List1.txt", target_taxon)
output_converted_ids("../List1.converted.txt", mappings)

In [None]:
# Write the GO-term table for the List1 mappings, keyed on each record's own ID.
output_terms("../List1_ID.GO.txt", mappings, origin=True)

In [57]:
# Spot-check the GO table: print a label, then list the rows mentioning CRISP.
# The grepped file is the one written by the output_terms cell above, so that
# cell must have been run first.
! echo "Official ID"
! grep CRISP ../List1_ID.GO.txt

Official ID
grep: ../List1_ID.GO.txt: No such file or directory


In [59]:
#! git code here
# Record the current state as the "ID version": commit, tag it v1.1, and push.
# NOTE(review): no `git add` (or `commit -a`) precedes the commit, so when the
# changes are only unstaged nothing is committed; a plain `git push` also does
# not send tags by default — confirm whether `git push --tags` was intended.
! git commit -m "ID version"
! git tag -a v1.1 -m "ID version"
! git push


On branch master
Your branch is up-to-date with 'origin/master'.
Changes not staged for commit:
	[31mmodified:   Prepping_RS_Workshop.ipynb[m
	[31mmodified:   VersionedNotebook.ipynb[m

no changes added to commit
Everything up-to-date
