<h1 style="color:orange"  align="center"><b><u>OrthoDB Tutorial</u></b></h1>

## Imports

In [1]:
import json
import requests

### Send request and convert the response to dictionary

In [2]:
def api_send(request_url, timeout=30):
    """Send a GET request and return the decoded response.

    Parameters
    ----------
    request_url : str
        Full URL to request.
    timeout : float or tuple, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        exception. Defaults to 30 so that an unresponsive server cannot
        block the notebook indefinitely.

    Returns
    -------
    object or str
        The parsed JSON value when the response body is valid JSON;
        otherwise the raw response text (for example tab-delimited output).
    """
    response = requests.get(request_url, timeout=timeout)
    body = response.text

    try:
        return json.loads(body)
    except ValueError:
        # Body is not JSON: hand the text back unchanged.
        return body
    

### Handy function to construct requests to the API

In [3]:
def create_request(cmd, arguments):
    """Build the URL for an OrthoDB API call.

    Parameters
    ----------
    cmd : str
        API command; must be one of the known commands listed below.
    arguments : dict
        Maps a parameter name to its value(s). A value may be an iterable of
        items (joined with commas) or a single string or number.

    Returns
    -------
    str or None
        The request URL, or None (after printing a message) when ``cmd`` is
        unknown or ``arguments`` is not a dict.
    """
    known_commands = ["search", "blast", "tree", "group", "orthologs", "ogdetails", "siblings", "fasta", "tab"]

    if cmd not in known_commands:
        print("passed unknown command")
        return None

    if not isinstance(arguments, dict):
        print("passed arguments is of type {}, must be a dict".format(type(arguments)))
        return None

    pairs = []
    for arg, values in arguments.items():
        # A bare string or scalar counts as one value; iterating a string
        # would otherwise split it into characters ("p450" -> "p,4,5,0").
        if isinstance(values, str) or not hasattr(values, "__iter__"):
            values = [values]
        pairs.append("{}={}".format(arg, ",".join(map(str, values))))

    # With no arguments the URL still ends in "?", as it always has.
    return "http://www.orthodb.org/{}?".format(cmd) + "&".join(pairs)

### Example 1 | Search

#### dictionary of params

In [4]:
# Parameters for the `search` command. Each value is a list because
# create_request joins the items of a value with commas.
example1 = {
    "query": ["p450"],
    "limit": [2],
    "level": [33208],
    "singlecopy": [0.8],
}

#### construct request url

In [5]:
# Build the URL for the `search` command and show it before sending anything.
url = create_request(cmd="search", arguments=example1)
print(url)

http://www.orthodb.org/search?query=p450&limit=2&level=33208&singlecopy=0.8


#### make an API request and print the response

In [6]:
# Send the request and print whatever api_send decoded from the response.
response = api_send(request_url=url)
print(response)

{'status': 'ok', 'message': None, 'data': ['101276at33208', '104147at33208'], 'count': 345, 'skip': 0, 'limit': 2, 'query': 'p450', 'level': 33208, 'url': 'https://www.orthodb.org//search?query=p450&limit=2&level=33208&singlecopy=0.8', 'universal': None, 'singlecopy': 80, 'inclusive': 1}


---

## Example 7 | tab

In [7]:
# Parameters for the `tab` command: one group id, two species, and `long`.
example7 = {
    "id": ["716at7742"],
    "species": ["9606_0", "9544_0"],
    "long": [0],
}

In [8]:
# Build the `tab` request, send it, and print what comes back.
url = create_request(cmd="tab", arguments=example7)
response = api_send(request_url=url)
print(response)

pub_og_id	og_name	level_taxid	organism_taxid	organism_name	int_prot_id	pub_gene_id	description
716at7742	dynein heavy chain 8, axonemal	7742	9544_0	Macaca mulatta	9544_0:00146d	DNAH8	LOW QUALITY PROTEIN: dynein heavy chain 8, axonemal
716at7742	dynein heavy chain 8, axonemal	7742	9544_0	Macaca mulatta	9544_0:001b78	DNAH5	dynein heavy chain 5, axonemal
716at7742	dynein heavy chain 8, axonemal	7742	9606_0	Homo sapiens	9606_0:0017fc	DNAH5	Dynein heavy chain 5, axonemal
716at7742	dynein heavy chain 8, axonemal	7742	9606_0	Homo sapiens	9606_0:0019b4	DNAH8	Dynein heavy chain 8, axonemal



---

## Example "Extract the hemoglobin ortholog between monkey and human"

#### https://www.ncbi.nlm.nih.gov/gene/3043
#### Hemoglobin NCBI ID : 3043
#### Human Tax ID: 9606
#### Monkey Tax ID: 9544
#### Primates Tax Level ID: 9443 

### Perform an API request

In [9]:
# Search for gene 3043 in the two species at the Primates level (9443).
params = {
    "query": [3043],
    "species": ["9606_0", "9544_0"],
    "level": [9443],
}
url = create_request(cmd="search", arguments=params)
response = api_send(request_url=url)
print(response)

{'status': 'ok', 'message': None, 'data': ['36039at9443', '46208at9443'], 'count': 2, 'skip': 0, 'limit': 1000, 'query': '3043', 'level': 9443, 'url': 'https://www.orthodb.org//search?query=3043&species=9606_0,9544_0&level=9443', 'universal': None, 'singlecopy': None, 'inclusive': 1}


### Extract the orthologous groups IDs

In [10]:
# The "data" entry of the search response holds the orthologous group IDs.
ogs = response["data"]
print(ogs)

['36039at9443', '46208at9443']


In [11]:
# Request the `tab` output of every orthologous group and concatenate the
# returned text, in the order the groups appear in `ogs`.
complete_response = ""
for OG in ogs:
    params = dict(id=[OG], level=[9443], species=["9606_0", "9544_0"])
    url = create_request(cmd="tab", arguments=params)
    response = api_send(request_url=url)
    complete_response = complete_response + response

In [12]:
# Show the concatenated `tab` text gathered for all groups.
print(complete_response)

pub_og_id	og_name	level_taxid	organism_taxid	organism_name	int_prot_id	pub_gene_id	description
36039at9443	Proteasome assembly chaperone 1	9443	9544_0	Macaca mulatta	9544_0:000ccd	PSMG1	Proteasome assembly chaperone 1
36039at9443	Proteasome assembly chaperone 1	9443	9606_0	Homo sapiens	9606_0:004d6e	PSMG1	Proteasome assembly chaperone 1
pub_og_id	og_name	level_taxid	organism_taxid	organism_name	int_prot_id	pub_gene_id	description
46208at9443	hemoglobin subunit beta	9443	9544_0	Macaca mulatta	9544_0:0037dd	C6KGT1_MACMU	Beta hemoglobin
46208at9443	hemoglobin subunit beta	9443	9544_0	Macaca mulatta	9544_0:0038e5	LOC715607	LOW QUALITY PROTEIN: hemoglobin subunit delta-like
46208at9443	hemoglobin subunit beta	9443	9606_0	Homo sapiens	9606_0:002b68	HBB	Beta-globin
46208at9443	hemoglobin subunit beta	9443	9606_0	Homo sapiens	9606_0:002ffa	HBD	Delta globin



## OR: Get all the orthologs in the given groups

In [13]:
# Collect one `orthologs` response per orthologous group.
complete_response = []
for OG in ogs:
    url = create_request(
        cmd="orthologs",
        arguments={"id": [OG], "species": ["9606_0", "9544_0"]},
    )
    response = api_send(request_url=url)
    complete_response.append(response)

print(complete_response)

[{'status': 'ok', 'message': None, 'data': [{'organism': {'description': 'rhesus macaque;rhesus macaques;rhesus monkeys', 'param': 'species in Primates', 'id': '9544_0', 'name': 'Macaca mulatta', 'type': 'taxonomy'}, 'species_statistics': {'in_clusters_count': 142105, 'genes_count': 20999, 'mapping_type': 'C'}, 'genes': [{'gene_id': {'id': 'PSMG1', 'param': '9544_0:000ccd'}, 'exons': '8', 'interpro': [{'description': 'Proteasome assembly chaperone 1', 'param': '1..287', 'id': 'IPR016565', 'type': 'interpro'}], 'aas': '288', 'how_much_more_info': 2, 'description': 'Proteasome assembly chaperone 1', 'uniprot': {'id': 'G7MMS2', 'name': 'Proteasome assembly chaperone 1', 'type': 'uniprot'}, 'more_info': True}]}, {'organism': {'description': 'man', 'param': 'species in Primates', 'id': '9606_0', 'name': 'Homo sapiens', 'type': 'taxonomy'}, 'species_statistics': {'in_clusters_count': 156629, 'genes_count': 21416, 'mapping_type': 'C'}, 'genes': [{'gene_id': {'id': 'PSMG1', 'param': '9606_0:00

### Get all the orthologous gene IDs

In [14]:
# Walk response -> organism -> gene and print the `param` field of each gene_id.
for response in complete_response:
    for organism in response["data"]:
        for gene in organism["genes"]:
            gene_id = gene["gene_id"]
            print(gene_id["param"])

9544_0:000ccd
9606_0:004d6e
9544_0:0037dd
9544_0:0038e5
9606_0:002b68
9606_0:002ffa


## Practice
### Extract EMBL gene ID from the previous gene IDs