<a href="https://colab.research.google.com/github/chakitarora/compbio_utils/blob/master/GO_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:

##############################################################
## The following script retrieves functional enrichment
## results from STRING for the given set of proteins and
## prints them as tab-separated rows:
##   category, term, matching gene names, FDR, description
##
## By default every returned row is printed. Set
## `category_filter` and/or `max_fdr` below to restrict the
## output (e.g. category_filter = "Process", max_fdr = 0.01
## for significantly enriched GO Processes at FDR < 1%).
##
## Requires requests module:
## type "python -m pip install requests" in command line (win)
## or terminal (mac/linux) to install the module
##############################################################

import requests ## python -m pip install requests 
import json

my_genes = ['BAK1', 'BCL2', 'BAX']

string_api_url = "https://string-db.org/api"
output_format = "json"
method = "enrichment"

##
## Output filters (None disables the corresponding filter)
##

category_filter = None  # e.g. 'PMID', 'Component', 'Process', 'Function', 'KEGG'
max_fdr = None          # e.g. 0.01 keeps only rows with fdr < 0.01

##
## Construct the request
##

request_url = "/".join([string_api_url, output_format, method])

##
## Set parameters
##

params = {

    "identifiers" : "%0d".join(my_genes), # your proteins, joined with the "%0d" separator
    "species" : 9606, # species NCBI identifier 
    "caller_identity" : "www.awesome_app.org" # your app name

}

##
## Call STRING
##

# The timeout keeps the cell from hanging forever if the service is unreachable.
response = requests.post(request_url, data=params, timeout=60)

# Fail loudly on an HTTP error instead of trying to parse an error page as results.
response.raise_for_status()

##
## Read and parse the results
##

data = json.loads(response.text)

for row in data:

    term = row["term"]
    preferred_names = ",".join(row["preferredNames"])
    fdr = float(row["fdr"])
    description = row["description"]
    category = row["category"]

    # Skip rows rejected by an active filter; with both filters set to None
    # every row is printed.
    if category_filter is not None and category != category_filter:
        continue
    if max_fdr is not None and fdr >= max_fdr:
        continue

    print("\t".join([category, term, preferred_names, str(fdr), description]))

Component	GO.0005635	BAX,BCL2	0.0057	nuclear envelope
Component	GO.0005741	BAX,BAK1,BCL2	1.66e-05	mitochondrial outer membrane
Component	GO.0005783	BAX,BAK1,BCL2	0.0033	endoplasmic reticulum
Component	GO.0005789	BAX,BCL2	0.026	endoplasmic reticulum membrane
Component	GO.0005829	BAX,BAK1,BCL2	0.04	cytosol
Component	GO.0046930	BAX,BAK1,BCL2	6.28e-08	pore complex
Component	GO.0097136	BAX,BAK1	1.66e-05	Bcl-2 family protein complex
Component	GO.0098827	BAX,BCL2	0.026	endoplasmic reticulum subcompartment
Function	GO.0015267	BAX,BCL2	0.0057	channel activity
Function	GO.0031072	BAX,BAK1	0.00049	heat shock protein binding
Function	GO.0042803	BAX,BAK1,BCL2	0.00049	protein homodimerization activity
Function	GO.0046982	BAX,BAK1,BCL2	0.00018	protein heterodimerization activity
Function	GO.0051087	BAX,BAK1	0.00049	chaperone binding
Function	GO.0051400	BAX,BAK1,BCL2	8.7e-09	BH domain binding
Function	GO.0051434	BAX,BCL2	6.25e-06	BH3 domain binding
InterPro	IPR002475	BAX,BAK1,BCL2	1.19e-09	Bcl2-like
I

In [7]:
# Dump the whole parsed response as plain text for a quick look at its raw structure.
print(data)

[{'p_value': 0.0015, 'number_of_genes': 2, 'description': 'nuclear envelope', 'ncbiTaxonId': 9606, 'term': 'GO.0005635', 'number_of_genes_in_background': 446, 'inputGenes': ['BAX', 'BCL2'], 'fdr': 0.0057, 'category': 'Component', 'preferredNames': ['BAX', 'BCL2']}, {'p_value': 8.18e-07, 'number_of_genes': 3, 'description': 'mitochondrial outer membrane', 'ncbiTaxonId': 9606, 'term': 'GO.0005741', 'number_of_genes_in_background': 181, 'inputGenes': ['BAX', 'BAK1', 'BCL2'], 'fdr': 1.66e-05, 'category': 'Component', 'preferredNames': ['BAX', 'BAK1', 'BCL2']}, {'p_value': 0.00078, 'number_of_genes': 3, 'description': 'endoplasmic reticulum', 'ncbiTaxonId': 9606, 'term': 'GO.0005783', 'number_of_genes_in_background': 1796, 'inputGenes': ['BAX', 'BAK1', 'BCL2'], 'fdr': 0.0033, 'category': 'Component', 'preferredNames': ['BAX', 'BAK1', 'BCL2']}, {'p_value': 0.0079, 'number_of_genes': 2, 'description': 'endoplasmic reticulum membrane', 'ncbiTaxonId': 9606, 'term': 'GO.0005789', 'number_of_gene

In [8]:
# Check which container type the parsed JSON response was decoded into.
type(data)

list

In [10]:
# Bare expression: let the notebook pretty-print the parsed response, one field per line.
data

[{'category': 'Component',
  'description': 'nuclear envelope',
  'fdr': 0.0057,
  'inputGenes': ['BAX', 'BCL2'],
  'ncbiTaxonId': 9606,
  'number_of_genes': 2,
  'number_of_genes_in_background': 446,
  'p_value': 0.0015,
  'preferredNames': ['BAX', 'BCL2'],
  'term': 'GO.0005635'},
 {'category': 'Component',
  'description': 'mitochondrial outer membrane',
  'fdr': 1.66e-05,
  'inputGenes': ['BAX', 'BAK1', 'BCL2'],
  'ncbiTaxonId': 9606,
  'number_of_genes': 3,
  'number_of_genes_in_background': 181,
  'p_value': 8.18e-07,
  'preferredNames': ['BAX', 'BAK1', 'BCL2'],
  'term': 'GO.0005741'},
 {'category': 'Component',
  'description': 'endoplasmic reticulum',
  'fdr': 0.0033,
  'inputGenes': ['BAX', 'BAK1', 'BCL2'],
  'ncbiTaxonId': 9606,
  'number_of_genes': 3,
  'number_of_genes_in_background': 1796,
  'p_value': 0.00078,
  'preferredNames': ['BAX', 'BAK1', 'BCL2'],
  'term': 'GO.0005783'},
 {'category': 'Component',
  'description': 'endoplasmic reticulum membrane',
  'fdr': 0.026,