In [1]:
import pandas as pd
import numpy as np

# Set pandas' display limits for both rows and columns to 100.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 100)

In [2]:
# http://pantherdb.org/services/tryItOut.jsp?url=%2Fservices%2Fapi%2Fpanther

import requests
import json
import pandas as pd

def panther(input_genes, reference_genes=None, organism='9606', annotation_dataset='GO:0008150', test_type='FISHER', correction_type='FDR', fdr_threshold=0.05):
    '''
        Query the Panther over-representation API and return the rows whose
        `fdr` value is below `fdr_threshold`. More info at Panther:
        http://pantherdb.org/services/tryItOut.jsp?url=%2Fservices%2Fapi%2Fpanther
        
        input_genes:
            iterable of genes to query (each item is converted with str())
        
        reference_genes:
            (optional) iterable of reference genes; when given, `organism` is
            also sent as the reference organism
        
        organism: 
            all supported organisms listed at http://pantherdb.org/services/oai/pantherdb/supportedgenomes
        
        annotation_dataset: 
            all supported annotations listed at http://pantherdb.org/services/oai/pantherdb/supportedannotdatasets
            GO:0008150 (Biological Process)
            ANNOT_TYPE_ID_PANTHER_GO_SLIM_BP (Biological Process Slim)
            ANNOT_TYPE_ID_PANTHER_PATHWAY (Panther Pathway)
            ANNOT_TYPE_ID_REACTOME_PATHWAY (Reactome Pathway)
            
        test_type: FISHER, BINOMIAL
        
        correction_type: FDR, BONFERRONI, NONE
        
        fdr_threshold:
            rows are kept only when their `fdr` value is strictly below this
            cut-off (default 0.05). The filter is applied whatever
            `correction_type` was requested.
        
        Returns:
            pandas DataFrame built from the `results -> result` entry of the
            JSON response, filtered on `fdr`. An empty DataFrame is returned
            when the service reports no result rows.
        
        Raises:
            requests.HTTPError if the service answers with an HTTP error status
            KeyError if the JSON payload has no `results` / `result` entry
    
    '''
    
    # stringify input genes; str() lets numeric identifiers through as well
    input_genes = ','.join(map(str, input_genes))
    
    #API details
    url = 'http://pantherdb.org/services/oai/pantherdb/enrich/overrep'

    query = {'geneInputList': input_genes,
             'organism': organism}
    
    # Only send the reference fields when a reference list was supplied.
    # `is not None` (not `!= None`) so that a pandas Series or NumPy array can
    # be passed without triggering an ambiguous element-wise comparison.
    if reference_genes is not None:
        query['refInputList'] = ','.join(map(str, reference_genes))
        query['refOrganism'] = organism
    
    query['annotDataSet'] = annotation_dataset
    query['enrichmentTestType'] = test_type
    query['correction'] = correction_type

    #Making http post request
    response = requests.post(url, data=query)
    # Surface HTTP failures here rather than as a confusing JSON/KeyError below.
    response.raise_for_status()

    res = pd.DataFrame(response.json()['results']['result'])
    
    # An empty result list yields a frame without an `fdr` column; nothing to filter.
    if res.empty:
        return res
    
    return res[res.fdr < fdr_threshold].copy()


In [5]:
# Read the header-less file and keep its first column as a plain Python list.
connectors = pd.read_csv('de_connectors.txt', header=None)[0].to_list()

In [6]:
# Sanity check: number of entries loaded into `connectors`.
len(connectors)

5083

In [7]:
# Tab-separated nearest-neighbour table for UQCRC2; its 'End gene' column is
# used both as the index of `net` and as the reference gene list.
net_path = '../network_analysis/output/{}_nearest_neighbor_weighted.txt'.format('UQCRC2')
net = pd.read_csv(net_path, sep='\t').set_index('End gene', drop=False)
refGenes = ref = net['End gene'].to_list()

In [8]:
# Enrichment of the connector genes against the reference list, using the
# GO:0008150 annotation data set.
res = panther(
    input_genes=connectors,
    reference_genes=refGenes,
    annotation_dataset='GO:0008150',
)


In [9]:
# Persist the filtered enrichment table as tab-separated text, without the index.
res.to_csv(
    path_or_buf='de.go_bp.tsv',
    sep='\t',
    index=False,
)

In [10]:
import ast
def get_term(d, term):
    '''
        Look up `term` in `d`.

        Returns d[term] when `term` is contained in `d`; otherwise returns the
        string 'None' (a str placeholder, not the None object).
    '''
    return d[term] if term in d else 'None'

# Pull the 'label' and 'id' entries out of each `term` value into their own
# columns; get_term supplies the 'None' placeholder when an entry is absent.
term_column = res['term']
res['label'] = term_column.apply(get_term, args=('label',))
res['go'] = term_column.apply(get_term, args=('id',))

In [11]:
# Drop rows whose `go` column holds the 'None' placeholder string.
has_go_id = res['go'].ne('None')
res_clean = res.loc[has_go_id]

In [12]:
# Persist the cleaned table as tab-separated text, without the index.
res_clean.to_csv(
    path_or_buf='de.go_bp.clean.tsv',
    sep='\t',
    index=False,
)