In [1]:
import os

# Scatter Plot Imports
from maayanlab_bioinformatics.enrichment import enrich_crisp
import matplotlib as mpl

# Bar Chart Imports
import pandas as pd 
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import time
from matplotlib.ticker import MaxNLocator
from IPython.display import display, FileLink, HTML, Markdown

# Hexagonal Canvas Imports
import math
import uuid
import urllib
from textwrap import dedent
from string import Template
from operator import itemgetter

# Bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Category20
output_notebook()

In [7]:
!pip install maayanlab_bioinformatics



In [4]:
# Show which version of maayanlab_bioinformatics is installed in this kernel.
import maayanlab_bioinformatics
maayanlab_bioinformatics.__version__

'1.5.1'

In [2]:
# Enrichr API Function for Manhattan Plot and Bar Chart
# Takes a gene list and Enrichr libraries as input
class APIFailure(Exception):
    """Raised when an Enrichr HTTP request returns a non-OK status."""


class NoResults(Exception):
    """Raised when Enrichr returns no enrichment rows for a requested library."""


def Enrichr_API(enrichr_gene_list, all_libraries):
    """Run Enrichr enrichment analysis for a gene list against one or more libraries.

    The gene list is uploaded once through the ``addList`` endpoint and the
    resulting user list id is then queried against every library through the
    ``enrich`` endpoint.

    Parameters
    ----------
    enrichr_gene_list : iterable of str
        Gene symbols; they are joined with newlines for the upload.
    all_libraries : iterable of str
        Enrichr gene-set library names to query.

    Returns
    -------
    list
        ``[results_df, short_results_df, all_terms, all_pvalues,
        all_adjusted_pvalues, short_id]`` where

        * ``results_df`` holds every result row of the LAST library queried,
          with integer column labels plus a ``'library'`` column containing
          the library name with underscores removed,
        * ``short_results_df`` holds the first 10 rows of the LAST library,
        * ``all_terms`` / ``all_pvalues`` / ``all_adjusted_pvalues`` hold, per
          library, the values of columns 1, 2 and 6 of its first 10 rows,
        * ``short_id`` is the ``shortId`` of the uploaded list as a string.

    Raises
    ------
    ValueError
        If ``all_libraries`` is empty.
    APIFailure
        If either Enrichr request returns a non-OK HTTP status.
    NoResults
        If Enrichr returns no rows for a library.
    """
    all_libraries = list(all_libraries)
    if not all_libraries:
        # Without at least one library there is nothing to return; fail early
        # instead of hitting an unbound local at the return statement.
        raise ValueError('all_libraries must contain at least one Enrichr library name')

    all_terms = []
    all_pvalues = []
    all_adjusted_pvalues = []

    # Upload the gene list a single time; the upload does not depend on the
    # library, so repeating it per library only adds redundant requests.
    payload = {
        'list': (None, '\n'.join(enrichr_gene_list)),
        'description': (None, '')
    }
    response = requests.post('https://maayanlab.cloud/Enrichr/addList', files=payload)
    if not response.ok:
        raise APIFailure('Enrichr addList request failed with HTTP status %s' % response.status_code)

    upload = response.json()
    user_list_id = upload['userListId']
    short_id = upload['shortId']

    for library_name in all_libraries:
        # Short pause before each enrichment query, as in the original flow.
        time.sleep(0.5)
        response = requests.get(
            'https://maayanlab.cloud/Enrichr/enrich',
            # Let requests build and URL-encode the query string.
            params={'userListId': user_list_id, 'backgroundType': library_name}
        )
        if not response.ok:
            raise APIFailure('Enrichr enrich request failed with HTTP status %s' % response.status_code)

        library_rows = response.json().get(library_name) or []
        if len(library_rows) == 0:
            raise NoResults('Enrichr returned no results for library %s' % library_name)

        # Only the top 10 rows feed the per-library summary lists.
        short_results_df = pd.DataFrame(library_rows[0:10])
        all_terms.append(list(short_results_df[1]))
        all_pvalues.append(list(short_results_df[2]))
        all_adjusted_pvalues.append(list(short_results_df[6]))

        results_df = pd.DataFrame(library_rows)
        # adds library name to the data frame so the libraries can be distinguished
        results_df['library'] = library_name.replace('_', '')

    return [results_df, short_results_df, all_terms, all_pvalues, all_adjusted_pvalues, str(short_id)]

In [3]:
# Save a results table as CSV and expose it through a clickable link

def create_download_link(df, title = "Download CSV file of the full table of significant results", filename = "Enrichment_Analysis_Visualizer_data.csv"):  
    """Write ``df`` to ``filename`` as CSV and return an HTML link to that file.

    Parameters
    ----------
    df : object exposing ``to_csv``
        Table to export; it is written without its index.
    title : str
        Text shown for the link.
    filename : str
        Path the CSV is written to; also used as the link target.

    Returns
    -------
    IPython.display.HTML
        An anchor element pointing at ``filename``.
    """
    df.to_csv(filename, index = False)
    anchor = f'<a href="{filename}" target=_blank>{title}</a>'
    return HTML(anchor)




In [4]:
# ---- Scatter plot settings ----
significance_value = 0.05

# ---- Bar chart settings ----
output_file_name = 'Enrichr_results_bar'
figure_file_format = ['png', 'svg']
color = 'lightskyblue'
# One file name per requested figure format: "<output_file_name>.<format>"
final_output_file_names = [f'{output_file_name}.{extension}' for extension in figure_file_format]

# ---- Hexagonal canvas settings ----
canvas_color = 'Blue'
num_hex_colored = 10

# ---- Manhattan plot settings ----
manhattan_colors = ['#003f5c', '#7a5195', '#ef5675', '#ffa600']

In [5]:
# Run Enrichr against the PanglaoDB library for every gene set file and save
# one result table per module.
enrichr_library = 'PanglaoDB_Augmented_2021'

# Gene set files are expected to be named "geneset_<module>.csv". Strip the
# fixed prefix/suffix so module names containing underscores stay intact, and
# sort for a deterministic processing order.
genesets = sorted(
    file for file in os.listdir("../genesets")
    if file.startswith("geneset_") and file.endswith(".csv")
)
modules = [geneset[len("geneset_"):-len(".csv")] for geneset in genesets]

# Make sure the output directory exists before the first write.
os.makedirs("../celltype_enr", exist_ok=True)

for module in modules:
    # Genes are read from the index (first column) of the gene set file.
    df = pd.read_csv("../genesets/geneset_{}.csv".format(module), index_col=0)
    gene_list_input = df.index.tolist()

    genes = [str(x).strip() for x in gene_list_input]
    if not genes:
        print("Skipping module {}: gene set is empty".format(module))
        continue

    results = Enrichr_API(genes, [enrichr_library])
    # results[0] is the full result table; label its positional columns.
    res_df = pd.DataFrame(results[0]).rename(columns={
        0: 'rank',
        1: 'term',
        2: 'p-value',
        3: 'zscore',
        4: 'combined_score',
        5: 'overlap_genes',
        6: 'q-value'
    })
    if res_df.empty:
        # Never write a table that does not belong to this module.
        print("Skipping module {}: no enrichment results".format(module))
        continue

    res_df.to_csv("../celltype_enr/panglaoDB_{}.txt".format(module))

In [6]:
# Run Enrichr against the Allen Brain Atlas 10x scRNA library for every gene
# set file and save one result table per module.
enrichr_library = 'Allen_Brain_Atlas_10x_scRNA_2021'

# Gene set files are expected to be named "geneset_<module>.csv". Strip the
# fixed prefix/suffix so module names containing underscores stay intact, and
# sort for a deterministic processing order.
genesets = sorted(
    file for file in os.listdir("../genesets")
    if file.startswith("geneset_") and file.endswith(".csv")
)
modules = [geneset[len("geneset_"):-len(".csv")] for geneset in genesets]

# Make sure the output directory exists before the first write.
os.makedirs("../celltype_enr", exist_ok=True)

for module in modules:
    # Genes are read from the index (first column) of the gene set file.
    df = pd.read_csv("../genesets/geneset_{}.csv".format(module), index_col=0)
    gene_list_input = df.index.tolist()

    genes = [str(x).strip() for x in gene_list_input]
    if not genes:
        print("Skipping module {}: gene set is empty".format(module))
        continue

    results = Enrichr_API(genes, [enrichr_library])
    # results[0] is the full result table; label its positional columns.
    res_df = pd.DataFrame(results[0]).rename(columns={
        0: 'rank',
        1: 'term',
        2: 'p-value',
        3: 'zscore',
        4: 'combined_score',
        5: 'overlap_genes',
        6: 'q-value'
    })
    if res_df.empty:
        # Never write a table that does not belong to this module.
        print("Skipping module {}: no enrichment results".format(module))
        continue

    res_df.to_csv("../celltype_enr/allenBrain10x_{}.txt".format(module))

In [7]:
# NOTE(review): this cell uses the same library and output paths as the
# preceding Allen Brain Atlas cell, so it repeats the same requests and
# overwrites the same files. Confirm whether a different library was intended
# or whether this cell can be dropped.
enrichr_library = 'Allen_Brain_Atlas_10x_scRNA_2021'

# Gene set files are expected to be named "geneset_<module>.csv". Strip the
# fixed prefix/suffix so module names containing underscores stay intact, and
# sort for a deterministic processing order.
genesets = sorted(
    file for file in os.listdir("../genesets")
    if file.startswith("geneset_") and file.endswith(".csv")
)
modules = [geneset[len("geneset_"):-len(".csv")] for geneset in genesets]

# Make sure the output directory exists before the first write.
os.makedirs("../celltype_enr", exist_ok=True)

for module in modules:
    # Genes are read from the index (first column) of the gene set file.
    df = pd.read_csv("../genesets/geneset_{}.csv".format(module), index_col=0)
    gene_list_input = df.index.tolist()

    genes = [str(x).strip() for x in gene_list_input]
    if not genes:
        print("Skipping module {}: gene set is empty".format(module))
        continue

    results = Enrichr_API(genes, [enrichr_library])
    # results[0] is the full result table; label its positional columns.
    res_df = pd.DataFrame(results[0]).rename(columns={
        0: 'rank',
        1: 'term',
        2: 'p-value',
        3: 'zscore',
        4: 'combined_score',
        5: 'overlap_genes',
        6: 'q-value'
    })
    if res_df.empty:
        # Never write a table that does not belong to this module.
        print("Skipping module {}: no enrichment results".format(module))
        continue

    res_df.to_csv("../celltype_enr/allenBrain10x_{}.txt".format(module))