In [12]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import altair as alt
import numpy as np

In [3]:
%load_ext autoreload

In [4]:
%autoreload 2

In [5]:
from enrichrpy.enrichr import get_pathway_enrichment

In [2]:
# Gene symbols used as the query set for get_pathway_enrichment below.
# Kept as a whitespace-separated block so the list is easy to paste/extend;
# .split() yields the symbols in the order written here.
genes = """
    TYROBP   HLA-DRA   SPP1     LAPTM5    C1QB    FCER1G
    GPNMB    FCGR3A    RGS1     HLA-DPA1  ITGB2   C1QC
    HLA-DPB1 IFI30     SRGN     APOC1     CD68    HLA-DRB1
    C1QA     LYZ       APOE     HLA-DQB1  CTSB    HLA-DQA1
    CD74     AIF1      FCGR2A   CD14      S100A9  CTSS
""".split()

In [29]:
# Run pathway enrichment for the gene list via enrichrpy; the result is a
# DataFrame with one row per term (see the rendered table below).
df = get_pathway_enrichment(genes)
# Bare expression as the last line of the cell renders the frame.
df

Unnamed: 0,Rank,Term name,P-value,Z-score,Combined score,Overlapping genes,Adjusted p-value,Old p-value,Old adjusted p-value
0,1,antigen processing and presentation of exogeno...,1.378489e-16,112.965909,4125.557199,"[CD74, FCER1G, HLA-DPB1, HLA-DRA, IFI30, HLA-D...",5.123632e-14,0,0
1,2,antigen processing and presentation of peptide...,1.700654e-16,110.444444,4010.276254,"[CD74, FCER1G, HLA-DPB1, HLA-DRA, IFI30, HLA-D...",5.123632e-14,0,0
2,3,antigen processing and presentation of exogeno...,2.311413e-16,106.865591,3847.535587,"[CD74, FCER1G, HLA-DPB1, HLA-DRA, IFI30, HLA-D...",5.123632e-14,0,0
3,4,interferon-gamma-mediated signaling pathway (G...,7.312984e-12,99.332145,2547.012264,"[HLA-DPB1, HLA-DRA, IFI30, HLA-DRB1, HLA-DQA1,...",9.885133e-10,0,0
4,5,cellular response to interferon-gamma (GO:0071...,7.432431e-12,63.900241,1637.454431,"[HLA-DPB1, HLA-DRA, IFI30, AIF1, HLA-DRB1, HLA...",9.885133e-10,0,0
...,...,...,...,...,...,...,...,...,...
660,661,supramolecular fiber organization (GO:0097435),4.123141e-01,1.933005,1.712584,[LYZ],4.148092e-01,0,0
661,662,"regulation of transcription, DNA-templated (GO...",4.389228e-01,1.217720,1.002709,"[CD74, SPP1, APOE, HLA-DRB1]",4.409119e-01,0,0
662,663,organelle organization (GO:0006996),4.712158e-01,1.609003,1.210677,[APOE],4.726373e-01,0,0
663,664,negative regulation of cellular process (GO:00...,5.776315e-01,1.184315,0.649975,[GPNMB],5.785015e-01,0,0


In [78]:
def enrichment_barplot(enrichr_results, n=10, scheme='blues'):
    """
    Draw a horizontal bar chart of enrichment results.

    Each bar is one pathway; its length is -log10 of the adjusted p-value and
    its color encodes how many query genes overlap the pathway. A dashed red
    vertical rule marks -log10(0.05).

    Parameters
    ----------
    enrichr_results: pd.DataFrame
      - result dataframe from enrichrpy.enrichr.get_pathway_enrichment; must
        contain 'Term name', 'Adjusted p-value' and 'Overlapping genes'
    n: int
      - keep only the N rows with the smallest adjusted p-value, default=10.
        None keeps every row.
    scheme: str
      - altair color scheme to use. schemes listed here https://vega.github.io/vega/docs/schemes/

    Returns
    -------
    Layered altair chart (bars + significance rule). The input frame is not modified.
    """
    # Derived plotting columns are added to a copy returned by assign().
    derived = {
        'Num hits': [len(hits) for hits in enrichr_results['Overlapping genes']],
        '-log10(FDR)': [-np.log10(fdr) for fdr in enrichr_results['Adjusted p-value']],
        'Pathway': enrichr_results['Term name'].tolist(),
    }
    table = enrichr_results.assign(**derived)

    # Restrict to the most significant terms unless the caller asked for all.
    table = table if n is None else table.sort_values('Adjusted p-value').head(n)

    x_enc = alt.X('-log10(FDR)')
    # Order the pathways by bar length, longest first.
    y_enc = alt.Y('Pathway', sort={"encoding": "x", "order": "descending"})
    color_enc = alt.Color('Num hits', scale=alt.Scale(scheme=scheme, domainMin=0))
    bars = alt.Chart(table).mark_bar().encode(x=x_enc, y=y_enc, color=color_enc)

    # Reference line at the conventional FDR = 0.05 cutoff.
    cutoff = alt.datum(-np.log10(.05))
    rule = alt.Chart().mark_rule(strokeDash=[8, 6], color="red", strokeWidth=2).encode(x=cutoff)

    return bars + rule

In [79]:
# Bar chart of the 10 pathways with the smallest adjusted p-value (default n=10),
# shaded by the number of overlapping genes.
enrichment_barplot(enrichr_results=df)

In [98]:
def enrichment_dotplot(enrichr_results, n=10, hue='Z-score', scheme='viridis', log=True):
    """
    Plots enrichment results in dotplot form

    Each dot is one pathway, positioned at -log10 of its adjusted p-value,
    sized by the number of overlapping genes and colored by `hue` (or its
    natural log). A dashed red vertical rule marks -log10(0.05).

    Parameters
    ----------
    enrichr_results: pd.DataFrame
      - result dataframe from enrichrpy.enrichr.get_pathway_enrichment; must
        contain 'Term name', 'Adjusted p-value', 'Overlapping genes' and `hue`
    n: int
      - plot top N pathways (smallest adjusted p-value), default=10.
        None plots every row.
    hue: str
      - column to color the dotplot by, default='Z-score'
    scheme: str
      - altair color scheme to use. schemes listed here https://vega.github.io/vega/docs/schemes/
    log: bool
      - if True (default), color by the natural log of `hue`, stored in a
        'log(<hue>)' column; `hue` must then be numeric. If False, color by
        the raw `hue` column and no log column is computed.

    Returns
    -------
    Layered altair chart (dots + significance rule) with axis grid lines
    enabled. The input frame is not modified.
    """
    source = enrichr_results.copy()
    source['Num hits'] = [len(ls) for ls in source['Overlapping genes']]
    source['-log10(FDR)'] = -np.log10(source['Adjusted p-value'])
    source['Pathway'] = source['Term name'].to_list()

    # Derive the log column only when it is the one being plotted: taking
    # np.log unconditionally raised for non-numeric hue columns and produced
    # warnings / NaNs for non-positive values even when log=False.
    color_field = hue
    if log:
        color_field = f'log({hue})'
        source[color_field] = np.log(source[hue])

    if n is not None:
        source = source.sort_values('Adjusted p-value').iloc[:n]

    c = alt.Chart(source).mark_circle().encode(
        x=alt.X('-log10(FDR):Q'),
        # Order the pathways by x position, largest first.
        y=alt.Y('Pathway', sort={"encoding": "x", "order": "descending"}),
        size=alt.Size('Num hits'),
        color=alt.Color(color_field, scale=alt.Scale(scheme=scheme, domainMin=0))
    )
    # Reference line at the conventional FDR = 0.05 cutoff.
    xrule = (
        alt.Chart()
            .mark_rule(strokeDash=[8, 6], color="red", strokeWidth=2)
            .encode(x=alt.datum(-np.log10(.05)))
    )

    return (c + xrule).configure_axis(grid=True)

In [99]:
# Dot plot of the 20 pathways with the smallest adjusted p-value; with the
# default hue='Z-score' and log=True the dots are colored by log(Z-score).
enrichment_dotplot(enrichr_results=df, n=20)