This notebook performs enrichment analyses of all the phenotypic classes using the **WormBase Enrichment Suite**.

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
import tissue_enrichment_analysis as tea

rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})

%matplotlib inline

# This enables SVG graphics inline. 
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")

mpl.rcParams['xtick.labelsize'] = 16 
mpl.rcParams['ytick.labelsize'] = 16 
mpl.rcParams['legend.fontsize'] = 14

In [2]:
# Threshold constant. NOTE(review): presumably a q-value cutoff; it is not
# referenced by any of the cells visible in this part of the notebook.
q = 0.1

# Load the results table that every later cell works from.
tidy = pd.read_csv('../output/SI1_dpy_22_results.csv')

In [3]:
# Fetch the three annotation dictionaries (tissue, phenotype, GO), in that
# order, and keep them both as individual names and in one lookup table.
tissue, phenotype, go = (
    tea.fetch_dictionary(kind) for kind in ('tissue', 'phenotype', 'go')
)
dicts = dict(tissue=tissue, phenotype=phenotype, go=go)

In [4]:
# Restrict every annotation dictionary to the rows whose `wbid` appears
# among the gene IDs (`ens_gene`) present in `tidy`, i.e. anything that was
# detected at any level. `dicts` is updated in place.
detected_ids = tidy.ens_gene.unique()
dicts.update({
    name: ref[ref.wbid.isin(detected_ids)]
    for name, ref in dicts.items()
})

In [5]:
# perform all enrichment analysis and store them in a hash
analysis = {}
for phenoclass, group in tidy.groupby('phenotypic class'):
    frames = {}
    for k, d in dicts.items():
        df = tea.enrichment_analysis(group.ens_gene.unique(), d, show=False)
        frames[k] = df
    analysis[phenoclass] = frames

In [6]:
# Pretty-print the significant enrichment results for every
# (phenotypic class, dictionary) combination.
#
# Side effects on the frames stored in `analysis` (same as before):
#   * every frame gains a 'logQ' column (-log10 of 'Q value');
#   * frames with at least one significant term also gain a 'minTerm'
#     column holding the term name without its trailing ontology ID.
Q_CUTOFF = 10**-3   # print only terms with Q value below this
MIN_OBSERVED = 2    # ... and with more than this many observations

for phenoclass, f in analysis.items():
    for k, d in f.items():
        d['logQ'] = -np.log10(d['Q value'])
        sig = d['Q value'] < Q_CUTOFF
        if not sig.any():
            continue

        # Trim names for easier printing: drop the last whitespace-separated
        # token of each term (the ontology ID). The previous fixed-width
        # slices removed 13 / 20 / 10 characters for tissue / phenotype / go;
        # the GO suffix is 11 characters long, so GO names kept a trailing
        # space. Splitting on whitespace does not depend on the ID length.
        d['minTerm'] = d.Term.str.rsplit(n=1).str[0]

        # subset dataframe to sig terms and make sure
        # there's >2 observations per term
        tmp = d[sig & (d.Observed > MIN_OBSERVED)]
        if tmp.empty:
            continue

        print(phenoclass, k)
        print(tmp[['minTerm', 'logQ', 'Observed']].round(0))
        print('\n\n')

bx93 associated tissue
      minTerm  logQ  Observed
77  intestine   5.0       134



bx93 associated go
                            minTerm  logQ  Observed
59           immune system process    6.0        17
58  organic acid metabolic process    4.0        15
3      response to biotic stimulus    4.0        10



sy622 associated tissue
                 minTerm  logQ  Observed
33  cephalic sheath cell   5.0        31



sy622 associated go
                                 minTerm  logQ  Observed
71       organic acid metabolic process    6.0        37
72                immune system process    6.0        33
70  protein heterodimerization activity    4.0        12



sy622 specific tissue
               minTerm  logQ  Observed
246          intestine   8.0       649
64     muscular system   4.0       465
186  epithelial system   3.0       406



sy622 specific phenotype
                  minTerm  logQ  Observed
27  avoids bacterial lawn   3.0        62



sy622 specific go
             

In [8]:
# Show every row of `tidy` whose external gene name is srt-39.
tidy.loc[tidy['ext_gene'] == 'srt-39']

Unnamed: 0.1,Unnamed: 0,target_id,pval,qval,b,se_b,mean_obs,var_obs,tech_var,sigma_sq,...,ens_gene,ext_gene,description,transcript_biotype,strain,order,fancy,allele,genotype,phenotypic class


In [10]:
# Show every row of `tidy` whose external gene name is atn-1.
tidy.loc[tidy['ext_gene'] == 'atn-1']

Unnamed: 0.1,Unnamed: 0,target_id,pval,qval,b,se_b,mean_obs,var_obs,tech_var,sigma_sq,...,ens_gene,ext_gene,description,transcript_biotype,strain,order,fancy,allele,genotype,phenotypic class
14483,14483,W04D2.1a,0.3065256,1.0,-0.24569,0.240275,7.13005,0.89015,0.045179,0.041418,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4187,0,dpy-22(bx93),bx93,bx93,transhet specific
14484,14484,W04D2.1b,0.7193712,1.0,0.084381,0.23485,6.794859,0.624799,0.061665,0.021067,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4187,0,dpy-22(bx93),bx93,bx93,
14485,14485,W04D2.1c,0.02353052,0.536744,2.985946,1.318468,3.374749,10.428579,0.993191,1.614344,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4187,0,dpy-22(bx93),bx93,bx93,transhet specific
14486,14486,W04D2.1d,0.01362949,0.413078,3.008471,1.219544,3.176171,10.283781,0.846522,1.384408,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4187,0,dpy-22(bx93),bx93,bx93,transhet specific
33899,33899,W04D2.1a,0.1353887,0.490536,-0.358774,0.240275,7.13005,0.89015,0.045179,0.041418,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4087,1,dpy-22(sy622),sy622,sy622,transhet specific
33900,33900,W04D2.1b,0.4635513,0.861654,0.172147,0.23485,6.794859,0.624799,0.061665,0.021067,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4087,1,dpy-22(sy622),sy622,sy622,
33901,33901,W04D2.1c,0.4310888,0.839627,1.038069,1.318468,3.374749,10.428579,0.993191,1.614344,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4087,1,dpy-22(sy622),sy622,sy622,transhet specific
33902,33902,W04D2.1d,0.3398116,0.765079,1.1641,1.219544,3.176171,10.283781,0.846522,1.384408,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4087,1,dpy-22(sy622),sy622,sy622,transhet specific
53315,53315,W04D2.1a,0.007934044,0.077677,-0.596697,0.224756,7.13005,0.89015,0.045179,0.041418,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4176,2,dpy-22(bx93)/dpy-22(sy622),bx93/sy622,bx93/sy622,transhet specific
53316,53316,W04D2.1b,0.830621,0.973428,0.046991,0.219682,6.794859,0.624799,0.061665,0.021067,...,WBGene00000228,atn-1,AcTiniN [Source:UniProtKB/TrEMBL;Acc:H2L2C8],protein_coding,PS4176,2,dpy-22(bx93)/dpy-22(sy622),bx93/sy622,bx93/sy622,
