# Table of Contents
 <p>

In [5]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import epistasis as epi
import scipy.stats as stats
import tissue_enrichment_analysis as ea

from matplotlib import rc

import os
rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})


%matplotlib inline

# This enables SVG graphics inline. 
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")

mpl.rcParams['xtick.labelsize'] = 16 
mpl.rcParams['ytick.labelsize'] = 16 
mpl.rcParams['legend.fontsize'] = 14


In [2]:
# Identifiers of every strain/genotype considered in this notebook.
# The order matters: it defines the integer index stored in `strain_dict`.
strains = [
    'ps4187', 'ps4087', 'ps4176', 'ew15', 'mt21245', 'mt4866',
    'n2731', 'n2376', 'n767', 'n2731-n767', 'tm1489',
    'n3809', 'n3917', 'n745', 'n767-tm1489', 'n2731-tm1489',
    'ia4', 'ok1402', 'ok161', 'sa307', 'sa307-ok161', 'sa307-ia4',
    'q71',
]

# Map each strain identifier to its position in `strains`.
# A comprehension keeps the loop variables out of the notebook namespace.
strain_dict = {strain: index for index, strain in enumerate(strains)}

In [3]:
# Space-delimited sample information file; lines starting with '#' are
# treated as comments and skipped.
genmap = pd.read_csv('../sleuth/rna_seq_info.txt', comment='#', sep=' ')

# Quantification table; the cells below use its `target_id` and `ens_gene`
# columns.
tidy = pd.read_csv('../input/quantifications.csv')

In [4]:
# Overlap between the three listed strains, computed from `tidy`.
# The result is matched against `tidy.target_id` further down, so it is
# presumably a collection of target identifiers -- confirm against
# `epi.find_overlap`.
overlap = epi.find_overlap(['ps4087', 'ps4187', 'ps4176'], tidy)

In [6]:
# Fetch the tissue, phenotype and GO annotation dictionaries, in that order,
# for the enrichment analyses below.
tissue, pheno, go = (
    ea.fetch_dictionary(kind) for kind in ('tissue', 'phenotype', 'go')
)

In [7]:
# Unique `ens_gene` values for the rows of `tidy` whose `target_id` is in the
# three-strain overlap.
wbids = tidy.loc[tidy['target_id'].isin(overlap), 'ens_gene'].unique()

In [8]:
# Tissue enrichment analysis of the overlap genes; the returned table is the
# cell's displayed output.
# NOTE(review): `show=False` presumably suppresses the library's own printed
# summary -- confirm against `ea.enrichment_analysis`.
ea.enrichment_analysis(wbids, tissue, show=False)

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
50,Nucleus WBbt:0006803,11.233021,29,2.581674,2e-06,0.000396
61,gon herm sujn WBbt:0008210,11.110256,29,2.610201,1e-06,0.000396
6,spermathecal-uterine valve cell WBbt:0008217,11.417169,29,2.540034,2e-06,0.000396
37,spermathecal-uterine junction WBbt:0006756,12.046341,29,2.40737,7e-06,0.000485
116,cephalic sheath cell WBbt:0008406,4.649734,15,3.225991,2.3e-05,0.001164
29,intestine WBbt:0005772,85.782223,124,1.445521,2.1e-05,0.001164
7,pm5 WBbt:0003737,0.583135,4,6.859475,0.000279,0.011065
30,pm3 WBbt:0003740,0.583135,4,6.859475,0.000279,0.011065
10,hermaphrodite WBbt:0007849,46.297848,69,1.49035,0.00052,0.01575
19,somatic gonad WBbt:0005785,18.967232,34,1.792565,0.00051,0.01575


In [11]:
# GO-term enrichment analysis of the overlap genes.
gea = ea.enrichment_analysis(wbids, go, show=False)

# Display only the terms whose enrichment fold change exceeds 3.
gea.loc[gea['Enrichment Fold Change'].gt(3)]

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
95,immune system process GO:0002376,1.067487,24,22.482719,3.945225e-26,1.1835680000000002e-23
30,developmental process GO:0032502,17.677328,56,3.1679,3.779112e-14,5.668667e-12
62,organic acid metabolic process GO:0006082,6.202626,31,4.997883,1.33762e-13,1.33762e-11
33,glucuronosyltransferase activity GO:0015020,0.351679,9,25.591502,3.79323e-12,2.844922e-10
52,embryo development GO:0009790,10.232932,36,3.518053,5.273347e-11,3.164008e-09
174,response to biotic stimulus GO:0009607,0.547748,8,14.605251,6.032789e-09,2.585481e-07
59,reproduction GO:0000003,9.89059,30,3.033186,4.730472e-08,1.773927e-06
92,aging GO:0007568,2.937922,13,4.424896,2.390091e-06,7.610073e-05
13,post-embryonic development GO:0009791,7.304347,22,3.011905,2.283022e-06,7.610073e-05
123,lytic vacuole GO:0000323,0.426372,5,11.726844,5.202672e-06,0.0001418911


In [12]:
# Reshape the GO dictionary to long format (one row per `wbid`/variable pair)
# and keep only the rows whose `value` equals 1.
melted_go = (
    pd.melt(go, id_vars='wbid')
    .loc[lambda long_go: long_go['value'] == 1]
)

In [21]:
# Overlap restricted to two strains (ps4087 and ps4176), computed from `tidy`.
# As with `overlap`, the result is matched against `tidy.target_id` below.
overlap2 = epi.find_overlap(['ps4087', 'ps4176'], tidy)

In [22]:
# Unique `ens_gene` values for the rows of `tidy` whose `target_id` is in the
# two-strain overlap.
overlap2_genes = tidy.loc[tidy['target_id'].isin(overlap2), 'ens_gene'].unique()

# Tissue enrichment analysis of those genes; the returned table is the cell's
# displayed output.
ea.enrichment_analysis(overlap2_genes, tissue, show=False)

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
10,gon herm sujn WBbt:0008210,55.0306,117,2.12609,4.354391e-15,1.210521e-12
189,Nucleus WBbt:0006803,55.638673,117,2.102854,9.652858e-15,1.341747e-12
142,spermathecal-uterine valve cell WBbt:0008217,56.550782,118,2.08662,1.314673e-14,1.341747e-12
174,spermathecal-uterine junction WBbt:0006756,59.667156,119,1.994397,2.559102e-13,1.778576e-11
264,cephalic sheath cell WBbt:0008406,23.030762,52,2.25785,1.161411e-08,6.457443e-07
157,somatic gonad WBbt:0005785,93.947268,148,1.575352,2.043171e-08,9.46669e-07
164,intestine WBbt:0005772,424.89096,505,1.18854,2.165027e-05,0.0008598248
54,Psub1 WBbt:0006874,17.710124,35,1.976271,3.940988e-05,0.001369493
148,hermaphrodite WBbt:0007849,229.319504,282,1.229725,0.0001571516,0.004854239
170,AB WBbt:0004015,8.665039,19,2.192719,0.000364257,0.01012635


In [18]:
overlap = epi.find_overlap(strains, tidy, q=0.2)

In [19]:
overlap

[]