# Table of Contents
 <p>

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import epistasis as epi
import sklearn.decomposition
import tissue_enrichment_analysis as ea

from matplotlib import rc

import os
# Route figure text through LaTeX (loading the cmbright package in the
# preamble) and ask for Helvetica as the sans-serif face.
rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})


%matplotlib inline

# Inline figures are produced in the 'png' and 'retina' formats.
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
# NOTE(review): this rebinds the name `rc` to a dict, shadowing the matplotlib
# `rc` function used at the top of this cell; calling rc(...) after this
# point raises TypeError until the import is re-executed.
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")

# Explicit tick/legend font sizes, written after the seaborn calls so these
# are the last values assigned to the keys.
mpl.rcParams['xtick.labelsize'] = 16 
mpl.rcParams['ytick.labelsize'] = 16 
mpl.rcParams['legend.fontsize'] = 14

In [2]:
# Fetch the tissue, phenotype and GO dictionaries (in that order) that the
# enrichment calls later in the notebook are run against.
tissue, pheno, go = (
    ea.fetch_dictionary(kind) for kind in ('tissue', 'phenotype', 'go')
)

In [3]:
# Strain identifiers, in the fixed order used to number them below.
strains = [
    'ps4187', 'ps4087', 'ps4176', 'ew15', 'mt21245', 'mt4866',
    'n2731', 'n2376', 'n767', 'n2731-n767', 'tm1489',
    'n3809', 'n3917', 'n745', 'n767-tm1489', 'n2731-tm1489',
    'ia4', 'ok1402', 'ok161', 'sa307', 'sa307-ok161', 'sa307-ia4',
    'q71',
]

# Map each strain name to its zero-based position in `strains`.
strain_dict = {name: position for position, name in enumerate(strains)}

In [4]:
# Cutoff compared against the `qval` column (rows with qval < q are counted
# as differentially expressed in the cells below).
q = 0.1

In [5]:
# Space-delimited metadata table; text following a '#' is treated as a comment.
genmap = pd.read_csv(
    '../sleuth/rna_seq_info.txt',
    sep=' ',
    comment='#',
)

# Quantification table; later cells read its `strain`, `target_id`, `qval`
# and `ens_gene` columns.
tidy = pd.read_csv('../input/quantifications.csv')

In [6]:
# Targets that epi.find_overlap returns for the sa307 / sa307-ok161 pair;
# the name and the printout below suggest these are regarded as a stress
# signature to be excluded.
stress = epi.find_overlap(['sa307', 'sa307-ok161'], tidy)

# Restrict `tidy` to the three ps strains and drop every stress target.
# NOTE(review): `tidy` is overwritten here, so re-running this cell without
# reloading the CSV would call find_overlap on a frame that no longer holds
# the sa307 rows.
in_ps_strains = tidy.strain.isin(['ps4187', 'ps4087', 'ps4176'])
is_stress_target = tidy.target_id.isin(stress)
tidy = tidy[in_ps_strains & ~is_stress_target]

In [7]:
# For each ps strain, count the rows of `tidy` belonging to that strain whose
# q-value is below the cutoff `q`, and print one summary line per strain.
# A single loop replaces three copy-pasted statements so the filter cannot
# drift between strains; the printed text and order are the same.
for strain_name in ('ps4087', 'ps4187', 'ps4176'):
    n_de = len(tidy[(tidy.strain == strain_name) & (tidy.qval < q)])
    print('{0} DE genes sans stress: {1}'.format(strain_name, n_de))

ps4087 DE genes sans stress: 1793
ps4187 DE genes sans stress: 229
ps4176 DE genes sans stress: 2377


In [25]:
# Target ids that epi.find_overlap returns for ps4187 on its own.
weak_ids = epi.find_overlap(['ps4187'], tidy)

# `strong`: gene ids of ps4087 rows below the q cutoff whose target is not
# among the ps4187 targets.
# NOTE(review): unlike `weak`, this Series is not de-duplicated — confirm
# that is intended before passing it to the enrichment calls.
is_ps4087_de = (tidy.qval < q) & (tidy.strain == 'ps4087')
in_weak = tidy.target_id.isin(weak_ids)
strong = tidy[is_ps4087_de & ~in_weak].ens_gene

# `weak`: unique gene ids of every remaining row whose target is a ps4187 target.
weak = tidy[in_weak].ens_gene.unique()

In [26]:
# Enrichment of the `strong` gene set against the tissue dictionary; the
# returned table is displayed as this cell's output.
# NOTE(review): show=False presumably silences the library's own printing — confirm.
ea.enrichment_analysis(strong, tissue, show=False)

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
70,Nucleus WBbt:0006803,74.367789,123,1.653942,1.155666e-08,2e-06
78,gon herm sujn WBbt:0008210,73.555027,123,1.672217,6.000848e-09,2e-06
252,spermathecal-uterine valve cell WBbt:0008217,75.586933,124,1.640495,1.648136e-08,2e-06
253,spermathecal-uterine junction WBbt:0006756,79.752342,124,1.554813,3.40447e-07,2.4e-05
265,Psub1 WBbt:0006874,23.671714,47,1.985492,1.665285e-06,9.3e-05
129,somatic gonad WBbt:0005785,125.571841,175,1.393625,3.29791e-06,0.000153
161,muscular system WBbt:0005737,510.618182,587,1.149587,0.0001488912,0.005913
213,hermaphrodite WBbt:0007849,306.513143,362,1.181026,0.0004028246,0.013998
135,intestine WBbt:0005772,567.917954,638,1.123402,0.0007630685,0.02357
222,outer labial sensillum WBbt:0005501,298.487111,346,1.159179,0.001750731,0.04867


In [27]:
# Enrichment of the `strong` gene set against the phenotype dictionary; the
# returned table is displayed as this cell's output.
ea.enrichment_analysis(strong, pheno, show=False)

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
1,avoids bacterial lawn WBPhenotype:0000402,48.456672,78,1.609685,9e-06,0.002042
130,diplotene absent during oogenesis WBPhenotype:...,11.677621,24,2.055213,0.000179,0.019352
128,gonad vesiculated WBPhenotype:0001979,22.15474,38,1.715209,0.000329,0.023716
123,pleiotropic defects severe early emb WBPhenoty...,13.205535,25,1.893146,0.000546,0.029478
64,rachis absent WBPhenotype:0001942,9.385752,19,2.024345,0.000809,0.034932
133,rachis narrow WBPhenotype:0001941,30.449125,47,1.543558,0.001013,0.036474


In [31]:
# Enrichment of the `strong` gene set against the GO dictionary.
# The result is stored under a descriptive name rather than `_`: in IPython
# `_` is the last-output variable, and assigning to it by hand stops IPython
# from updating `_`, `__` and `___` for the rest of the session.
go_strong = ea.enrichment_analysis(strong, go, show=False)

# Display only the terms whose enrichment fold change exceeds 4.
go_strong[go_strong['Enrichment Fold Change'] > 4]

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
54,post-embryonic development GO:0009791,67.075721,271,4.04021,9.839091000000001e-84,7.379318000000001e-82
70,aging GO:0007568,26.978901,113,4.188458,1.291655e-37,7.749932e-36
68,immune system process GO:0002376,9.802715,43,4.38654,1.7529e-16,4.780636e-15
103,muscle cell development GO:0055001,4.858489,26,5.351459,6.445881e-13,1.208603e-11
33,cellular component assembly involved in morpho...,4.658433,25,5.366611,1.540048e-12,2.717732e-11
114,striated muscle cell differentiation GO:0051146,4.858489,25,5.145633,4.149531e-12,6.915884e-11
141,actomyosin structure organization GO:0031032,5.115703,25,4.886914,1.376369e-11,2.173214e-10
156,collagen trimer GO:0005581,5.458655,25,4.579883,6.053495e-11,8.64785e-10
13,myosin filament organization GO:0031033,4.201164,21,4.998615,2.744732e-10,3.430915e-09
188,contractile fiber GO:0043292,6.030242,25,4.145771,5.536343e-10,6.388088e-09


In [33]:
# Enrichment of the `weak` gene set against the tissue dictionary; the
# returned table is displayed as this cell's output.
ea.enrichment_analysis(weak, tissue, show=False)

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
83,spermathecal-uterine valve cell WBbt:0008217,11.115726,28,2.518954,4e-06,0.000679
62,gon herm sujn WBbt:0008210,10.816916,28,2.588538,2e-06,0.000679
44,Nucleus WBbt:0006803,10.93644,28,2.560248,3e-06,0.000679
92,spermathecal-uterine junction WBbt:0006756,11.728286,29,2.472655,4e-06,0.000679
63,hermaphrodite WBbt:0007849,45.075462,71,1.575136,9.1e-05,0.005034
103,somatic gonad WBbt:0005785,18.466447,35,1.895329,0.000154,0.007157


In [34]:
# Enrichment of the `weak` gene set against the GO dictionary; the returned
# table is displayed as this cell's output.
ea.enrichment_analysis(weak, go, show=False)

Unnamed: 0,Tissue,Expected,Observed,Enrichment Fold Change,P value,Q value
47,immune system process GO:0002376,1.098107,15,13.659873,5.055593e-14,1.516678e-11
157,embryo development GO:0009790,10.526458,38,3.609951,7.897545e-12,1.184632e-09
57,developmental process GO:0032502,18.184393,52,2.859595,1.28484e-11,1.28484e-09
75,response to biotic stimulus GO:0009607,0.56346,8,14.19799,7.685551e-09,5.764163e-07
85,reproduction GO:0000003,10.174296,31,3.046894,2.682156e-08,1.609293e-06
49,aging GO:0007568,3.022195,14,4.632395,6.3994e-07,3.1997e-05
38,post-embryonic development GO:0009791,7.513868,22,2.927919,3.618834e-06,0.0001550929
0,lytic vacuole GO:0000323,0.438602,5,11.399846,6.106143e-06,0.0002289804
88,organic acid metabolic process GO:0006082,6.380545,19,2.977802,1.088836e-05,0.0003629453
164,glucuronosyltransferase activity GO:0015020,0.361767,4,11.056841,3.5127e-05,0.00105381
