# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Defining-the-hypoxia-response.-See-our-paper" data-toc-modified-id="Defining-the-hypoxia-response.-See-our-paper-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Defining the hypoxia response. See our paper</a></div><div class="lev1 toc-item"><a href="#Defining-the-Dpy-phenotype-response" data-toc-modified-id="Defining-the-Dpy-phenotype-response-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Defining the Dpy phenotype response</a></div><div class="lev1 toc-item"><a href="#Defining-the-Ras-response" data-toc-modified-id="Defining-the-Ras-response-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Defining the Ras response</a></div><div class="lev1 toc-item"><a href="#Defining-the-Wnt-response" data-toc-modified-id="Defining-the-Wnt-response-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Defining the Wnt response</a></div>

In [1]:
import epistasis as epi
import pandas as pd
import numpy as np
import scipy as scipy
import sklearn.decomposition
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
import os
rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})


%matplotlib inline

# This enables SVG graphics inline. 
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")

mpl.rcParams['xtick.labelsize'] = 16 
mpl.rcParams['ytick.labelsize'] = 16 
mpl.rcParams['legend.fontsize'] = 14

In [2]:
genmap = pd.read_csv('../sleuth/rna_seq_info.txt', sep='\t', comment='#')
frames = []

for root, dirs, files in os.walk("../sleuth/sleuth_strains"):
    for file in files:
        if file == 'lrt.csv':
            continue
        
        strain =  file[:-4].replace('_', '-')
        df = pd.read_csv(root + '/' + file, sep=',')
        df.sort_values('target_id', inplace=True)
        df['strain'] = strain.replace('b-', '')
        df['genotype'] = genmap[genmap.strain == file[:-4]].genotype.unique()[0].replace('b_', '').replace('_', '-')
        frames += [df]
    
tidy = pd.concat(frames)
tidy.dropna(subset=['ens_gene', 'b', 'qval'], inplace=True)
tidy['absb'] = tidy.b.abs()
tidy.sort_values(['target_id'], ascending=True, inplace=True)

b-CB6088
['b_egl9_hif1']


In [3]:
q=0.1

# Defining the hypoxia response. See our paper

In [29]:
hyp_response_pos = epi.find_overlap(['vhl1', 'egl9', 'rhy1', 'egl9-vhl1'], tidy[tidy.b > 0], col='genotype')
hyp_response_neg = epi.find_overlap(['vhl1', 'egl9', 'rhy1', 'egl9-vhl1'], tidy[tidy.b < 0], col='genotype')

either_or = (((tidy.b < 0) & (tidy.qval < q)) | (tidy.qval > q))
hyp_response_pos = tidy[(tidy.target_id.isin(hyp_response_pos)) & ((tidy.genotype == 'egl9-hif1') & either_or)].target_id.values.tolist()

# do the same for the negative set
either_or = (((tidy.b > 0) & (tidy.qval < q)) | (tidy.qval > q))
hyp_response_neg = tidy[(tidy.target_id.isin(hyp_response_neg)) & (tidy.genotype == 'egl9-hif1') & either_or].target_id.values.tolist()

# get the list
hyp_response = list(set(hyp_response_neg + hyp_response_pos))
hyp = tidy[(tidy.target_id.isin(hyp_response)) &
           (tidy.genotype == 'egl9')
          ].copy().sort_values('qval')

# annotate whether they are candidates for direct or
# indirect regulation.
def annotate(x):
    if x > 0:
        return 'candidate for direct regulation'
    else:
        return 'candidate for indirect regulation'

# annotate
hyp['regulation'] = hyp.b.apply(annotate)
cols = ['target_id', 'ens_gene', 'ext_gene', 'b', 'qval', 'regulation']
hyp[cols].to_csv('../input/hypoxia_response.csv', index=False)

# get the list of gene IDs as a numpy array.
hyp_response = tidy[tidy.target_id.isin(hyp_response)].ens_gene.unique()
print('There are {0} genes in the predicted hypoxia response'.format(len(hyp_response)))

320
There are 595 genes in the predicted hypoxia response


# Defining the Dpy phenotype response

In [32]:
embryonic = epi.find_overlap(['dpy7', 'dpy10', 'unc54', 'clk-1'], tidy, col='genotype')
dpy = epi.find_overlap(['dpy7', 'dpy10'], tidy, col='genotype', q=0.01)
dpy = tidy[tidy.target_id.isin(dpy) & (~tidy.target_id.isin(embryonic))].copy()

In [33]:
print(len(dpy.ens_gene.unique()), 'genes found Dpy')
dpy[dpy.genotype == 'dpy7'][['b', 'qval', 'target_id', 'ens_gene', 'ext_gene']].to_csv('../input/dpy_geneset.csv', index=False)

628 genes found Dpy


# Defining the Ras response

In [51]:
# ras_common = epi.find_overlap(['let60', 'let60.gf'], tidy, col='genotype')
# & (~tidy.target_id.isin(ras_common)

let60 = tidy[(tidy.genotype == 'let60') & (tidy.qval < 10**-2)]
let60[['b', 'qval', 'target_id', 'ens_gene', 'ext_gene']].to_csv('../input/ras_geneset.csv', index=False)


let60gf = tidy[(tidy.genotype == 'let60.gf') & (tidy.qval < 10**-2)]
let60gf[['b', 'qval', 'target_id', 'ens_gene', 'ext_gene']].to_csv('../input/rasgf_geneset.csv', index=False)

print('{0} genes found in let-60(lf)'.format(len(let60)))
print('{0} genes found in let-60(gf)'.format(len(let60gf)))

284 genes found in let-60(lf)
2089 genes found in let-60(gf)


# Defining the Wnt response

In [24]:
wnt = tidy[(tidy.genotype == 'bar1') & (tidy.qval < 10**-2)]
wnt[['b', 'qval', 'target_id', 'ens_gene', 'ext_gene']].to_csv('../input/wnt_geneset.csv', index=False)