# Table of Contents
 <p>

In [9]:
import pandas as pd
import numpy as np
import morgan as morgan
import os
import genpy
import gvars


In [6]:
# Gold-standard gene table; later cells read its `WBIDS` column.
gold = pd.read_csv('../input/hypoxia_gold_standard.csv')

# q-value cutoff used by the filtering cells further down.
q = 0.1
# Locations of the kallisto abundance files and the sleuth beta tables.
kallisto_loc = '../input/kallisto_all/'
sleuth_loc = '../sleuth/kallisto/'
# Specify the genotypes to refer to:
single_mutants = ['b', 'c', 'd', 'e', 'g']
double_mutants = {'a' : 'bd', 'f':'bc'}

# initialize the morgan.hunt object:
thomas = morgan.hunt('target_id', 'b', 'tpm', 'qval')
thomas.add_genmap('../input/library_genotype_mapping.txt', comment='#')
thomas.add_single_mutant(single_mutants)
thomas.add_double_mutants(['a', 'f'], ['bd', 'bc'])
# NOTE(review): called without an argument, so the threshold is whatever
# morgan uses by default -- confirm it agrees with `q` above.
thomas.set_qval()
# Add the tpm files:
thomas.add_tpm(kallisto_loc, '/kallisto/abundance.tsv', '')

# load all the beta values for each genotype.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# register the genotypes in the same order on every run. The directory is
# taken from `sleuth_loc` so the path is spelled out in one place only.
for fname in sorted(os.listdir(sleuth_loc)):
    if not fname.startswith('beta'):
        continue
    # Genotype code = the character five positions from the end of the file
    # name (presumably the letter right before a 4-character extension).
    letter = fname[-5:-4].lower()
    thomas.add_beta(sleuth_loc + fname, letter)
    thomas.beta[letter].sort_values('target_id', inplace=True)
    # reset_index without drop=True keeps the previous index as an 'index'
    # column in each beta table.
    thomas.beta[letter].reset_index(inplace=True)
thomas.filter_data()

In [12]:
# `genvar` provides the label (`fancy_mapping`) and ordering (`sort_muts`)
# lookups used below. It is created here so that this cell does not depend on
# a cell located further down having been executed first.
genvar = gvars.genvars()

# Annotate every per-genotype beta table and collect them for stacking.
frames = []
for key, df in thomas.beta.items():
    # Columns are assigned on the frames held by `thomas.beta` themselves,
    # so those objects carry the annotations as well.
    df['genotype'] = genvar.fancy_mapping[key]
    df['code'] = key
    df['sorter'] = genvar.sort_muts[key]
    frames.append(df)

# One long table: stacked genotypes, ordered by `sorter`, keeping only rows
# that have a value in `ens_gene`.
tidy = (
    pd.concat(frames)
    .sort_values('sorter')
    .dropna(subset=['ens_gene'])
)

In [16]:
# Dimensions of the gold-standard table as (number of rows, number of columns).
gold.shape

(22, 1)

In [10]:
# Instantiate the genotype lookup object from the project's `gvars` module.
# NOTE(review): the execution counter shows this ran as In [10], yet the cell
# sits below the In [12] cell that reads `genvar`. When running the notebook
# top to bottom, make sure `genvar` exists before any cell that uses it.
genvar = gvars.genvars()

In [14]:
# Genotype codes to tabulate, in display order.
codes = ['a', 'b', 'c', 'd', 'e', 'f']

print('Genotype, #TFs')
# Rows that pass the q-value cutoff and whose gene is listed in gold.WBIDS.
# This part of the filter is identical for every genotype.
sig_gold = (tidy.qval < q) & tidy.ens_gene.isin(gold.WBIDS)
for c in codes:
    # Restrict to the current genotype and count the surviving rows.
    ind = sig_gold & (tidy.code == c)
    print(genvar.mapping[c], int(ind.sum()))

Genotype, #TFs
egl-9;vhl-1 15
egl-9 14
hif-1 2
vhl-1 11
rhy-1 14
egl-9;hif-1 4


In [15]:
# Display the rows for genotype code 'c' that pass the q-value cutoff and
# whose gene appears in gold.WBIDS.
ind = tidy.ens_gene.isin(gold.WBIDS) & (tidy.code == 'c') & (tidy.qval < q)
tidy.loc[ind]

Unnamed: 0.1,index,Unnamed: 0,target_id,pval,qval,b,se_b,mean_obs,var_obs,tech_var,sigma_sq,smooth_sigma_sq,final_sigma_sq,ens_gene,ext_gene,genotype,code,sorter
24825,42,43,W07A12.7,3.147921e-10,1.466785e-07,0.469131,0.074568,6.880413,2.01661,0.001792,0.002415,0.006549,0.006549,WBGene00012324,rhy-1,\emph{hif-1},c,4
18104,330,331,K10H10.2.1,7.366751e-05,0.004459222,-0.581204,0.146616,6.587986,3.535277,0.025268,-0.001996,0.006976,0.006976,WBGene00010759,cysl-2,\emph{hif-1},c,4
