# Create time-dependent transcriptome dicts

This workbook imports the "absolute" time-dependent transcriptome that was calculated by integrating the FISH experiments by Aouefa/Lotte with the RNAseq experiments by Lotte.

Results are pickled for use in the TRSL workflow.

In [17]:
import pandas as pd
import cPickle as pkl
import math

In [18]:
# Excel workbook holding the combined FISH/RNAseq transcript counts
# (path is relative to this notebook's directory).
transcript_file = "../data/FISH_RNAseq_combined_FACS_phases_5_genes.xlsx"

In [19]:
# Read the 'nostress_WT_R1_rounded' sheet, skipping one row at the top and two
# rows at the bottom, and keeping only spreadsheet columns A and D through P.
# NOTE(review): `sheetname` / `parse_cols` are the keyword names of the pandas
# version this notebook was written for — confirm before running on a newer one.
transcript_data = pd.read_excel(
    transcript_file,
    sheetname='nostress_WT_R1_rounded',
    skiprows=1,
    skipfooter=2,
    parse_cols='A, D:P',
)

Add systematic gene names:

In [20]:
from pygenome import sg

In [21]:
# Load the pickled ORF dictionary.  The file is opened in binary mode inside a
# context manager so the handle is closed deterministically.
# NOTE(review): `orf_genomic_dict` is not used by the cells visible here; it is
# kept because later cells may rely on it.  Only unpickle trusted files.
with open("../parameters/orf_coding.p", "rb") as orf_file:
    orf_genomic_dict = pkl.load(orf_file)

# Translate every entry of the 'Gene' column to its systematic name.  When the
# lookup fails the original entry is kept unchanged (best effort).
systematic = []
for gene in transcript_data['Gene']:
    try:
        systematic.append(sg.systematic_name(gene))
    except Exception:
        # NOTE(review): the exact exception raised by sg.systematic_name for an
        # unknown name is not visible here.  Catching Exception (instead of a
        # bare except) still falls back for lookup errors but no longer
        # swallows KeyboardInterrupt / SystemExit.
        systematic.append(gene)

# Reuse the frame's own index so the new column lines up row by row even if the
# index is not the default 0..n-1 range.
transcript_data['systematic'] = pd.Series(systematic, index=transcript_data.index)

In [22]:
# Preview the first five rows, including the newly added 'systematic' column.
transcript_data.head(n=5)

Unnamed: 0,Gene,0,5,10,15,20,25,30,35,40,45,50,55,60,systematic
0,YAL001C,0,1,1,1,1,2,2,1,1,1,0,1,1,YAL001C
1,YAL002W,1,0,1,1,1,2,2,3,1,1,0,1,1,YAL002W
2,YAL003W,19,15,48,51,51,100,100,104,27,47,23,46,46,YAL003W
3,YAL004W,0,0,0,0,0,0,0,0,0,0,0,0,0,YAL004W
4,YAL005C,29,26,62,47,50,83,93,99,41,34,26,46,44,YAL005C


Just trying out how to select rows:

In [23]:
# Boolean-mask selection: keep only the rows whose 'Gene' entry is in the list.
transcript_data.loc[transcript_data['Gene'].isin(['YAL001C'])]

Unnamed: 0,Gene,0,5,10,15,20,25,30,35,40,45,50,55,60,systematic
0,YAL001C,0,1,1,1,1,2,2,1,1,1,0,1,1,YAL001C


In [24]:
# Build {time point: {systematic gene name: integer transcript count}} for the
# time-point columns 0, 5, ..., 60.
transcriptome_time_dependent = {}

for t in range(0, 61, 5):
    # Missing values (NaN) become 0; everything else is converted with int().
    counts = [0 if math.isnan(val) else int(val)
              for val in transcript_data[t].values]
    per_gene = pd.Series(counts, index=transcript_data['systematic'])
    transcriptome_time_dependent[t] = per_gene.to_dict()

In [25]:
# Sanity check: number of entries stored for time point 5.
print(len(transcriptome_time_dependent[5]))

6651


In [26]:
# Persist the per-time-point dictionaries for use in the TRSL workflow.  The
# context manager guarantees the file is flushed and closed as soon as the dump
# finishes, instead of relying on garbage collection of an anonymous handle.
with open("../parameters/transcriptome_time_dependent.p", "wb") as pickle_file:
    pkl.dump(transcriptome_time_dependent, pickle_file)