# Create time-resolved transcriptome dicts

This notebook imports the "absolute" time-dependent transcriptome that was calculated by integrating the FISH experiments by Aouefa/Lotte with the RNA-seq experiments by Lotte.

Results are pickled for use in the TRSL workflow.

04 August 2016: updated to include two extra genes by Wiebke

07 March 2019: updated to include new cell cycle phase definitions from Lotte/Katja paper

In [1]:
import pandas as pd
import cPickle as pkl
import math
import collections as col

In [2]:
# Path (relative to this notebook) of the Excel file that is read into
# `transcript_data` in the next cell.
# Earlier input file (without the `_new_CC_phases` suffix), kept for reference:
#transcript_file = '../data/FISH_RNAseq_combined_FACS_phases_7_genes.xlsx'
transcript_file = '../../data/FISH_RNAseq_combined_FACS_phases_7_genes_new_CC_phases.xlsx'

In [3]:
# Read a single worksheet of the Excel file into a DataFrame.
# NOTE(review): `sheetname` and `parse_cols` were deprecated in pandas 0.21 in
# favour of `sheet_name` and `usecols`; they are left untouched here because
# the pandas version this notebook runs against is not visible -- confirm
# before renaming.
transcript_data = pd.read_excel(
    transcript_file,
    sheetname='nostress_WT_R1_rounded',  # worksheet to load
    skiprows=1,                          # skip the first row of the sheet
    skipfooter=2,                        # ignore the last two rows of the sheet
    parse_cols='A, D:P',                 # Excel column A plus columns D through P
)

Add systematic gene names:

In [4]:
# Load the ORF pickle from the project's parameters directory. The context
# manager closes the file handle deterministically, and "rb" is the mode
# pickle files are meant to be read in.
# NOTE(review): `orf_genomic_dict` is not used by any visible cell of this
# notebook -- confirm whether it is still needed.
with open("../../parameters/orf_coding.p", "rb") as orf_file:
    orf_genomic_dict = pkl.load(orf_file)

# Translate every entry of the 'Gene' column to its systematic name, keeping
# the original entry whenever the lookup fails.
# NOTE(review): `sg` is neither imported nor defined in any visible cell. If it
# is also missing at run time, the resulting NameError is caught below and
# every gene name is passed through unchanged -- confirm where `sg` should
# come from.
systematic = []
for gene in transcript_data['Gene']:
    try:
        systematic.append(sg.systematic_name(gene))
    except Exception:
        # Best-effort fallback. `Exception` (rather than a bare `except:`)
        # keeps the fallback but no longer swallows KeyboardInterrupt or
        # SystemExit.
        systematic.append(gene)

# Attach the names positionally: reusing the frame's own index means the new
# column cannot be misaligned if the frame's index is not 0..n-1.
transcript_data['systematic'] = pd.Series(systematic, index=transcript_data.index)

In [5]:
# Preview the first rows to check the parsed columns and the newly added
# 'systematic' column.
transcript_data.head()

Unnamed: 0,Gene,0,5,10,15,20,25,30,35,40,45,50,55,60,systematic
0,YAL001C,1.0,2.0,2.0,2.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,YAL001C
1,YAL002W,2.0,0.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,YAL002W
2,YAL003W,53.0,42.0,83.0,89.0,88.0,38.0,38.0,39.0,27.0,47.0,23.0,46.0,31.0,YAL003W
3,YAL004W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,YAL004W
4,YAL005C,79.0,71.0,107.0,81.0,86.0,31.0,35.0,37.0,41.0,34.0,26.0,46.0,30.0,YAL005C


Just trying out how to select rows:

In [6]:
# Build a boolean mask over the 'Gene' column, then use it to filter the rows.
is_selected = transcript_data['Gene'].isin(['YAL001C'])
transcript_data[is_selected]

Unnamed: 0,Gene,0,5,10,15,20,25,30,35,40,45,50,55,60,systematic
0,YAL001C,1.0,2.0,2.0,2.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,YAL001C


In [7]:
# Build {time point: {systematic gene name: integer transcript count}} for the
# time-point columns 0, 5, ..., 60 of `transcript_data`.
gene_index = transcript_data['systematic']
transcriptome_time_dependent = {}

for time_point in range(0, 61, 5):
    # Missing measurements (NaN) count as 0; every other value is truncated
    # to an int.
    counts = [0 if math.isnan(value) else int(value)
              for value in transcript_data[time_point].values]
    # Going through a Series keyed by gene name means that, if a name occurs
    # more than once, only one entry per name ends up in the dict.
    transcriptome_time_dependent[time_point] = pd.Series(counts, index=gene_index).to_dict()

In [8]:
# Number of gene entries stored for time point 5.
print(len(transcriptome_time_dependent[5]))

6651


In [9]:
# Earlier output location, kept for reference:
#pkl.dump(transcriptome_time_dependent, open("../parameters/transcriptome_time_dependent.p", "wb"))

# Persist the per-time-point transcriptome dict. The `with` block guarantees
# the file is flushed and closed as soon as the dump finishes, instead of
# leaving an anonymous open handle for the garbage collector.
with open("../../parameters/transcriptome_time_dependent_v2.p", "wb") as pickle_file:
    pkl.dump(transcriptome_time_dependent, pickle_file)

In [10]:
# Average each gene's count over all time points.
n_time_points = len(transcriptome_time_dependent)

# Sum the per-time-point dicts gene by gene. Counter addition keeps only
# entries whose running total is positive, so genes that are 0 at every time
# point do not appear in the result.
transcriptome_total = col.Counter()
for counts_at_time_point in transcriptome_time_dependent.values():
    transcriptome_total = transcriptome_total + col.Counter(counts_at_time_point)

# Floor division: the averages stay integers (this is what `/` on ints does
# under Python 2, which this notebook targets).
transcriptome_average = {gene: total // n_time_points
                         for gene, total in transcriptome_total.items()}

In [11]:
# Number of genes present in the averaged transcriptome.
len(transcriptome_average)

5578

In [12]:
# Sum of the per-gene averaged counts over all genes.
sum(transcriptome_average.values())

36856

In [13]:
# Earlier output location, kept for reference:
#pkl.dump(transcriptome_average, open("../parameters/transcriptome_teufel.p", "wb"))

# Persist the time-averaged transcriptome dict. The `with` block guarantees
# the file is flushed and closed as soon as the dump finishes, instead of
# leaving an anonymous open handle for the garbage collector.
with open("../../parameters/transcriptome_teufel_v2.p", "wb") as pickle_file:
    pkl.dump(transcriptome_average, pickle_file)