# Yeast genome 
Jens Hahn - 31/08/2016     
Downloaded from **http://www.uniprot.org/docs/yeast**

In [1]:
import pandas as pd

In [3]:
# Read the yeast gene table (comma-separated); the first row is taken as header.
df = pd.read_csv('../data/yeastgenome.csv', sep=',')
# Overwrite the header with fixed column names (the fourth column is unnamed).
df.columns = ['Gene designations', 'OLN', 'Swiss-Prot', '', 'SGD', 'Size 3D', 'CH']
# Strip surrounding whitespace from every text column. Accessing `.str` on a
# non-text column raises AttributeError, which makes hasattr() return False,
# so such columns are left untouched instead of aborting the cell.
for col in df.columns:
    if hasattr(df[col], 'str'):
        df[col] = df[col].str.strip()

In [4]:
# proteins of CDC module
#
# Maps each model species name to a string of SGD identifiers.
# - Keys end in '_cyt' or '_nuc' (presumably the compartment; confirm against
#   the model definition), optionally with '_p', '_PP' or '_Hp' markers.
# - Values hold one or more 'SGD:S' + 9-digit IDs joined by '_'; each ID is
#   followed by a short suffix ('p', 'pP', 'pPP', 'pHp') whose meaning is not
#   defined in this notebook (presumably protein / modification state; verify).
# - The cell that builds `yeast_genes` splits each value on '_' and uses
#   characters 4..13 of every piece as the SGD ID to look up in df['SGD'].
#   Pieces shorter than five characters (e.g. the trailing '13' of
#   'Hog1_PP_cyt') yield an empty ID and are skipped there.
proteins = {'Swe1_cyt': 'SGD:S000003723p',
            'APC_cyt': 'SGD:S000002971p',
            'Clb2_cyt': 'SGD:S000006323p',
            'Mcm1_nuc': 'SGD:S000004646p',
            'Clb2_p_cyt': 'SGD:S000006323pP',
            'Swi5_nuc': 'SGD:S000002553p',
            'Hog1_PP_cyt': 'SGD:S000004103pPP_13',
            'Swe1_p_cyt': 'SGD:S000003723pP',
            'Sic1_p_cyt': 'SGD:S000004069pP',
            'Far1_cyt': 'SGD:S000003693p',
            'Cdc14_p_cyt': 'SGD:S000001924pP',
            'Sic1_cyt': 'SGD:S000004069p',
            'Cln3_Far1_p_cyt': 'SGD:S000000038p_SGD:S000003693pP',
            'Clb2_Sic1_cyt': 'SGD:S000006323p_SGD:S000004069p',
            'Sic1_Hp_cyt': 'SGD:S000004069pHp',
            'Clb3_cyt': 'SGD:S000002314p',
            'MBF_nuc': 'SGD:S000002214p_SGD:S000004172p',
            'Cln3_cyt': 'SGD:S000000038p',
            'Fus3_cyt': 'SGD:S000000112p',
            'SBF_p_cyt': 'SGD:S000000913p_SGD:S000004172pP',
            'Whi5_p_cyt': 'SGD:S000005609pP',
            'Cdc14_cyt': 'SGD:S000001924p',
            'Whi5_cyt': 'SGD:S000005609p',
            'Clb5_Sic1_Hp_cyt': 'SGD:S000006324p_SGD:S000004069pHp',
            'Cln2_cyt': 'SGD:S000006177p',
            'Far1_p_cyt': 'SGD:S000003693pP',
            'Mih1_cyt': 'SGD:S000004639p',
            'Cln2_Far1_p_cyt': 'SGD:S000006177p_SGD:S000003693pP',
            'Clb3_Sic1_cyt': 'SGD:S000002314p_SGD:S000004069p',
            'Clb5_Sic1_cyt': 'SGD:S000006324p_SGD:S000004069p',
            'Clb5_cyt': 'SGD:S000006324p',
            'SBF_Whi5_nuc': 'SGD:S000000913p_SGD:S000004172p_SGD:S000005609p',
            'APC_p_cyt': 'SGD:S000002971pP',
            'Swi5_p_cyt': 'SGD:S000002553pP',
            'SBF_nuc': 'SGD:S000000913p_SGD:S000004172p'}

In [5]:
# Build yeast_genes: {species name: [OLN, ...]}, one OLN per SGD ID found in
# the species' entry of `proteins`, looked up in the gene table `df`.

# Lookup table SGD ID -> OLN, built once instead of filtering the whole frame
# for every ID. setdefault keeps the first row for a repeated SGD ID, matching
# a "first match" lookup on the frame.
oln_by_sgd = {}
for sgd, oln in zip(df['SGD'], df['OLN']):
    oln_by_sgd.setdefault(sgd, oln)

yeast_genes = {}
for protein in proteins:
    # The '_'-joined string is replaced by its list of pieces in place. The
    # type check keeps the cell re-runnable: without it a second run would
    # call .split() on the list stored by the first run.
    if not isinstance(proteins[protein], list):
        proteins[protein] = proteins[protein].split('_')
    for component in proteins[protein]:
        # Characters 4..13 follow the 'SGD:' prefix and hold the identifier.
        sgd_id = component[4:14]
        if not sgd_id:
            # Pieces shorter than five characters (e.g. '13') carry no ID.
            continue
        if sgd_id not in oln_by_sgd:
            raise KeyError("SGD ID %r (species %r) not found in df['SGD']"
                           % (sgd_id, protein))
        yeast_genes.setdefault(protein, []).append(oln_by_sgd[sgd_id])

In [6]:
# Flatten the per-species OLN lists into one list; repeated genes are kept.
parameter_genes = [gene for protein in yeast_genes for gene in yeast_genes[protein]]
# Parenthesised form prints the same text under Python 2 and is valid Python 3.
print(' '.join(parameter_genes))

YJL187C YGL003C YFR028C YLR113W YJL187C YLR079W YAL040C YPR119W YLR079W YPR119W YLR079W YLR079W YDR146C YAL040C YJL157C YDL056W YLR182W YPL256C YJL157C YDR146C YJL157C YBL016W YDL155W YER111C YLR182W YOR083W YFR028C YOR083W YPR120C YLR079W YJL157C YMR036C YDL155W YLR079W YPR119W YPR120C YLR079W YPR120C YER111C YLR182W YOR083W YGL003C YMR043W YPL256C YER111C YLR182W


### Transcriptome (average)

Transcriptome is normalized to 60000.

In [7]:
import cPickle as pkl

In [10]:
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# Open in binary mode (what pickle expects) and let `with` close the handle.
with open('../parameters/transcriptome_plotkin.p', 'rb') as pickle_file:
    transcriptome = pkl.load(pickle_file)

In [12]:
# Look up the transcriptome entry of every parameter gene, keyed by OLN.
dict((oln, transcriptome[oln]) for oln in parameter_genes)

{'YAL040C': 2,
 'YBL016W': 4,
 'YDL056W': 4,
 'YDL155W': 3,
 'YDR146C': 5,
 'YER111C': 3,
 'YFR028C': 7,
 'YGL003C': 1,
 'YJL157C': 4,
 'YJL187C': 3,
 'YLR079W': 6,
 'YLR113W': 10,
 'YLR182W': 7,
 'YMR036C': 3,
 'YMR043W': 6,
 'YOR083W': 1,
 'YPL256C': 7,
 'YPR119W': 2,
 'YPR120C': 4}

### Transcriptome (time course)

Transcriptome is normalized to <60000.

In [23]:
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# Open in binary mode (what pickle expects) and let `with` close the handle.
with open("../parameters/transcriptome_time_dependent.p", 'rb') as pickle_file:
    transcriptome_time_dependent = pkl.load(pickle_file)

Times in minutes:

In [24]:
# Iterating the dict yields its keys (the time points); show them in order.
sorted(transcriptome_time_dependent)

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]

In [25]:
# Sort the time points once; the order is the same for every gene.
time_points = sorted(transcriptome_time_dependent)
# For each parameter gene, list its value at every time point in that order.
{gene: [transcriptome_time_dependent[time][gene] for time in time_points]
 for gene in parameter_genes}

{'YAL040C': [1, 1, 1, 1, 1, 3, 2, 4, 1, 2, 1, 2, 1],
 'YBL016W': [2, 1, 2, 1, 1, 3, 3, 2, 1, 1, 1, 1, 1],
 'YDL056W': [1, 1, 2, 5, 2, 7, 6, 5, 1, 1, 1, 1, 1],
 'YDL155W': [1, 0, 2, 2, 3, 4, 4, 4, 1, 1, 0, 1, 1],
 'YDR146C': [0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 1, 1, 0],
 'YER111C': [0, 1, 3, 2, 1, 0, 1, 1, 0, 0, 0, 1, 1],
 'YFR028C': [1, 0, 3, 3, 4, 6, 8, 6, 2, 1, 1, 2, 1],
 'YGL003C': [0, 0, 1, 1, 1, 2, 3, 2, 1, 0, 0, 1, 1],
 'YJL157C': [1, 0, 1, 1, 0, 1, 1, 2, 1, 1, 1, 3, 4],
 'YJL187C': [0, 2, 8, 4, 3, 3, 2, 1, 0, 1, 0, 1, 2],
 'YLR079W': [0, 0, 2, 2, 2, 3, 1, 2, 1, 2, 1, 6, 5],
 'YLR113W': [1, 1, 4, 3, 6, 8, 8, 7, 3, 2, 2, 3, 2],
 'YLR182W': [1, 0, 3, 4, 3, 4, 4, 2, 1, 1, 1, 2, 2],
 'YMR036C': [0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0],
 'YMR043W': [1, 1, 1, 2, 1, 4, 4, 4, 1, 1, 1, 0, 1],
 'YOR083W': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'YPL256C': [1, 5, 18, 8, 3, 4, 3, 2, 1, 0, 0, 1, 3],
 'YPR119W': [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0],
 'YPR120C': [1, 2, 6, 3, 2, 2, 2, 2, 1, 1, 0,

### Protein synthesis rates (average)

In [27]:
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# Open in binary mode (what pickle expects) and let `with` close the handle.
with open("../parameters/prot_speeds.p", 'rb') as pickle_file:
    prot_synth_rates = pkl.load(pickle_file)

In [28]:
# Look up the synthesis-rate entry of every parameter gene, keyed by OLN.
dict((oln, prot_synth_rates[oln]) for oln in parameter_genes)

{'YAL040C': 0.03926664595047475,
 'YBL016W': 0.15028248587579873,
 'YDL056W': 0.05706375886327985,
 'YDL155W': 0.11525423728820654,
 'YDR146C': 0.15198169439824039,
 'YER111C': 0.058758721997832719,
 'YFR028C': 0.16441142405162809,
 'YGL003C': 0.022882002316463701,
 'YJL157C': 0.17006130116680432,
 'YJL187C': 0.029944348710433982,
 'YLR079W': 0.23644067796624724,
 'YLR113W': 0.46271186440706452,
 'YLR182W': 0.090398033842819567,
 'YMR036C': 0.032769287268022092,
 'YMR043W': 0.29322033898323135,
 'YOR083W': 0.026271186440694139,
 'YPL256C': 0.19717514124305924,
 'YPR119W': 0.158474576271284,
 'YPR120C': 0.079943502824907975}

Unit is molecules per second (per cell).

### Protein synthesis rates (time course)