# Exploring Wildtype Expression

In [1]:
import numpy as np 
import pandas as pd 
import altair as alt 
import futileprot as fp 
colors, palette = fp.viz.altair_style()

In [2]:
# Load the Balakrishan 2021 data 
data = pd.read_csv('../../data/literature/Balakrishnan2021/Balakrishnan2021_wt_rnaseq.csv')

# Clean up the data to a form that is useful.
data.rename(columns={'WT-pre':'glucose', 'WT-acetate':'acetate', 'gene':'gene_name'},
            inplace=True)
data = data[['gene_name', 'glucose', 'acetate']]
data = data.melt('gene_name', var_name='growth_medium', value_name='reads')


In [17]:
# Set up the  dictionary of genes we care about. 
genes = {'flh':['flhD', 'flhC', 'flgA', 'flgM', 'flgN', 'flgB', 'flgC', 'flgD', 
                 'flgE', 'flgF', 'flgG', 'flgH', 'flgI', 'flgJ', 'flhB', 'flhA', 
                 'flhE', 'fliA', 'fliZ', 'fliY', 'fliE', 'fliF', 'fliG', 'flH',
                 'flhI', 'flhJ', 'flhK', 'fliL', 'fliM', 'fliN', 'fliO', 'fliP',
                 'fliQ', 'fliR', 'csgC', 'ymdA', 'ymdB', 'ymdC', 'yecR', 'fliD', 
                 'fliS', 'fliT', 'ycgR', 'trg', 'flxA', 'ynjH', 'motA', 'motB',
                 'cheA', 'cheW', 'fliC', 'aer', 'yhjH', 'tsr', 'yjdA', 'yjcZ'],
         'glt':['gltI', 'gltJ', 'gltK', 'gltL'],
         'mal':['malG', 'malF', 'malE','malK', 'malM', 'lamB'],
         'his':['hisJ', 'hisQ', 'hisM', 'hisP'],
         'dpp': ['dppA', 'dppB', 'dppC', 'dppD'],
         'pot': ['potF', 'potG', 'potH', 'potI'],
         'nmp': ['nmpC'],
         'rbs': ['rbsD', 'rbsB', 'rbsA', 'rbsC'],
         'mgl': ['mglB', 'mglA', 'mglC'],
         'opp': ['oppA', 'oppB', 'oppC', 'oppD', 'oppF']}

ribosomal = ['rpsA', 'rpsB', 'pbsC', 'rpsD', 'rpsE', 'rpsF', 'rpsG', 'rpsH', 
             'rpsI', 'rpsJ', 'rpsK', 'rpsL', 'rpsM', 'rpsN', 'rpsO', 'rpsP',
             'rpsQ', 'rpsR', 'rpsS', 'rpsT', 'rpsU', 'sra', 'rplA', 'rplB',
             'rplC', 'rplD', 'rplE', 'rplF', 'rplJ', 'rplL', 'rplI', 'rplK', 
             'rplM', 'rplN', 'rplO', 'rplP', 'rplQ',' rplR', 'rplS', 'rplT', 
             'rplU', 'rplV', 'rplW', 'rplX', 'rplY', 'rpmA', 'rpmB', 'rpmC',
             'rpmD', 'rpmE', 'rpmF', 'rpmG', 'rpmH', 'rpmI', 'rpmJ']


In [18]:
# Compute the ribosomal fractions for each growth condition
ribo_genes = []

fracs = pd.DataFrame()
for nom, gen in genes.items():
    for g, d in data.groupby(['growth_medium']):
        total = d['reads'].sum()
        ko = d[d['gene_name'].isin(gen)]['reads'].sum()
        ribo = d[d['gene_name'].isin(ribosomal)]['reads'].sum()
        info = {'knockout':nom, 
                'growth_medium':g,
                'total_reads': total,
                'ribosomal_fraction': ribo / total,
                'knockout_fraction': ko /total,
                'metabolic_fraction': (total - ribo - ko) / total
                } 
        fracs = fracs.append(info, ignore_index=True) 




In [19]:
fracs

Unnamed: 0,growth_medium,knockout,knockout_fraction,metabolic_fraction,ribosomal_fraction,total_reads
0,acetate,flh,0.163971,0.737407,0.098622,21116899.0
1,glucose,flh,0.071478,0.736995,0.191527,12306118.0
2,acetate,glt,0.009978,0.891399,0.098622,21116899.0
3,glucose,glt,0.002252,0.806221,0.191527,12306118.0
4,acetate,mal,0.008909,0.892468,0.098622,21116899.0
5,glucose,mal,0.004257,0.804216,0.191527,12306118.0
6,acetate,his,0.001762,0.899615,0.098622,21116899.0
7,glucose,his,0.001471,0.807002,0.191527,12306118.0
8,acetate,dpp,0.00397,0.897408,0.098622,21116899.0
9,glucose,dpp,0.002106,0.806367,0.191527,12306118.0
