# Site 734 Plasmid v. Early
This notebook calculates and plots the enrichment of codons at site 734 in the early gDNA samples compared to the plasmid.

## Notebook setup
Imports

In [1]:
import pandas as pd
import plotnine as p9

# print imports and versions
# copied from https://stackoverflow.com/questions/40428931/package-for-listing-version-of-packages-used-in-a-jupyter-notebook
print('\n'.join(f'{m.__name__}=={m.__version__}' \
                for m in globals().values() if getattr(m, '__version__', None)))

%run functions/enrichment.py.ipynb import calculate_enrichment

pandas==0.25.3
plotnine==0.6.0


Input data

In [2]:
site_734_counts = 'results/site_734/site_734_counts.csv'

Output data

In [3]:
site_734_dir = 'results/site_734/'

Notebook aesthetics

In [4]:
p9.theme_set(p9.theme_classic())
CBPALETTE_RICH = ['#648FFF', '#FFB000', '#DC267F', '#785EF0', '#FE6100']

## Load data
Load site 734 frequencies

In [7]:
codon_counts = pd.read_csv(site_734_counts)
display(codon_counts)

Unnamed: 0,name,library,source,stage,sorted,site,wildtype,codon,count,letter
0,wt-plasmid-noStage-notSorted,wt,plasmid,noStage,notSorted,734,GAT,AAA,0,K
1,wt-gDNA-noStage-notSorted,wt,gDNA,noStage,notSorted,734,GAT,AAA,0,K
2,lib1-plasmid-noStage-notSorted,lib1,plasmid,noStage,notSorted,734,GAT,AAA,0,K
3,lib2-plasmid-noStage-notSorted,lib2,plasmid,noStage,notSorted,734,GAT,AAA,0,K
4,lib3-plasmid-noStage-notSorted,lib3,plasmid,noStage,notSorted,734,GAT,AAA,0,K
...,...,...,...,...,...,...,...,...,...,...
1083,lib2-gDNA-late-notSorted,lib2,gDNA,late,notSorted,734,GAT,TTT,0,F
1084,lib3-gDNA-late-notSorted,lib3,gDNA,late,notSorted,734,GAT,TTT,0,F
1085,lib1-gDNA-late-sorted,lib1,gDNA,late,sorted,734,GAT,TTT,0,F
1086,lib2-gDNA-late-sorted,lib2,gDNA,late,sorted,734,GAT,TTT,0,F


In [12]:
codon_counts['name'].unique()

array(['wt-plasmid-noStage-notSorted', 'wt-gDNA-noStage-notSorted',
       'lib1-plasmid-noStage-notSorted', 'lib2-plasmid-noStage-notSorted',
       'lib3-plasmid-noStage-notSorted', 'lib1-gDNA-early-notSorted',
       'lib2-gDNA-early-notSorted', 'lib3-gDNA-early-notSorted',
       'lib1-gDNA-mid-notSorted', 'lib2-gDNA-mid-notSorted',
       'lib3-gDNA-mid-notSorted', 'lib1-gDNA-late-notSorted',
       'lib2-gDNA-late-notSorted', 'lib3-gDNA-late-notSorted',
       'lib1-gDNA-late-sorted', 'lib2-gDNA-late-sorted',
       'lib3-gDNA-late-sorted'], dtype=object)

In [16]:
tolerance = calculate_enrichment(
    codon_counts,
    selected_sample='lib1-gDNA-early-notSorted',
    reference_sample='lib1-plasmid-noStage-notSorted')
display(tolerance)

tolerance_plot = (p9.ggplot(tolerance) +
                p9.aes(x='codon',
                       y='enrichment') +
                p9.geom_bar(stat='identity') +
                p9.ggtitle('Codon tolerance:\n'
                           f' v Mut_Plasmid') +
                p9.labs(x='codon',
                        y='enrichment') +
                p9.theme(figure_size=(6, 2),
                         plot_title=p9.element_text(size=10),
                         axis_title=p9.element_text(size=9),
                         axis_text_x=p9.element_text(size=8, rotation=90),
                         axis_text_y=p9.element_text(size=8))
               )

display(tolerance_plot)

Unnamed: 0,codon,letter,count_selected,count_pseudo_selected,count_reference,count_pseudo_reference,enrichment
0,AAA,K,0,0.1,0,0.1,0.040512
1,AAC,N,82,82.1,129,129.1,-0.612523
2,AAG,K,88,88.1,64,64.1,0.499330
3,AAT,N,75,75.1,28,28.1,1.458755
4,ACA,T,0,0.1,0,0.1,0.040512
...,...,...,...,...,...,...,...
59,TGT,C,0,0.1,0,0.1,0.040512
60,TTA,L,0,0.1,0,0.1,0.040512
61,TTC,F,42,42.1,65,65.1,-0.588325
62,TTG,L,0,0.1,0,0.1,0.040512


NameError: name 'selected_sample' is not defined