### Figure 2B - Mixture Capsule vs Lysate Bsub fraction

Here we plot the gene-by-gene breakdown of B. subtilis genes versus E. coli genes for the mixture capsule sample and the mixture lysate sample.

In [8]:
import pandas as pd
import os
from bokeh.io import push_notebook, show, output_notebook, export_svg
output_notebook()
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import Scatter
import datetime

First we will read our dataframe files into a dictionary of data frames, keyed by sample name:

In [9]:
df_dir = '../../../Processed Data Files/dataframe files/230117Li/'
prefix = '230117Li_Mixing_'
df_suffix = '_dataframe.txt'
df_dict = dict()
names = []
# Fetch all the relevant dataframe files and put them in a usable format.
# os.listdir() returns entries in arbitrary order, so sort them to get a
# reproducible sample list from one run to the next.
for file in sorted(os.listdir(df_dir)):
    if file.endswith(df_suffix) and file.startswith(prefix):
        # The sample name is whatever sits between the prefix and the first '_1'.
        # Strip the known suffix first so that a file name without '_1' still
        # yields a sensible name (str.find() would return -1 there and the
        # resulting slice would silently chop off the last character).
        stem = file[len(prefix):-len(df_suffix)]
        name = stem.partition('_1')[0]
        new_df = pd.read_csv(os.path.join(df_dir, file))
        # Feature length derived from the Start/Stop columns of the file.
        new_df['Length'] = new_df['Stop'] - new_df['Start']
        df_dict[name] = new_df
        names.append(name)
print(f'\nHere is the list of unique samples for which there are dataframe files: \n\n{names}\n')

# a name dictionary for converting between the dataframe name and a form nicer for plots:
names_dict = {'Ecoli_Cap':'E. coli Capsule', 'Mix_Cap':'Mixture Capsule', 'Ecoli_Lysate':'E. coli Lysate', 
              'Mix_Lysate':'Mixture Lysate', 'Bsub_Lysate':'B. subtilis Lysate'}


Here is the list of unique samples for which there are dataframe files: 

['Bsub_Lysate', 'Ecoli_Cap', 'Ecoli_Lysate', 'Mix_Cap', 'Mix_Lysate']



Some helper functions to help us with the plotting:

In [10]:
def into_RPM(df_dict, name):
    '''Return a copy of one sample's dataframe with its read counts converted to RPM.

    The 'Counts' column of ``df_dict[name]`` is renamed to ``name`` and every value
    is divided by (total counts / 1,000,000), i.e. expressed as reads per million.
    The dataframe stored in ``df_dict`` is left untouched.

    Parameters
    ----------
    df_dict : dict
        Maps a sample name to a dataframe that has a 'Counts' column.
    name : str
        Key of the sample to convert; also the name given to the RPM column.

    Returns
    -------
    pandas.DataFrame
        New dataframe in which 'Counts' is replaced by an RPM column called ``name``.
    '''
    # rename() returns a new dataframe, so the entry in df_dict is not modified.
    sample_df = df_dict[name].rename(columns={'Counts': name})
    # Series.sum() is vectorised and skips missing values, so a single NaN count
    # no longer turns the whole RPM column into NaN (the builtin sum() did).
    total_counts = sample_df[name].sum()
    sample_df[name] = sample_df[name] / (total_counts / 1000000)  # RPM

    return sample_df

Let's get the data we need in order to look at the comparative abundance in the mixture lysate and mixture capsule samples, each plotted against the B. subtilis lysate.  This notebook will output both an interactive plot below and an svg image of the plot.  In order for the svg export to work you need to have selenium and the firefox/geckodriver libraries installed via conda.  Feel free to comment these parts of the code out if all you want is the interactive plot.

In [39]:
# Samples to compare (keys of df_dict).
x = 'Bsub_Lysate'  # x-axis of both panels: B. subtilis lysate
y1 = 'Mix_Lysate'  # y-axis of the top panel: mixture lysate
y2 = 'Mix_Cap'     # y-axis of the bottom panel: mixture capsule

# Rows removed from both panels, matched on the 'Name' column.
EXCLUDED_NAMES = ['lacI', 'Capsule']
# (value in the 'Genome' column, fill colour, legend text), drawn in this order.
GENOME_STYLES = [("bsubtilis", "coral", "B. subtilis"),
                 ("ecoli", "darkseagreen", "E. coli")]


def _merge_for_plot(x_rpm, y_rpm):
    """Merge two RPM dataframes and remove the rows listed in EXCLUDED_NAMES.

    merge() is called without ``on=``, so pandas joins on every column the
    two frames have in common and keeps only rows present in both.
    """
    merged = x_rpm.merge(y_rpm)
    return merged[~merged['Name'].isin(EXCLUDED_NAMES)]


def _scatter_panel(plot_df, x_col, y_col):
    """Build one gene-level scatter panel with one glyph series per genome."""
    panel = figure(
        aspect_scale=1, width=460, height=350,
        output_backend="svg", tooltips=[("Gene", "@Name")],
    )
    for genome, colour, label in GENOME_STYLES:
        # scatter() draws circle markers by default; it replaces circle(size=...),
        # which recent Bokeh releases deprecate.  line_alpha=0 hides the outline.
        panel.scatter(x=x_col, y=y_col,
                      source=plot_df.loc[plot_df['Genome'] == genome],
                      size=10, fill_alpha=0.8, line_alpha=0,
                      color=colour, legend_label=label)
    return panel


def _style_panel(panel, axis_label_font_size):
    """Apply the tick, font and legend settings shared by both panels."""
    panel.xaxis.ticker = [20000, 40000, 60000]
    panel.yaxis.ticker = [1000, 3000, 5000]

    panel.axis.major_label_text_font = "arial"
    panel.axis.major_label_text_font_style = "normal"
    panel.axis.major_label_text_font_size = "20px"
    panel.axis.axis_label_text_font = "arial"
    panel.axis.axis_label_text_font_style = "normal"
    panel.axis.axis_label_text_font_size = axis_label_font_size

    panel.legend.location = (140, 35)
    panel.legend.label_text_font_size = '18pt'
    panel.legend.glyph_width = 30
    panel.legend.glyph_height = 30
    panel.legend.label_text_font_style = "italic"


y1_df = into_RPM(df_dict, y1)
y2_df = into_RPM(df_dict, y2)
x_df = into_RPM(df_dict, x)

plot1_df = _merge_for_plot(x_df, y1_df)
plot2_df = _merge_for_plot(x_df, y2_df)

p1 = _scatter_panel(plot1_df, x, y1)
p2 = _scatter_panel(plot2_df, x, y2)

# The top panel hides its x-axis; only the bottom panel carries the x-axis label.
p1.xaxis.visible = False
p1.yaxis.axis_label = "Total Lysate RNA Levels (RPM)"
p2.yaxis.axis_label = "MTCs RNA Levels (RPM)"
p2.xaxis.axis_label = "B. sub Lysate RNA Levels RPM"

# The bottom panel uses a slightly larger axis-title font (22px vs 20px).
_style_panel(p1, "20px")
_style_panel(p2, "22px")

p3 = column(p1, p2)

show(p3)
# SVG export needs selenium plus a firefox/geckodriver install.
export_svg(p3, filename = f'./2B_{datetime.date.today()}.svg')

['./2B_2023-07-21.svg']