This notebook demonstrates some code I wrote to handle data from the Deniz lab nanodrop.

In [1]:
import glob
import pandas as pd
import numpy as np

import bokeh.io
import bokeh.layouts
import bokeh.plotting

bokeh.io.output_notebook()

import phase_diagram_analysis as pda
import bokeh_scatter

In [2]:
# glob returns matches in arbitrary, filesystem-dependent order; sort the paths
# so that file_list[0] and the order in which files are tidied are reproducible.
file_list = sorted(glob.glob("data_copy/*.tsv"))

This is what the data looks like straight off the instrument:

In [3]:
# Peek at the first raw export as a tab-separated table, before any tidying.
first_raw_path = file_list[0]
pd.read_csv(first_raw_path, sep="\t").head(15)

Unnamed: 0,#,Sample ID,User name,Date and Time,1 (nm),1 (Abs),2 (nm),2 (Abs),3 (nm),3 (Abs),...,5 (nm),5 (Abs),6 (nm),6 (Abs),7 (nm),7 (Abs),8 (nm),8 (Abs),9 (nm),9 (Abs)
0,1,buffer,Deniz Lab,6/5/2019 12:34:11 PM,350,-0.002,600,0.001,,,...,,,,,,,,,,
1,2,buffer,Deniz Lab,6/5/2019 12:35:00 PM,350,0.002,600,0.0,,,...,,,,,,,,,,
2,3,buffer,Deniz Lab,6/5/2019 12:34:49 PM,350,0.002,600,0.001,,,...,,,,,,,,,,
3,4,buffer,Deniz Lab,6/5/2019 1:18:36 PM,350,0.002,600,0.002,,,...,,,,,,,,,,
4,5,buffer,Deniz Lab,6/5/2019 1:18:45 PM,350,-0.002,600,0.0,,,...,,,,,,,,,,
5,6,buffer,Deniz Lab,6/5/2019 1:18:53 PM,350,-0.002,600,-0.002,,,...,,,,,,,,,,
6,7,buffer,Deniz Lab,6/5/2019 2:00:04 PM,350,0.004,600,0.004,,,...,,,,,,,,,,
7,8,buffer,Deniz Lab,6/5/2019 1:59:46 PM,350,-0.001,600,0.001,,,...,,,,,,,,,,
8,9,buffer,Deniz Lab,6/5/2019 1:59:56 PM,350,0.002,600,0.003,,,...,,,,,,,,,,
9,10,buffer,Deniz Lab,6/5/2019 2:00:15 PM,350,0.003,600,0.003,,,...,,,,,,,,,,


Here's how I've tidied it:

In [4]:
# Run every raw export through the tidying helper (it prints how many buffer
# samples it dropped), then show the first five tidy rows.
df = pda.tidy_nanodrop_data(file_list)
df.head(5)

Dropped 101 buffer samples.


Unnamed: 0,Sample ID,Abs 350,Abs 600,Peptide,Peptide concentration (uM),RNA/Peptide Ratio,Date,Time
0,RG7_150_0,0.002,0.001,RG7,150.0,0.0,6/5/2019,2:02:29 PM
1,RG7_150_0.01,0.004,0.001,RG7,150.0,0.01,6/5/2019,2:04:08 PM
2,RG7_150_0,-0.001,0.002,RG7,150.0,0.0,6/5/2019,2:02:40 PM
3,RG7_150_0.01,0.007,0.003,RG7,150.0,0.01,6/5/2019,2:04:17 PM
4,RG7_150_0.025,0.019,0.005,RG7,150.0,0.025,6/5/2019,2:05:41 PM


In [5]:
# Keep only measurements whose "Date" string is one of these acquisition days.
allowed_dates = ["6/5/2019", "11/1/2019", "11/18/2019", "11/19/2019"]
on_allowed_date = df["Date"].isin(allowed_dates)
filtered_df = df.loc[on_allowed_date]

In [6]:
# Overview figure: Abs 350 against RNA/peptide ratio, categorised by peptide.
# NOTE(review): the tidy data contains rows with a ratio of 0, which have no
# position on a logarithmic x axis -- confirm that leaving them off is intended.
overview_options = {
    "cat": "Peptide",
    "x": "RNA/Peptide Ratio",
    "x_axis_type": "log",
    "y": "Abs 350",
    "height": 350,
    "width": 600,
}
p = bokeh_scatter.scatter(data=filtered_df, **overview_options)
p.legend.location = "top_left"

bokeh.io.show(p)

In [7]:
# One panel per peptide, with points categorised by acquisition date
# (cat="Date"), stacked in a single column.
plots = []
for peptide, group in filtered_df.groupby("Peptide"):
    p = bokeh_scatter.scatter(
        data=group,
        cat="Date",
        # Bokeh resolves a column name containing spaces or punctuation only
        # when it is wrapped in braces; a bare "@RNA/Peptide Ratio" would not
        # look up the "RNA/Peptide Ratio" column.
        tooltips=[
            ("Date", "@Date"),
            ("Ratio", "@{RNA/Peptide Ratio}"),
        ],
        x="RNA/Peptide Ratio",
        x_axis_type="log",
        y="Abs 350",
        height=200,
        width=500,
        title=peptide,
    )
    p.legend.location = "top_left"
    plots.append(p)

bokeh.io.show(bokeh.layouts.column(plots))

In [8]:
# Flag outliers in "Abs 350", then keep only the rows whose "Outlier" entry
# compares equal to False. The flagged frame gets its own name so that
# no_outliers_df always means the filtered result.
flagged_df = pda.identify_outliers(filtered_df, "Abs 350")
not_flagged = flagged_df["Outlier"].eq(False)
no_outliers_df = flagged_df.loc[not_flagged]

In [9]:
lower, upper = pda.find_outlier_bounds(filtered_df, "Abs 350", ParseKey=pda.parse_rna_peptide)

# The x grid is built from the whole filtered frame, so it is identical for
# every peptide: compute it (and the polygon's x coordinates) once instead of
# on every loop iteration.
# NOTE(review): this assumes lower[peptide] and upper[peptide] each hold one
# bound per ratio in this same sorted order, including ratios a given peptide
# was never measured at -- confirm against pda.find_outlier_bounds.
x1 = np.sort(filtered_df["RNA/Peptide Ratio"].unique())

# Polygon outline: left-to-right along the lower bound, then right-to-left
# back along the upper bound.
x = np.concatenate((x1, x1[::-1]))

plots = []
for peptide, data in filtered_df.groupby("Peptide"):
    y1 = lower[peptide]
    y2 = upper[peptide]
    y = np.concatenate((y1, y2[::-1]))

    plot = bokeh.plotting.figure(x_axis_type="log", height=200, width=500, title=peptide)

    # this plots a gray area indicating the outlier bounds
    plot.patch(x=x, y=y, line_color="gray", fill_color="gray")

    # Draw this peptide's measurements onto the figure that holds the band.
    p = bokeh_scatter.scatter(
        data=data,
        cat="Date",
        x="RNA/Peptide Ratio",
        y="Abs 350",
        p=plot,
        show_legend=False,
    )
    plots.append(p)

bokeh.io.show(bokeh.layouts.grid(plots))

# outliers are calculated for each x value, which is why the grey areas are so spiky
# I really need more data to do this properly, but here's the proof of concept

In [10]:
# now with outliers removed

# Hover fields shared by every panel. The ratio column name contains a slash
# and a space, so it must be wrapped in braces for Bokeh to resolve it.
hover_fields = (
    ("Date", "@Date"),
    ("Ratio", "@{RNA/Peptide Ratio}"),
)

plots = []
for peptide, group in no_outliers_df.groupby("Peptide"):
    p = bokeh_scatter.scatter(
        data=group,
        title=peptide,
        cat="Date",
        tooltips=list(hover_fields),  # fresh list per panel, as before
        x="RNA/Peptide Ratio",
        x_axis_type="log",
        y="Abs 350",
        height=200,
        width=500,
    )
    p.legend.location = "top_left"
    plots.append(p)

bokeh.io.show(bokeh.layouts.column(plots))