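Looking at plate effects: the library size of every sample is drawn at its well position, one figure per plate and replicate.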

In [1]:
# %load ../start.py
# Load useful extensions
import os
import sys

# Activate the autoreload extension for easy reloading of external packages
%reload_ext autoreload
%autoreload 1

# Set up cashdir
from ipycache import CacheMagics
CacheMagics.cachedir = '../cachedir'

# Trun on the water mark
%reload_ext watermark
%watermark -u -d -v -g

# Load ipycache extension
%reload_ext ipycache
from ipycache import CacheMagics
CacheMagics.cachedir = '../cachedir'

# Add project library to path
sys.path.insert(0, '../../lcdb-wf/lib')
sys.path.insert(0, '../../lib/python')

# Set up references
import yaml
with open('../../config/config.yml') as fh:
    config = yaml.load(fh)

assembly = config['assembly']
tag = config['aligner']['tag']
REF = os.path.join(os.environ['REFERENCES_DIR'], assembly, tag)


last updated: 2017-09-22 

CPython 3.5.2
IPython 6.1.0
Git hash: b31c213e0fbaedf9092be694730dd537cd996a08


In [2]:
# imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
# Sample table, keyed by the `samplename` column.
stable = pd.read_csv(
    '../../config/sampletable.tsv',
    sep='\t',
    index_col='samplename',
)

In [4]:
# Preview the first five rows of the sample table.
stable.head(n=5)

Unnamed: 0_level_0,SRX,BioSample,GEO,drsc,target_FBgn,target_symbol,drsc_rep,rep,plate_id,well_id,plate_row,plate_column
samplename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
SRR3486891,SRX1748887,SAMN04959321,GSM2145154,DRSC07681,FBgn0003396,shn,2,1,1,E8,E,8
SRR3486986,SRX1748983,SAMN04959673,GSM2145249,DRSC07681,FBgn0003396,shn,2,2,1,E8,E,8
SRR3487576,SRX1749576,SAMN04960293,GSM2145839,DRSC15627,FBgn0038742,Arc42,2,2,4,G4,G,4
SRR3487481,SRX1749480,SAMN04960137,GSM2145744,DRSC15627,FBgn0038742,Arc42,2,1,4,G4,G,4
SRR3486819,SRX1748815,SAMN04959150,GSM2145082,DRSC32770,FBgn0011715,Snr1,2,2,10,G7,G,7


In [5]:
# Library-size table keyed by the `sample` column; the dots in the column
# names are replaced with underscores.
libsize = (
    pd.read_table(
        '../../rnaseq-wf/data/aggregation/libsizes_table.tsv',
        index_col='sample',
    )
    .rename(columns={
        'cutadapt.bam.libsize': 'cutadapt_bam_libsize',
        'cutadapt.fastq.libsize': 'cutadapt_fastq_libsize',
        'fastq.libsize': 'fastq_libsize',
    })
)

In [6]:
# Preview the first five rows of the library-size table.
libsize.head(n=5)

Unnamed: 0_level_0,cutadapt_bam_libsize,cutadapt_fastq_libsize,fastq_libsize
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SRR3486644,6.498697,7.979129,8.0193
SRR3486645,4.516275,5.495366,5.514436
SRR3486646,5.873005,6.92508,6.957373
SRR3486647,5.850562,7.253668,7.279183
SRR3486648,5.796863,7.128077,7.1507


In [7]:
# Join sample metadata with library sizes on their shared index, store the
# plate column as a string, and expose the index as a regular `srr` column.
merged = (
    stable
    .merge(libsize, left_index=True, right_index=True)
    .assign(plate_column=lambda frame: frame['plate_column'].astype(str))
    .rename_axis('srr')
    .reset_index()
)

In [8]:
# Preview the first five rows of the merged table.
merged.head(n=5)

Unnamed: 0,srr,SRX,BioSample,GEO,drsc,target_FBgn,target_symbol,drsc_rep,rep,plate_id,well_id,plate_row,plate_column,cutadapt_bam_libsize,cutadapt_fastq_libsize,fastq_libsize
0,SRR3486891,SRX1748887,SAMN04959321,GSM2145154,DRSC07681,FBgn0003396,shn,2,1,1,E8,E,8,3.24642,4.792072,5.462838
1,SRR3486986,SRX1748983,SAMN04959673,GSM2145249,DRSC07681,FBgn0003396,shn,2,2,1,E8,E,8,0.677074,1.168138,1.836914
2,SRR3487576,SRX1749576,SAMN04960293,GSM2145839,DRSC15627,FBgn0038742,Arc42,2,2,4,G4,G,4,6.137403,8.633944,8.801874
3,SRR3487481,SRX1749480,SAMN04960137,GSM2145744,DRSC15627,FBgn0038742,Arc42,2,1,4,G4,G,4,6.849205,8.372345,8.916935
4,SRR3486819,SRX1748815,SAMN04959150,GSM2145082,DRSC32770,FBgn0011715,Snr1,2,2,10,G7,G,7,7.44703,8.992069,9.023902


In [9]:
from bokeh.io import show, output_notebook
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    BasicTicker,
    PrintfTickFormatter,
    ColorBar,
)
from bokeh.plotting import figure

In [10]:
# Configure Bokeh to render figures inline in this notebook.
output_notebook()

In [11]:
def plot_plates(pid, rep, title, lmin, lmax, field='fastq_libsize', data=None):
    """Draw one replicate of one plate as a well-by-well Bokeh figure.

    Every sample is placed at its (``plate_column``, ``plate_row``) position
    and colored by ``field``. Samples whose ``target_symbol`` is ``'LacZ'``
    are drawn as squares; all other samples are drawn as circles.

    Parameters
    ----------
    pid
        Value matched against the ``plate_id`` column.
    rep
        Value matched against the ``rep`` column.
    title : str
        Figure title.
    lmin, lmax : number
        Low and high ends of the linear color scale.
    field : str, optional
        Column used to color the wells. Defaults to ``'fastq_libsize'``.
    data : pandas.DataFrame, optional
        Table to plot from. Defaults to the notebook-level ``merged`` frame.
        It must provide the columns ``plate_id``, ``rep``, ``plate_row``,
        ``plate_column``, ``target_symbol`` and ``field``; ``srr`` and
        ``drsc`` are referenced by the hover tooltips. ``plate_column`` must
        hold strings of integers, because the columns are ordered by their
        integer value and used as categorical axis labels.

    Returns
    -------
    The configured Bokeh figure (not yet shown).
    """
    if data is None:
        # Fall back to the notebook-level table built in an earlier cell.
        data = merged

    plate = data[(data['plate_id'] == pid) & (data['rep'] == rep)].copy()
    rows = sorted(plate['plate_row'].unique())
    # Columns are strings, so order them numerically rather than lexically.
    cols = sorted(plate['plate_column'].unique(), key=int)

    colors = sns.color_palette("rocket", n_colors=10).as_hex()
    mapper = LinearColorMapper(palette=colors, low=lmin, high=lmax)

    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

    # Rows are reversed so the first row label ends up at the top of the axis.
    p = figure(title=title, x_range=cols, y_range=list(reversed(rows)),
               x_axis_location='above', plot_width=400, plot_height=300,
               toolbar_location='below', tools=TOOLS
              )

    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "5pt"
    p.axis.major_label_standoff = 0

    # Invisible circle over every well, carrying the full row for hovering.
    # NOTE(review): the hover tool is not restricted to this layer, so the
    # colored glyphs drawn on top may answer the hover as well - confirm
    # whether duplicate tooltips appear.
    source_all = ColumnDataSource(plate)
    p.circle("plate_column", "plate_row", radius=.4, source=source_all,
             line_color=None, fill_color=None)

    is_lacz = plate['target_symbol'] == 'LacZ'

    # Circle for every non-LacZ sample.
    # NOTE(review): the renderer name 'bob' is not referenced in the visible
    # cells; it is kept unchanged in case something else selects it.
    no_lacz = ColumnDataSource(plate[~is_lacz])
    p.circle("plate_column", "plate_row", radius=.4, source=no_lacz,
             fill_color={'field': field, 'transform': mapper}, name='bob'
            )

    # Square for every LacZ sample.
    lacz = ColumnDataSource(plate[is_lacz])
    p.rect("plate_column", "plate_row", width=.6, height=.6, source=lacz,
           fill_color={'field': field, 'transform': mapper},
          )

    color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
                         ticker=BasicTicker(desired_num_ticks=len(colors)),
                         formatter=PrintfTickFormatter(format="%d"),
                         label_standoff=6, border_line_color=None, location=(0, 0))

    p.add_layout(color_bar, 'right')

    p.select(HoverTool).tooltips = [
         ('SRR', '@srr'),
         ('DRSC', '@drsc'),
         ('Gene', '@target_symbol'),
    ]

    return p

In [12]:
from itertools import product

# One figure per (plate, replicate) pair, ordered plate by plate with
# replicate 1 before replicate 2. All figures share a 0-20 color scale.
plots = [
    plot_plates(pid, rep, 'Plate {}: Rep{}'.format(pid, rep), 0, 20)
    for pid, rep in product(sorted(merged.plate_id.unique()), (1, 2))
]

In [13]:
from bokeh.layouts import gridplot

In [14]:
from bokeh.io import output_file

In [15]:
# Direct Bokeh output to a standalone HTML file.
# NOTE(review): output_notebook() was also called earlier in this notebook;
# confirm whether show() is meant to render inline in addition to writing
# libsize.html, since rendering every plate twice may be slow.
output_file('libsize.html')
# `plots` is a flat list ordered plate by plate with replicate 1 then 2, so
# ncols=2 puts the two replicates of a plate side by side on one row.
p = gridplot(plots, ncols=2)
show(p)

KeyboardInterrupt: 