In [1]:
# load data with pandas
import pandas as pd

# path to the Excel spreadsheet, relative to the notebook's working directory
path_to_data = "../data/elife-31098-supp1-v2.xls"

# parse the spreadsheet into a DataFrame
df = pd.read_excel(io=path_to_data)

In [2]:
# preview the first five rows to check the column names and values
df.head(n=5)

Unnamed: 0,Gene,log2 Fold Change (H/L) (KRASG12V/Empty Vector),p-value
0,ABCC1,0.649462,0.015625
1,ABCC3,1.597891,0.25
2,ACP2,-1.499113,0.5
3,ACSL3,-1.279971,0.5
4,ADAM10,-0.065187,1.0


In [3]:
# load bokeh dependencies for the volcano plot 
from bokeh.plotting import figure
from bokeh.layouts import layout
from bokeh.embed import components
from bokeh.io import show
from bokeh.io import output_notebook

In [4]:
# set up notebook environment to render bokeh plots inline
output_notebook()

In [5]:
# create the volcano plot

# we'll need numpy for the log transform
import numpy as np

# x: log2 fold change as stored in the table
# y: p-values on a -log10 scale, so smaller p-values plot higher
volcano_x = df['log2 Fold Change (H/L) (KRASG12V/Empty Vector)']
volcano_y = -np.log10(df['p-value'])

# initialize figure object
p = figure(plot_width=400, plot_height=400)

# add the points as a circle glyph
p.circle(x=volcano_x, y=volcano_y)

# format the axis labels
p.xaxis.axis_label = "log2(H/L) (KRAS-G12V/EV)" #TODO: Include conditions
p.yaxis.axis_label = "-log10(p-value)"

# render the plot
show(p)

In [6]:
# add colors

# significance criteria: at least a 2-fold change (|log2 fold change| >= 1)
# together with a p-value of at most 0.01
LOG2_FC_CUTOFF = 1
P_VALUE_CUTOFF = 0.01

fold_change = df['log2 Fold Change (H/L) (KRASG12V/Empty Vector)']
is_significant = df['p-value'] <= P_VALUE_CUTOFF

# classify all proteins in one vectorized pass instead of looping with iterrows();
# np.select takes the first condition that holds and otherwise uses `default`,
# so rows with missing values (comparisons evaluate to False) end up "black"
color_list = np.select(
    [
        # significantly upregulated proteins
        is_significant & (fold_change >= LOG2_FC_CUTOFF),
        # significantly downregulated proteins
        is_significant & (fold_change <= -LOG2_FC_CUTOFF),
    ],
    ["blue", "red"],
    # all other proteins
    default="black",
).tolist()  # keep a plain Python list; later cells pass color_list to bokeh

# add this list to your dataframe and then inspect with head()
df['color'] = color_list
df.head(n=10)

Unnamed: 0,Gene,log2 Fold Change (H/L) (KRASG12V/Empty Vector),p-value,color
0,ABCC1,0.649462,0.015625,black
1,ABCC3,1.597891,0.25,black
2,ACP2,-1.499113,0.5,black
3,ACSL3,-1.279971,0.5,black
4,ADAM10,-0.065187,1.0,black
5,ADAM15,0.404062,0.625,black
6,ADAM17,0.11709,0.46875,black
7,ALCAM,0.019856,0.002159035,black
8,ANO6,0.431644,0.25,black
9,ANPEP,2.082598,5.87e-22,blue


In [7]:
# redraw the volcano plot, this time filling each point with its assigned color
point_x = df['log2 Fold Change (H/L) (KRASG12V/Empty Vector)']
point_y = -np.log10(df['p-value'])

# initialize figure object
p = figure(plot_width=400, plot_height=400)

# add the points as a circle glyph; the explicit line_color gives every
# point a black outline regardless of its fill color
p.circle(x=point_x, y=point_y, color=color_list, line_color='black')

# format the axis labels
p.xaxis.axis_label = "log2(H/L) (KRAS-G12V/EV)" #TODO: Include conditions
p.yaxis.axis_label = "-log10(p-value)"

show(p)

In [8]:
# add horizontal and vertical cutoffs using spans 
#(http://bokeh.pydata.org/en/latest/docs/user_guide/annotations.html#spans)

from bokeh.models import Span

# all three cutoff lines share the same look, so define it once
cutoff_style = dict(line_color='black', line_width=1, line_dash='dashed')

# p-value cutoff of 0.01 --> -log10(0.01) = 2
hline = Span(location=2, dimension='width', **cutoff_style)

# cutoff for a negative 2-fold change
vline_left = Span(location=-1, dimension='height', **cutoff_style)

# cutoff for a positive 2-fold change
vline_right = Span(location=1, dimension='height', **cutoff_style)

# attach the spans to the figure; add_layout is bokeh's documented way to
# add annotations, rather than appending to the renderers list directly
for cutoff in (hline, vline_left, vline_right):
    p.add_layout(cutoff)

# show the updated image
show(p)

In [9]:
# add tooltips

from bokeh.models import ColumnDataSource

# prepare the source data object: one named column per plotted / hovered field
hover_columns = {
    'x': df['log2 Fold Change (H/L) (KRASG12V/Empty Vector)'],
    'y': -np.log10(df['p-value']),
    'Gene': df['Gene'],
    'color': df['color'],
}
source = ColumnDataSource(data=hover_columns)

# Define tooltips as (label, template) pairs; "@name" reads a column of the
# data source, "$name" is one of bokeh's special hover variables
TOOLTIPS = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("Gene", "@Gene"),
]

# initialize figure object
p = figure(plot_width=400, plot_height=400, tooltips=TOOLTIPS)

# add the points as a circle glyph; the string arguments are column names in `source`
p.circle(x='x', y='y', color='color', line_color='black', source=source)

show(p)

In [10]:
# histogram of the raw (uncorrected) p-values
df['p-value'].hist()

<matplotlib.axes._subplots.AxesSubplot at 0x7f8e6170ec18>

In [11]:
# multiple hypothesis correction
from statsmodels.stats.multitest import multipletests

# run the Benjamini-Hochberg FDR correction once and unpack the first two
# results: the per-test reject flags and the adjusted p-values
passes_multiple_testing, corrected_p_vals = multipletests(
    df['p-value'], alpha=0.01, method='fdr_bh')[:2]

df['passes_multiple_testing'] = passes_multiple_testing
df['corrected_p-values'] = corrected_p_vals
df.head()

Unnamed: 0,Gene,log2 Fold Change (H/L) (KRASG12V/Empty Vector),p-value,color,passes_multiple_testing,corrected_p-values
0,ABCC1,0.649462,0.015625,black,False,0.047743
1,ABCC3,1.597891,0.25,black,False,0.333333
2,ACP2,-1.499113,0.5,black,False,0.55
3,ACSL3,-1.279971,0.5,black,False,0.55
4,ADAM10,-0.065187,1.0,black,False,1.0


In [12]:
# rebuild the tooltip plot so that hovering also shows the raw and corrected p-values

from bokeh.models import ColumnDataSource

# prepare the source data object
source = ColumnDataSource(data=dict(
    x=df['log2 Fold Change (H/L) (KRASG12V/Empty Vector)'],
    y=-np.log10(df['p-value']),
    Gene=df['Gene'],
    color=df['color'],
    p_vals=df['p-value'],
    corrected_p_vals=df['corrected_p-values'],
))

# Define tooltips as (label, template) pairs; "@name" reads a column of `source`
TOOLTIPS = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("Gene", "@Gene"),
    ("p-value", "@p_vals"),
    ("corrected p-values", "@corrected_p_vals"),
]

# initialize figure object
p = figure(plot_width=400, plot_height=400, tooltips=TOOLTIPS)

# add the points as a circle glyph
p.circle(x='x',
         y='y',
         color='color',
         line_color='black',
         source=source)

# format the axis labels
p.xaxis.axis_label = "log2(H/L) (KRAS-G12V/EV)" #TODO: Include conditions
p.yaxis.axis_label = "-log10(p-value)"

# re-attach the cutoff spans created earlier to this new figure; add_layout is
# bokeh's documented way to add annotations, rather than extending p.renderers
for cutoff in (hline, vline_left, vline_right):
    p.add_layout(cutoff)

show(p)

In [13]:
# declutter the figure: no grid lines on either axis
for grid in (p.xgrid, p.ygrid):
    grid.visible = False

# no minor ticks on either axis
for axis in (p.xaxis, p.yaxis):
    axis.minor_tick_line_color = None

# no border around the plot area
p.outline_line_color = None

# switch to the SVG backend so the plot is rendered as a vector graphic
# (this cell does not itself write a file) and drop the bokeh logo from the toolbar
p.output_backend = "svg"
p.toolbar.logo = None

show(p)