In [1]:
import numpy as np
import pandas as pd 

import bokeh_catplot

import bokeh.plotting
import bokeh.io

# Route Bokeh output to the notebook so that bokeh.io.show() renders plots inline
bokeh.io.output_notebook()

In [2]:
# Read the frog tongue adhesion data set; '#' is treated as the comment
# character, so commented lines in the CSV are not parsed as data.
df = pd.read_csv(
    'data/frog_tongue_adhesion.csv',
    comment='#',
)

# Show the first few rows as a sanity check
df.head()

Unnamed: 0,date,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)
0,2013_02_26,I,3,1205,46,1.95,-785,884,1.27,-0.29,387,70,0.82,3117,-2030
1,2013_02_26,I,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695
2,2013_03_01,I,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239
3,2013_03_01,I,2,1556,41,2.51,-455,1025,0.74,-0.17,330,158,0.52,4718,-1381
4,2013_03_01,I,3,493,36,0.8,-974,499,1.57,-0.423,245,216,0.12,2012,-3975


In [6]:
# Mean impact force for each ID, returned as a tidy two-column data frame
# (reset_index() turns the group label back into a regular 'ID' column).
df_mean = (
    df
    .groupby('ID')['impact force (mN)']
    .mean()
    .reset_index()
)

df_mean

Unnamed: 0,ID,impact force (mN)
0,I,1530.2
1,II,707.35
2,III,550.1
3,IV,419.1


In [18]:
# Canvas with a categorical y-axis; the IDs are passed in reversed order
# relative to their order in df_mean.
p = bokeh.plotting.figure(
    y_range=df_mean['ID'][::-1],
    x_axis_label='impact force (mN)',
    frame_width=300,
    frame_height=100,
)

# Glyphs: one horizontal bar per ID whose right edge is the mean impact force
p.hbar(y='ID', right='impact force (mN)', height=0.6, source=df_mean)

# Cosmetics: anchor the bars at x = 0, drop the grid lines belonging to the
# y-axis, and move the toolbar above the plot.
p.x_range.start = 0
p.ygrid.grid_line_color = None
p.toolbar_location = 'above'

bokeh.io.show(p)

We just took the mean of each group and plotted that single value. A bar graph like this tells us only the mean; it tells us nothing about how the data are distributed.

The goal of bokeh-catplot is to show how the data are distributed.

# Use bokeh catplot for categorical variables 

In [20]:
# Box plot of impact force, one box per ID (the categorical variable)
p = bokeh_catplot.box(data=df, cats='ID', val='impact force (mN)')

bokeh.io.show(p)

In [21]:
# Strip plot of impact force, grouped by ID (the categorical variable)
p = bokeh_catplot.strip(data=df, cats='ID', val='impact force (mN)')

bokeh.io.show(p)

In [22]:
# Histograms of impact force, grouped by ID (the categorical variable)
p = bokeh_catplot.histogram(data=df, cats='ID', val='impact force (mN)')

bokeh.io.show(p)

In [30]:
# Draw 10,000 samples from a standard normal distribution (mean 0, std 1).
# A seeded generator is used so that the samples, and therefore every plot
# built from `x`, are identical after Restart Kernel -> Run All.
rng = np.random.default_rng(seed=3252)
x = rng.normal(0, 1, size=10000)

# density=True requests a normalized histogram instead of raw counts
p = bokeh_catplot.histogram(
    data=x,
    density=True
)

bokeh.io.show(p)

The CDF evaluated at x tells you the probability of observing a value less than or equal to x.

CDF is the antiderivative of the PDF

ECDF = fraction of the datapoints we observed ≤ x

In [31]:
# ECDF of the samples stored in x
p = bokeh_catplot.ecdf(data=x)

bokeh.io.show(p)

In [33]:
# ECDFs of impact force, grouped by ID (the categorical variable)
p = bokeh_catplot.ecdf(data=df, cats='ID', val='impact force (mN)')

bokeh.io.show(p)

In [34]:
# Same grouped ECDFs, this time requesting the 'staircase' drawing style
p = bokeh_catplot.ecdf(
    data=df,
    val='impact force (mN)',
    cats='ID',
    style='staircase',
)

bokeh.io.show(p)

In [35]:
# Overlay the default-style ECDFs on the figure currently held in `p`.
# Passing p=p adds glyphs to that existing figure rather than making a new
# one, so this cell must run right after the cell that created `p`.
p = bokeh_catplot.ecdf(
    p=p,
    data=df,
    val='impact force (mN)',
    cats='ID',
)

bokeh.io.show(p)

In [36]:
# Horizontal box plot with capped whiskers and diamond-shaped outlier markers.
# The cell previously ended with a dangling `box_kwargs=` (a SyntaxError), so
# the incomplete argument is dropped and the default box styling applies.
# To restyle the boxes, pass a dict of glyph properties, e.g.
# box_kwargs=dict(fill_color=None, line_color='gray').
p = bokeh_catplot.box(
    data=df,
    cats='ID', # categorical var
    val='impact force (mN)',
    horizontal=True,
    whisker_caps=True,
    outlier_marker='diamond',
)

bokeh.io.show(p)

In [38]:
# Horizontal strip plot using semi-transparent dash markers
p = bokeh_catplot.strip(
    data=df,
    val='impact force (mN)',
    cats='ID',
    marker='dash',
    marker_kwargs={'alpha': 0.5},
    horizontal=True,
    frame_height=150,
)

# Move the toolbar above the plot
p.toolbar_location = 'above'

bokeh.io.show(p)

In [41]:
# Horizontal strip plot with jitter enabled and hover tooltips. Each tooltip
# entry is a (label, '@{column name}') pair; the braces are needed because
# the column names contain spaces.
p = bokeh_catplot.strip(
    data=df,
    val='impact force (mN)',
    cats='ID',
    tooltips=[
        ('trial', '@{trial number}'),
        ('adh force', '@{adhesive force (mN)}'),
    ],
    jitter=True,
    horizontal=True,
    frame_height=150,
)

# Move the toolbar above the plot
p.toolbar_location = 'above'

bokeh.io.show(p)

Group by two categorical vars

In [44]:
# Strip plot grouped by two categorical variables (ID, then trial number),
# with the 'trial number' column also passed as the color column.
p = bokeh_catplot.strip(
    data=df,
    val='impact force (mN)',
    cats=['ID', 'trial number'],
    color_column='trial number',
    horizontal=True,
    frame_height=350,
)

# Move the toolbar above the plot
p.toolbar_location = 'above'

bokeh.io.show(p)

In [53]:
# Jittered horizontal strip plot grouped by ID; the figure stays in `p` so a
# later cell can draw on top of it.
p = bokeh_catplot.strip(
    data=df,
    val='impact force (mN)',
    cats=['ID'],
    jitter=True,
    horizontal=True,
    frame_height=250,
)

# Move the toolbar above the plot
p.toolbar_location = 'above'

bokeh.io.show(p)

In [54]:
# Draw unfilled gray box plots onto the figure currently held in `p` (p=p
# reuses that figure instead of creating a new one). display_points=False is
# passed so the box plot does not add its own point glyphs.
p = bokeh_catplot.box(
    p=p,
    data=df,
    val='impact force (mN)',
    cats=['ID'],
    horizontal=True,
    frame_height=200,
    display_points=False,
    box_kwargs={'fill_color': None, 'line_color': 'gray'},
    median_kwargs={'line_color': 'gray'},
    whisker_kwargs={'line_color': 'gray'},
)

# Move the toolbar above the plot
p.toolbar_location = 'above'

bokeh.io.show(p)

In [50]:
# ECDF of impact force using the 'colored' kind. This creates a new figure;
# no existing plot is passed in via `p`.
p = bokeh_catplot.ecdf(
    data=df,
    cats='ID', # categorical var
    val='impact force (mN)',
    kind='colored', # presumably a single ECDF of all data with points colored by ID; confirm against the bokeh_catplot docs
)

bokeh.io.show(p)