In [None]:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import pandas as pd
import requests

In [None]:
# Synthetic data for the demo plot: N points scattered over a 100 x 100 square,
# each with its own random radius (up to 1.5).
N = 3000
# A fixed seed makes the figure identical on every "Restart Kernel -> Run All".
SEED = 42
rng = np.random.default_rng(SEED)
x = rng.random(size=N) * 100
y = rng.random(size=N) * 100
radii = rng.random(size=N) * 1.5
# Colour encodes position: the red channel grows with x, the green channel with y,
# and the blue channel is fixed at 150.
red = np.floor(50 + 2 * x).astype(np.int64)
green = np.floor(30 + 2 * y).astype(np.int64)
colors = ['#%02x%02x%02x' % (r, g, 150) for r, g in zip(red, green)]

In [None]:
# Direct Bokeh output to the notebook so the show(...) calls below render inline.
output_notebook()

In [None]:
# Demo figure: one circle per synthetic point, sized by `radii` and filled with
# the position-derived colour; outlines are switched off.
circle_style = dict(
    radius=radii,
    fill_color=colors,
    fill_alpha=0.6,
    line_color=None,
)
demo = figure()
demo.circle(x, y, **circle_style)
show(demo)

In [None]:
# Download the cars dataset, parse the JSON payload and load it into a DataFrame.
url = 'https://raw.githubusercontent.com/derhuerst/mtcars/master/index.json'
# The timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() reports an HTTP error here rather than letting an error
# page fail later as a confusing JSON / DataFrame problem.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
df = pd.DataFrame(data)

# Columns plotted against each other below.
weights = df['wt'].values
fuel_efficiency = df['mpg'].values

# Scatter plot of weight (x) against fuel efficiency (y).
p1 = figure(title='Motor Trend Cars Dataset (1974) - Weight vs. Fuel Efficiency')
p1.xaxis.axis_label = 'Weight (x1000 lbs)'
p1.yaxis.axis_label = 'Miles Per Gallon'
p1.scatter(weights, fuel_efficiency, fill_color='blue', fill_alpha=0.6, size=12)
show(p1)

In [None]:
# Number of rows (cars) per cylinder count; the group labels, as strings,
# become the categories of the x-axis.
counts_by_cyl = df.groupby('cyl').size()
num_cylinders = [str(cyl) for cyl in counts_by_cyl.index.values]
counts = counts_by_cyl.values

# `height` is used instead of `plot_height`, which Bokeh 3.0 removed from figure().
p2 = figure(x_range=num_cylinders, height=400, title="Count By Number of Cylinders")

# NOTE: one fill colour is listed per bar, so this assumes exactly three cylinder groups.
p2.vbar(x=num_cylinders, top=counts, width=0.8, fill_color=['blue', 'orange', 'green'])

# Hide the vertical grid lines and anchor the bars at zero.
p2.xgrid.grid_line_color = None
p2.y_range.start = 0

show(p2)

In [None]:
# Mean miles-per-gallon per cylinder count; the group labels, as strings,
# become the categories of the x-axis.
avg_mpg_by_cyl = df.groupby('cyl').mpg.mean()
num_cylinders = [str(cyl) for cyl in avg_mpg_by_cyl.index.values]
averages = avg_mpg_by_cyl.values

# `height` is used instead of `plot_height`, which Bokeh 3.0 removed from figure().
p3 = figure(x_range=num_cylinders, height=400, title="Average MPG By Number of Cylinders")

# NOTE: one fill colour is listed per bar, so this assumes exactly three cylinder groups.
p3.vbar(x=num_cylinders, top=averages, width=0.8, fill_color=['blue', 'orange', 'green'])

# Hide the vertical grid lines and anchor the bars at zero.
p3.xgrid.grid_line_color = None
p3.y_range.start = 0

show(p3)

In [None]:
# Per-category quartiles of 'wt', plus the 1.5 * IQR fences used to flag outliers.
# 'vs' is cast to string first so its values can serve as categorical axis labels.
df['vs'] = df['vs'].astype(str)
groups = df.groupby('vs')
wt_by_vs = groups[['wt']]
q1, q2, q3 = (wt_by_vs.quantile(q=level) for level in (0.25, 0.5, 0.75))
iqr = q3 - q1
fence_reach = 1.5 * iqr
upper = q3 + fence_reach
lower = q1 - fence_reach

# Find the outliers for each category
def outliers(group):
    """Return the 'wt' values of `group` lying outside its category's fences.

    `group` is one sub-frame handed over by `groups.apply`; its `.name` is the
    category label, which selects the matching row of `upper` and `lower`.
    """
    cat = group.name
    # Select the fence by label (row `cat`, column 'wt'). Indexing the row with
    # `[0]` treats an integer key as a position on a label-indexed Series,
    # which pandas has deprecated.
    above_fence = group.wt > upper.loc[cat, 'wt']
    below_fence = group.wt < lower.loc[cat, 'wt']
    return group[above_fence | below_fence]['wt']
out = groups.apply(outliers).dropna()

# Build parallel coordinate lists for the outlier markers: one (category, value)
# pair per outlying point.
outx, outy = [], []
cats = list(groups.groups.keys())
for cat in cats:
    try:
        # Nothing to plot for a category without outliers.
        if out.loc[cat].empty:
            continue
        cat_values = list(out[cat])
    except KeyError:
        # The category is absent from `out` altogether.
        continue
    outx.extend([cat] * len(cat_values))
    outy.extend(cat_values)
        
# Box plot of 'wt' per 'vs' category, drawn from stems, boxes, whisker caps and outlier markers.
p4 = figure(x_range=cats, title='Comparison of Weight for V-Shaped [0] and Straight Engines [1]')

# If no outliers, shrink lengths of stems to be no longer than the minimums or maximums
qmin = groups[['wt']].quantile(q=0.00)
qmax = groups[['wt']].quantile(q=1.00)
# Clamp each fence to the observed extreme of its category (paired by position).
upper['wt'] = [min([observed, fence]) for (observed, fence) in zip(list(qmax.iloc[:, 0]), upper['wt'])]
lower['wt'] = [max([observed, fence]) for (observed, fence) in zip(list(qmin.iloc[:, 0]), lower['wt'])]

# stems
p4.segment(cats, upper.wt, cats, q3.wt, line_width=2, line_color="black")
p4.segment(cats, lower.wt, cats, q1.wt, line_width=2, line_color="black")

# boxes: upper half spans median..Q3, lower half spans Q1..median
p4.rect(cats, (q3.wt+q2.wt)/2, 0.7, q3.wt-q2.wt, fill_color="#E08E79", line_width=2, line_color="black")
p4.rect(cats, (q2.wt+q1.wt)/2, 0.7, q2.wt-q1.wt, fill_color="#3B8686", line_width=2, line_color="black")

# whiskers (almost-0 height rects simpler than segments)
p4.rect(cats, lower.wt, 0.2, 0.01, line_color="black")
p4.rect(cats, upper.wt, 0.2, 0.01, line_color="black")

# outliers -- scatter() (as used for p1) instead of circle() with `size`,
# which newer Bokeh releases deprecate
p4.scatter(outx, outy, size=6, color="#F38630", fill_alpha=0.6)

p4.xgrid.grid_line_color = None
p4.ygrid.grid_line_color = "white"
p4.grid.grid_line_width = 2
p4.xaxis.major_label_text_font_size="12pt"
p4.yaxis.axis_label = 'Weight (x1000 lbs)'

show(p4)