In [1]:
import numpy as np
import pandas as pd

from bokeh.io import push_notebook, show, output_notebook, output_file
from bokeh.layouts import row
from bokeh.plotting import figure, output_file
from bokeh.models import HoverTool, Jitter, ColumnDataSource
from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap
output_notebook()

In [2]:
df = pd.read_csv('car-clean-mpg.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg             392 non-null float64
cylinders       392 non-null int64
displacement    392 non-null float64
horsepower      392 non-null float64
weight          392 non-null float64
acceleration    392 non-null float64
model_year      392 non-null int64
origin          392 non-null int64
car_name        392 non-null object
dtypes: float64(5), int64(3), object(1)
memory usage: 27.6+ KB


In [4]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,car_name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [None]:
plot = figure(plot_width=300, plot_height=300)
plot.hbar(y=df['cylinders'], height=0.5, left=0, right=df['mpg'], color="teal")
plot.xaxis.axis_label='MPG'
plot.yaxis.axis_label='Number of Cylinders'

show(plot)

output_file('cylindersVMPG.html')

In [None]:
# same plot as above but playing with grouping classes to try to 
# accomodate categorical variables

df.cyl = df.cylinders.astype(str)
group = df.groupby('cylinders')

source = ColumnDataSource(group)

# cyl_cmap = factor_cmap('cylinders', factors=sorted(df.cyl.unique()))

plot = figure(plot_height=300, title='MPG by Number of Cylinders')
plot.hbar(y=df['cylinders'], height=1, left=0, right=df['mpg'], color="teal")
plot.xaxis.axis_label='MPG'
plot.yaxis.axis_label='Number of Cylinders'

show(plot)

In [None]:
colormap = {1: "red", 2: "blue", 3: "green"}
colors = [colormap[x] for x in df['origin']]

hover = HoverTool(tooltips=[
    ("origin", '{}'.format(df['origin'])),
    ("MPG", '{}'.format(df['mpg'])),
    ('description', '{}'.format(dict(df['car_name'])))
])

p = figure(title = 'MPG by Origin', tools=[hover])
p.xaxis.axis_label = 'MPG'
p.yaxis.axis_label = 'Origin'

p.circle(df['mpg'], df['origin'], color=colors, fill_alpha=0.2, size=10)

show(p)

In [17]:
# Same as above but with jitter and info on hover
colors = ['red', 'skyblue', 'green']

# df.cylinders = df.cylinders.astype(str)
# df.mpg = df.mpg.astype(str)
# df.car_name = df.car_name.astype(str)

# source = ColumnDataSource(df)

p = figure(title = 'MPG by Origin')
p.xaxis.axis_label = 'MPG'
p.yaxis.axis_label = 'Origin'

for i, origin in enumerate(list(df.origin.unique())):
    x = df[df['origin'] == origin]['mpg']
    color = colors[i % len(colors)]

    p.circle(x=x, y={'value': origin, 'transform': Jitter(width=.2)}, color=color)

p.tags = ["cheese", 'bleu']
p.add_tools(HoverTool(tooltips=[('x','@x'), ('cheese', p.select(tags))]))

# output_file('mpgVOrigin.html')
    
show(p)

ValueError: expected an element of either String or List(Tuple(String, String)), got [('x', '@x'), ('cheese', [Figure(id='d3e1df02-5f3a-4f7c-a311-e88dfcd4e39e', ...)])]

In [None]:
df.cylinders = df.cylinders.astype(str)
df.model_year = df.model_year.astype(str)

group = df.groupby(('cylinders', 'car_name'))

source = ColumnDataSource(group)
# index_cmap = factor_cmap('cylinders_car_name', palette=Spectral5, factors=sorted(df.cylinders.unique()), end=1)

p = figure(plot_width=2500, title='Mean MPG by Number of Cylinders and Car Name', x_range=group)

p.vbar(x='cylinders_car_name', top='mpg_mean', width=1, source=source,
      line_color='white', )

p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Car Name grouped by Number of Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

p.add_tools(HoverTool(tooltips=[("mpg", '@mpg_mean'), ('cylinders, car_name', '@cylinders_car_name')]))

show(p)