<hr />

# Baseball App Example

In this example we use Blaze and Bokeh to explore the Lahman Baseball Statistics database.



In [1]:
import numpy as np
import pandas as pd

import blaze as bz
from bokeh.plotting import *
from odo import odo
output_notebook()

In [None]:
# Point Blaze at the Lahman SQLite file; the relative URI presumably expects
# the file next to this notebook (verify the working directory).
LAHMAN_URI = 'sqlite:///lahman2013.sqlite'
db = bz.Data(LAHMAN_URI)

# Show the datashape of the whole database as the cell output.
db.dshape

In [None]:
# Distinct team IDs appearing in the Salaries table, materialized as a list.
team_ids = db.Salaries.teamID.distinct()
list(team_ids)

In [None]:
# Same distinct-team expression, this time computed explicitly and then
# converted to a pandas DataFrame for display.
distinct_teams = db.Salaries["teamID"].distinct()
r = bz.compute(distinct_teams)
odo(r, pd.DataFrame)

In [None]:
# Per-team salary summary: mean, max, and max/min ratio, ordered by the
# ratio from largest to smallest, then pulled into pandas.
salaries = db.Salaries
pay = salaries.salary
result = bz.by(
    salaries.teamID,
    avg=pay.mean(),
    max=pay.max(),
    ratio=pay.max() / pay.min(),
).sort('ratio', ascending=False)
df = odo(result, pd.DataFrame)

In [None]:
# Preview the first rows of the per-team salary summary.
df.head()

In [None]:
# Order the teams by average salary; that order also drives the
# categorical x axis of the figure.
df = df.sort('avg')
team_order = list(df["teamID"])

source = ColumnDataSource(df)
p = figure(x_range=team_order)
p.scatter(x="teamID", y="avg", source=source)
show(p)

Hmm, can't read the x-axis labels very well... let's rotate them.

In [None]:
# Same scatter of average salary per team as before.
df = df.sort('avg')
source = ColumnDataSource(df)
team_order = list(df["teamID"])

p = figure(x_range=team_order)
p.scatter(x="teamID", y="avg", source=source)

# Tilt the team labels on the x axis (angle in radians).
label_angle = np.pi/3
p.xaxis.major_label_orientation = label_angle

show(p)

Let's view the average salary next to the max/min salary ratio.

In [None]:
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,lasso_select"
label_angle = np.pi/3  # tilt for the team labels, in radians

df = df.sort('avg')
source = ColumnDataSource(df)

# Left panel: average salary per team.
s1 = figure(title="Pay Avg", x_range=source.data["teamID"], tools=TOOLS, width=500)
s1.scatter(x="teamID", y="avg", source=source)

# Right panel: max/min salary ratio. It reuses s1's x_range object and the
# same data source as the left panel.
s2 = figure(title="Pay Ratio", x_range=s1.x_range, tools=TOOLS, width=500)
s2.scatter(x="teamID", y="ratio", source=source)

for panel in (s1, s2):
    panel.xaxis.major_label_orientation = label_angle

# One row, two columns.
p = gridplot([[s1, s2]])
show(p)

<hr/>

Now let's bring in the `AllstarFull` table to see how max salaries and all-star counts correlate. First we count the all-star selections per team.

In [None]:
# Count all-star rows per team, teams with the most selections first.
result = bz.by(db.AllstarFull.teamID, all_stars=db.AllstarFull.playerID.count()
                ).sort('all_stars', ascending=False)
r = bz.Data(odo(result, pd.DataFrame))
m = odo(r, pd.DataFrame)["all_stars"].max()

# Single-argument print(...) with str.format produces the same text on
# Python 2 and Python 3 (the bare print statement is Python-2 only).
print("max number of all stars from a single team: {0}".format(m))

print("normalized list of all_stars:\n{0}".format(bz.compute((r.all_stars / m).head())))

# Now let's use this as the size of the circles in the scatter plot
df1 = odo(r, pd.DataFrame)
# Divide by 10.0 (not 10) so the scale factor is never truncated by
# Python 2 integer division; the largest count maps to 10 before the offset.
scale = df1['all_stars'].max() / 10.0
df1['all_stars'] = df1['all_stars'] / scale
# Offset so that even teams with few selections get a visible marker.
df1['all_stars'] += 10

<hr/>

Now let's join the salary data with the all-star marker sizes.

In [None]:
# Wrap both pandas frames as Blaze data and join them on the team ID.
all_star_sizes = bz.Data(df1)
salary_summary = bz.Data(df)
r = bz.join(all_star_sizes, salary_summary, 'teamID')
r.head()

In [None]:
# Materialize the join and order the teams by their max salary.
df_j = odo(r, pd.DataFrame)
df_j = df_j.sort("max")
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(df_j.head())

# Max salary per team; marker size comes from the scaled all_stars column.
source = odo(df_j, ColumnDataSource)
p = figure(x_range=list(df_j["teamID"]))
p.scatter(x="teamID", y="max", size="all_stars", source=source, fill_alpha=0.5)
p.xaxis.major_label_orientation = np.pi/3

show(p)

<hr/>

Now let's make this an interactive plot!

In [None]:
def compute_df(year=2012):
    """Build the per-team frame plotted for a single season.

    Parameters
    ----------
    year : int, optional
        Value matched against ``yearID`` in both ``db.Salaries`` and
        ``db.AllstarFull``. Defaults to 2012.

    Returns
    -------
    pandas.DataFrame
        Join on ``teamID`` of the per-team ``max`` salary and the scaled
        ``all_stars`` marker size, sorted ascending by ``max``.
    """
    # Highest salary per team for the requested season. The filtered rows
    # are pulled into pandas first and the grouping runs on that frame.
    salaries_year = db.Salaries[db.Salaries.yearID == year]
    salaries_year = bz.Data(odo(salaries_year, pd.DataFrame))
    result = bz.by(salaries_year.teamID,
                   max=salaries_year.salary.max()).sort('max', ascending=False)
    df = odo(result, pd.DataFrame)

    # All-star selections per team for the same season. Both the grouper and
    # the count must come from the *filtered* expression; counting on the
    # unfiltered db.AllstarFull would ignore the year filter.
    asf_year = db.AllstarFull[db.AllstarFull.yearID == year]
    asf_year = bz.Data(odo(asf_year, pd.DataFrame))
    result = bz.by(asf_year.teamID,
                   all_stars=asf_year.playerID.count()).sort('all_stars', ascending=False)
    df1 = odo(result, pd.DataFrame)

    # Turn counts into marker sizes: largest count -> 10, then offset by 10.
    # 10.0 keeps the scale factor a float, so Python 2 integer division
    # cannot truncate it to 0 when a season's largest count is below 10.
    scale = df1['all_stars'].max() / 10.0
    df1['all_stars'] = df1['all_stars'] / scale
    df1['all_stars'] += 10

    r = bz.join(bz.Data(df1), bz.Data(df), 'teamID')
    df_j = odo(r, pd.DataFrame)
    df_j = df_j.sort("max")
    return df_j

# Seed the data source with compute_df()'s default season.
initial_frame = compute_df()
source = odo(initial_frame, ColumnDataSource)

# Categorical x axis in the order the teams appear in the source.
team_order = list(source.data["teamID"])
p = figure(x_range=team_order)
p.scatter(x="teamID", y="max", size="all_stars", source=source, fill_alpha=0.5)
p.xaxis.major_label_orientation = np.pi/3

In [None]:
from IPython.html.widgets import interact, IntSliderWidget

def update(year):
    """Recompute the plotted columns for ``year`` and push them to the notebook.

    Parameters
    ----------
    year : int
        Season passed through to ``compute_df``.

    Notes
    -----
    ``compute_df`` returns its rows sorted by that season's max salary, while
    the ``teamID`` column already held by ``source`` keeps the order it was
    created with. The new values are therefore re-aligned on ``teamID``
    before being assigned; assigning them positionally would attach values to
    the wrong teams. Teams in ``source`` with no row for ``year`` get NaN, and
    teams that only exist in ``year`` are not added to the plot.
    """
    shown_teams = list(source.data['teamID'])
    df = compute_df(year).set_index('teamID').reindex(shown_teams)
    source.data['all_stars'] = df['all_stars']
    source.data['max'] = df['max']
    source.push_notebook()

interact(update, year=IntSliderWidget(min=1985, max=2013, value=2013))

In [None]:
# Render the figure whose data source the slider callback updates.
show(p)