In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import theano.tensor as tt

# Load the autoreload extension and set it to mode 2, so that imported
# modules are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook, using the 'retina'
# (high-resolution) figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Build the working sample in a single pipeline: read the batting table,
# discard unusable rows, derive the batting average, and keep a small
# recent slice.
df = (
    pd.read_csv('../data/baseballdb/core/Batting.csv')
    # Zero at-bats would make H / AB undefined, so mark those as missing.
    .assign(AB=lambda frame: frame['AB'].replace(0, np.nan))
    # dropna() with default arguments removes every row containing a NaN in
    # *any* column, not only the AB values marked above.
    .dropna()
    .assign(batting_avg=lambda frame: frame['H'] / frame['AB'])
    # Seasons from 2016 onward only, then the first 15 remaining rows.
    .loc[lambda frame: frame['yearID'] >= 2016]
    .iloc[0:15]
)
df.head(5)

In [None]:
# Display the whole working sample (at most 15 rows after the iloc[0:15]
# slice applied when df was built).
df

In [None]:
# Hierarchical (partial-pooling) model of hits out of at-bats.
#   phi       - shared mean hit probability, Uniform(0, 1)
#   kappa_log - Exponential(lam=1.5); non-negative, so kappa = exp(kappa_log) >= 1
#   kappa     - concentration of the shared Beta prior (tracked as a Deterministic)
#   thetas    - one latent hit probability per row of df,
#               Beta(alpha=phi*kappa, beta=(1-phi)*kappa)
#   like      - observed H modelled as Binomial(n=AB, p=thetas)
# `tt` (theano.tensor) is imported in the notebook's import cell.
with pm.Model() as baseball_model:

    # Population-level parameters shared by every row.
    phi = pm.Uniform('phi', lower=0.0, upper=1.0)
    kappa_log = pm.Exponential('kappa_log', lam=1.5)
    kappa = pm.Deterministic('kappa', tt.exp(kappa_log))

    # Row-level hit probabilities drawn from the shared Beta prior.
    thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=len(df))
    # Likelihood: observed hits given at-bats and the latent probabilities.
    like = pm.Binomial('like', n=df['AB'], p=thetas, observed=df['H'])

In [None]:
# Fixed seed so that the sampling run gives the same trace every time the
# notebook is restarted and run from the top.
RANDOM_SEED = 42

with baseball_model:
    # Draw 2000 posterior samples; init='advi' asks PyMC3 to initialise the
    # sampler with ADVI before sampling starts.
    trace = pm.sample(2000, init='advi', random_seed=RANDOM_SEED)

In [None]:
# Draw PyMC3's trace plot for the sampled trace; no variable filter is
# passed, so the selection of variables is left to traceplot's defaults.
pm.traceplot(trace)

In [None]:
# Label each theta with the "<AB> <H>" values of its row, then draw the
# forest plot for the per-row hit probabilities only.
# NOTE(review): AB may be float after the 0 -> NaN replacement upstream, in
# which case labels read like "5.0 1" - confirm this is the intended format.
ylabels = (
    df['AB'].astype(str)
    .add(' ')
    .add(df['H'].astype(str))
)
pm.forestplot(
    trace,
    varnames=['thetas'],
    ylabels=ylabels,
)