Skip to content

Commit

Permalink
Merge pull request #19 from bambinos/default_priors
Browse files Browse the repository at this point in the history
Default priors
  • Loading branch information
tyarkoni committed Aug 25, 2016
2 parents 58df44c + 4d5573b commit a6b4bed
Show file tree
Hide file tree
Showing 4 changed files with 424 additions and 118 deletions.
86 changes: 48 additions & 38 deletions bambi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from collections import OrderedDict, defaultdict
from bambi.utils import listify
from patsy import dmatrices, dmatrix
import statsmodels.api as sm
import warnings
from bambi.priors import PriorFactory
from copy import deepcopy
Expand Down Expand Up @@ -60,22 +59,35 @@ def build(self):
raise ValueError("No outcome (y) variable is set! Please call "
"set_y() before build() or fit().")

# compute and store information used to set the default priors
# X = fixed effects design matrix
# R2X = 1 - 1/VIF for each x, i.e., R2 for predicting each x from all other x's
# R2Y = R2 for predicting y from all x's *other than* the current x
# compute information used to set the default priors
# X = fixed effects design matrix (excluding intercept/constant term)
# r2_x = 1 - 1/VIF for each x, i.e., R2 for predicting each x from all other x's
# r2_y = R2 for predicting y from all x's *other than* the current x
X = pd.concat([pd.DataFrame(x.data, columns=x.levels) for x in self.terms.values()
if x.type_=='fixed' and x.name != 'Intercept'], axis=1)
self.R2X = pd.Series({x:sm.OLS(X[x], sm.add_constant(X.drop(x, axis=1))).fit().rsquared
for x in list(X.columns)})
self.R2Y = pd.Series({x:sm.OLS(self.y.data, sm.add_constant(X.drop(x, axis=1))).fit().rsquared
for x in list(X.columns)})
self.SDX = X.std()
self.SDY = self.y.data.std()
self.meanX = X.mean(0)
default_prior_info = {
'r2_x':pd.Series({x:pd.stats.api.ols(y=X[x], x=X.drop(x, axis=1)).r2
for x in list(X.columns)}),
'r2_y':pd.Series({x:pd.stats.api.ols(y=self.y.data.squeeze(), x=X.drop(x, axis=1)).r2
for x in list(X.columns)}),
'sd_x':X.std(),
'sd_y':self.y.data.std(),
'mean_x':X.mean(axis=0),
'mean_y':self.y.data.mean()
}

# save some info possibly useful for diagnostics, and send to ModelResults
# mat = correlation matrix of X, w/ diagonal replaced by X means
mat = X.corr()
for x in list(mat.columns): mat.loc[x,x] = default_prior_info['mean_x'][x]
self._diagnostics = {
# the Variance Inflation Factors (VIF), which is possibly useful for diagnostics
'VIF':1/(1 - default_prior_info['r2_x']),
'corr_mean_X':mat
}

for t in self.terms.values():
t._setup()
t._setup(model=self, default_prior_info=default_prior_info)
self.backend.build(self)
self.built = True

Expand Down Expand Up @@ -222,10 +234,10 @@ def add_term(self, variable, data=None, label=None, categorical=False,
label = variable

if random:
term = RandomTerm(self, label, data, categorical=categorical,
term = RandomTerm(name=label, data=data, categorical=categorical,
prior=prior)
else:
term = Term(self, label, data, categorical=categorical,
term = Term(name=label, data=data, categorical=categorical,
prior=prior)
self.terms[term.name] = term
self.built = False
Expand All @@ -242,11 +254,10 @@ class Term(object):

type_ = 'fixed'

def __init__(self, model, name, data, categorical=False, prior=None,
def __init__(self, name, data, categorical=False, prior=None,
**kwargs):
'''
Args:
model (Model): The associated Model instance.
name (str): Name of the term.
data (DataFrame, ndarray): The pandas DataFrame or numpy array from
containing the data. If a DF is passed, the variable names
Expand All @@ -261,7 +272,6 @@ def __init__(self, model, name, data, categorical=False, prior=None,
kwargs: Optional keyword arguments passed to the model-building
back-end.
'''
self.model = model
self.name = name
self.categorical = categorical
self.prior = prior
Expand All @@ -280,11 +290,11 @@ def __init__(self, model, name, data, categorical=False, prior=None,
self.data = data
self.kwargs = kwargs

def _setup(self):
def _setup(self, model, default_prior_info):
# set up default priors if no prior has been explicitly set
if self.prior is None:
term_type = 'intercept' if self.name == 'Intercept' else 'fixed'
self.prior = self.model.default_priors.get(term=term_type)
self.prior = model.default_priors.get(term=term_type)
# these default priors are only defined for Normal priors, although we
# could probably easily handle Cauchy by just substituting 'sd' -> 'beta'
if self.prior.name=='Normal':
Expand All @@ -294,33 +304,33 @@ def _setup(self):
wide = 3**-.5
# handle slopes
if term_type=='fixed':
slope_constant = self.model.SDY * (1 - self.model.R2Y[self.levels]) / \
self.model.SDX[self.levels] / (1 - self.model.R2X[self.levels])
slope_constant = default_prior_info['sd_y'] * (1 - default_prior_info['r2_y'][self.levels]) / \
default_prior_info['sd_x'][self.levels] / (1 - default_prior_info['r2_x'][self.levels])
self.prior.update(sd = wide * slope_constant.values)
# handle the intercept
else:
index = list(self.model.R2Y.index)
intercept_SD = self.model.SDY * (1 - self.model.R2Y[index]) / \
self.model.SDX[index] / (1 - self.model.R2X[index])
index = list(default_prior_info['r2_y'].index)
intercept_SD = default_prior_info['sd_y'] * (1 - default_prior_info['r2_y'][index]) / \
default_prior_info['sd_x'][index] / (1 - default_prior_info['r2_x'][index])
intercept_SD *= wide
intercept_SD = np.dot(intercept_SD**2, self.model.meanX[index]**2)**.5
self.prior.update(sd = intercept_SD)
intercept_SD = np.dot(intercept_SD**2, default_prior_info['mean_x'][index]**2)**.5
self.prior.update(mu=default_prior_info['mean_y'], sd=intercept_SD)


class RandomTerm(Term):

type_ = 'random'

def __init__(self, model, name, data, yoke=False, prior=None, **kwargs):
def __init__(self, name, data, yoke=False, prior=None, **kwargs):

self.yoke = yoke
super(RandomTerm, self).__init__(model, name, data, prior=prior, **kwargs)
super(RandomTerm, self).__init__(name, data, prior=prior, **kwargs)

def _setup(self):
def _setup(self, model, default_prior_info):
# set up default priors if no prior has been explicitly set
if self.prior is None:
term_type = 'intercept' if '|' not in self.name else 'slope'
self.prior = self.model.default_priors.get(term='random')
self.prior = model.default_priors.get(term='random')
# these default priors are only defined for HalfCauchy priors,
if self.prior.args['sd'].name=='HalfCauchy':
# as above, 'wide' prior SD is sqrt(1/3) = .577 on the partial corr scale,
Expand All @@ -335,16 +345,16 @@ def _setup(self):
# note that without this, it would break on random slopes for
# categorical predictors! Here we simply skip that case, but we
# should make it correctly handle default priors for that case
if fix in list(self.model.R2Y.index):
slope_constant = self.model.SDY * (1 - self.model.R2Y[fix]) / \
self.model.SDX[fix] / (1 - self.model.R2X[fix])
if fix in list(default_prior_info['r2_y'].index):
slope_constant = default_prior_info['sd_y'] * (1 - default_prior_info['r2_y'][fix]) / \
default_prior_info['sd_x'][fix] / (1 - default_prior_info['r2_x'][fix])
self.prior.args['sd'].update(beta = wide * slope_constant)
# handle random intercepts
else:
index = list(self.model.R2Y.index)
intercept_beta = self.model.SDY * (1 - self.model.R2Y[index]) / \
self.model.SDX[index] / (1 - self.model.R2X[index])
index = list(default_prior_info['r2_y'].index)
intercept_beta = default_prior_info['sd_y'] * (1 - default_prior_info['r2_y'][index]) / \
default_prior_info['sd_x'][index] / (1 - default_prior_info['r2_x'][index])
intercept_beta *= wide
intercept_beta = np.dot(intercept_beta**2, self.model.meanX[index]**2)**.5
intercept_beta = np.dot(intercept_beta**2, default_prior_info['mean_x'][index]**2)**.5
self.prior.args['sd'].update(beta = intercept_beta)

1 change: 1 addition & 0 deletions bambi/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def __init__(self, model, trace):
self.model = model
self.terms = list(model.terms.values())
self.trace = trace
self.diagnostics = model._diagnostics
self.n_terms = len(model.terms)
self.n_samples = len(trace)
self._fixed_terms = [t.name for t in self.terms if t.type_=='fixed']
Expand Down
9 changes: 5 additions & 4 deletions bambi/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ def base_model(diabetes_data):


def test_term_init(diabetes_data):
model = Model(diabetes_data)
term = Term(model, 'BMI', diabetes_data['BMI'])
# model = Model(diabetes_data)
# term = Term(model, 'BMI', diabetes_data['BMI'])
term = Term('BMI', diabetes_data['BMI'])
# Test that all defaults are properly initialized
assert term.name == 'BMI'
assert term.categorical == False
Expand Down Expand Up @@ -78,8 +79,8 @@ def test_add_term_to_model(base_model):
def test_one_shot_formula_fit(base_model):
base_model.fit('BMI ~ S1 + S2', samples=50)
nv = base_model.backend.model.named_vars
targets = ['likelihood', 'b_S1', 'likelihood_sd_log_', 'b_Intercept']
assert len(set(nv.keys()) & set(targets)) == 4
targets = ['likelihood', 'b_S1', 'b_Intercept']
assert len(set(nv.keys()) & set(targets)) == 3
assert len(base_model.backend.trace) == 50


Expand Down
446 changes: 370 additions & 76 deletions examples/default_priors.ipynb

Large diffs are not rendered by default.

0 comments on commit a6b4bed

Please sign in to comment.