Permalink
Browse files

ENH: Added patsy support for HDDMRegression.

  • Loading branch information...
1 parent d222c31 commit f7132f48633517fab25cf877b442bb8af4d90ead @twiecki twiecki committed Mar 9, 2013
Showing with 29 additions and 26 deletions.
  1. +19 −7 hddm/models/hddm_regression.py
  2. +7 −17 hddm/tests/test_models.py
  3. +1 −0 requirements.txt
  4. +2 −2 setup.py
@@ -2,6 +2,7 @@
import numpy as np
import pymc as pm
import pandas as pd
+from patsy import dmatrix
import hddm
from hddm_gamma import HDDMGamma
@@ -74,23 +75,22 @@ def create_node(self, name, kwargs, data):
class KnodeRegressPatsy(kabuki.hierarchical.Knode):
def create_node(self, name, kwargs, data):
- from patsy import dmatrix
reg = kwargs['regressor']
# order parents according to user-supplied args
args = []
for arg in reg['args']:
for parent_name, parent in kwargs['parents'].iteritems():
- if parent_name.startswith(arg):
+ if parent_name == arg:
args.append(parent)
cols = data if reg['covariates'] is None else data[reg['covariates']]
parents = {'args': args}
- def func(args, patsy_func=reg['func']):
+ def func(args, design_matrix=dmatrix(reg['func'], cols)):
# convert parents to matrix
params = np.matrix(args)
# Apply design matrix to input data
- predictor = (dmatrix(patsy_func, cols) * params).sum(axis=1)
+ predictor = (design_matrix * params).sum(axis=1)
return pd.DataFrame(predictor, index=cols.index)
return self.pymc_node(func, kwargs['doc'], name, parents=parents)
@@ -127,7 +127,7 @@ class HDDMRegressor(HDDMGamma):
m = hddm.HDDMRegressor(data, reg, depends_on={'v_slope':'trial_type'})
"""
- def __init__(self, data, regressor=None, **kwargs):
+ def __init__(self, data, regressor=None, group_only_regressors=None, **kwargs):
"""Hierarchical Drift Diffusion Model with regressors
"""
#create self.regressor and self.reg_outcome
@@ -136,10 +136,22 @@ def __init__(self, data, regressor=None, **kwargs):
regressor = [regressor]
use_patsy = isinstance(regressor[0]['func'], str)
- self.knode_regress = KnodeRegressPatsy if use_patsy else KnodeRegress
+ if use_patsy:
+ self.knode_regress = KnodeRegressPatsy if use_patsy else KnodeRegress
+ regressor[0]['args'] = dmatrix(regressor[0]['func'], data).design_info.column_names
+ print "Adding these covariates:"
+ print regressor[0]['args']
+ if group_only_regressors:
+ group_only_nodes = list(kwargs.get('group_only_nodes', ()))
+ group_only_nodes += regressor[0]['args']
+ kwargs['group_only_nodes'] = group_only_nodes
+
+ # Sanity checks
+ assert len(regressor) == 1, "Currently only one regressor is allowed when using patsy."
+ depends = set(kwargs.get('depends_on', {}).keys())
+ assert not depends.issubset(set(regressor[0]['args'])), "When using patsy, you can not use any regressor in depends_on."
self.reg_outcomes = set() # holds all the parameters that are going to modeled as outcome
- self.reg_covariates = set()
for reg in regressor:
if isinstance(reg['args'], str):
reg['args'] = [reg['args']]
@@ -303,32 +303,22 @@ def test_HDDMRegressorGroupOnlyDepends(self):
def test_HDDMRegressorPatsy(self):
- reg_func = 'cov'
-
- reg = {'func': reg_func, 'args':['v_slope', 'v_inter'],
+ reg = {'func': 'cov * C(condition)',
'outcome':'v'}
params = hddm.generate.gen_rand_params(cond_dict={'v': [1, 2, 3]})
data, params_true = hddm.generate.gen_rand_data(params[0], size=10, subjs=4)
data = pd.DataFrame(data)
data['cov'] = 1.
- # Create one merged column
- data['condition2'] = 'merged'
- data[data.condition == 'c1']['condition2'] = 'single'
m = hddm.HDDMRegressor(data, regressor=reg,
- depends_on={'v_slope': 'condition', 'a': 'condition2'},
- group_only_nodes=['v_slope', 'v_inter'])
- m.sample(self.iter, burn=self.burn)
- m = hddm.HDDMRegressor(data, regressor=reg,
- depends_on={'v_slope': 'condition2', 'a': 'condition'},
- group_only_nodes=['v_slope', 'v_inter'])
+ depends_on={'a': 'condition'})
m.sample(self.iter, burn=self.burn)
- self.assertTrue(isinstance(m.nodes_db.ix['wfpt(c1.merged).0']['node'].parents['v'].parents['args'][0], pm.Normal))
- self.assertEqual(m.nodes_db.ix['wfpt(c1.merged).0']['node'].parents['v'].parents['args'][0].__name__, 'v_slope(merged)')
- self.assertTrue(isinstance(m.nodes_db.ix['wfpt(c1.merged).0']['node'].parents['v'].parents['args'][1], pm.Normal))
- self.assertEqual(m.nodes_db.ix['wfpt(c1.merged).0']['node'].parents['v'].parents['args'][1].__name__, 'v_inter')
- self.assertEqual(len(np.unique(m.nodes_db.ix['wfpt(c1.merged).0']['node'].parents['v'].value)), 1)
+ self.assertTrue(isinstance(m.nodes_db.ix['wfpt(c1).0']['node'].parents['v'].parents['args'][0], pm.Normal))
+ self.assertEqual(m.nodes_db.ix['wfpt(c1).0']['node'].parents['v'].parents['args'][0].__name__, 'Intercept_subj.0')
+ self.assertTrue(isinstance(m.nodes_db.ix['wfpt(c1).0']['node'].parents['v'].parents['args'][1], pm.Normal))
+ self.assertEqual(m.nodes_db.ix['wfpt(c1).0']['node'].parents['v'].parents['args'][1].__name__, 'C(condition)[T.c1]_subj.0')
+ self.assertEqual(len(np.unique(m.nodes_db.ix['wfpt(c1).0']['node'].parents['v'].value)), 1)
def test_posterior_plots_breakdown():
View
@@ -9,3 +9,4 @@ pytz==2012d
wsgiref==0.1.2
matplotlib==1.1.0
git+https://github.com/hddm-devs/kabuki.git@master
+patsy
View
@@ -26,8 +26,8 @@
package_data={'hddm':['examples/*.csv', 'examples/*.conf']},
scripts=['scripts/hddm_fit.py', 'scripts/hddm_demo.py'],
description='HDDM is a python module that implements Hierarchical Bayesian estimation of Drift Diffusion Models.',
- install_requires=['NumPy >=1.5.0', 'SciPy >= 0.6.0', 'kabuki >= 0.4.1', 'PyMC >= 2.2'],
- setup_requires=['NumPy >=1.5.0', 'SciPy >= 0.6.0', 'kabuki >= 0.4.1', 'PyMC >= 2.2'],
+ install_requires=['NumPy >=1.5.0', 'SciPy >= 0.6.0', 'kabuki >= 0.4.1', 'PyMC >= 2.2', 'patsy'],
+ setup_requires=['NumPy >=1.5.0', 'SciPy >= 0.6.0', 'kabuki >= 0.4.1', 'PyMC >= 2.2', 'patsy'],
include_dirs = [np.get_include()],
classifiers=[
'Development Status :: 5 - Production/Stable',

0 comments on commit f7132f4

Please sign in to comment.