In [1]:
import ipyparallel
import hddm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

print(hddm.__version__)

0.8.0




### Load the data

In [2]:
# Read the study 2 trial data from CSV; the result is kept in `data` and is
# the input to every later cell of this notebook.
data = hddm.load_csv('data/study2.csv')

In [3]:
# Flip the RTs of lower-value choices to negative RTs: in the table displayed
# below, rows with response == 0 carry a negative rt and rows with
# response == 1 a positive one.
data = hddm.utils.flip_errors(data)
# Display the resulting frame as the cell output.
data

Unnamed: 0.1,Unnamed: 0,subj_idx,stim,rt,response,group,arousal,valence,participant
0,1,0,0V0A,6.05310,1,group4,3,3,96107
1,2,0,0V0A,5.08070,1,group4,3,3,96107
2,3,0,0V0A,4.06150,1,group4,3,3,96107
3,4,0,0V0A,-1.90120,0,group4,3,3,96107
4,5,0,0V0A,5.63570,1,group4,3,3,96107
...,...,...,...,...,...,...,...,...,...
17775,17776,163,1Vne1A,-10.02613,0,group1,4,4,77556
17776,17777,163,1Vne1A,6.66940,1,group1,3,4,77556
17777,17778,163,1Vne1A,2.12080,1,group1,3,4,77556
17778,17779,163,1Vne1A,-2.15360,0,group1,3,4,77556


In [4]:
# Recode the design into 2*2 factors stored as one-character strings:
#   between-subject (induction group): mood_valence & mood_arousal
#   within-subject (decision task):    movie_valence & movie_arousal
# The first character of a "<v>V<a>A" code is the valence level and the
# second-to-last character is the arousal level.

# induction group label -> "<valence>V<arousal>A" code
di = {'group1': "1V1A", 'group2': "1V0A", 'group3': "0V1A", 'group4': "0V0A"}

# Remove the '1Vne1A' trials first: the character slicing below would code
# them exactly like '1V1A' trials.
data = data.drop(data.loc[data.stim == '1Vne1A'].index)

# Task factors come from the stimulus code. The original group label is kept
# in group_code, while the group column itself is rewritten to the code form.
data = data.assign(
    movie_valence=data.stim.str[:1],
    movie_arousal=data.stim.str[-2:-1],
    group_code=data.group,
    group=data.group.replace(di),
)

# Mood factors come from the recoded group column.
data = data.assign(
    mood_valence=data.group.str[:1],
    mood_arousal=data.group.str[-2:-1],
)

In [6]:
data

Unnamed: 0.1,Unnamed: 0,subj_idx,stim,rt,response,group,arousal,valence,participant,movie_valence,movie_arousal,group_code,mood_valence,mood_arousal
0,1,0,0V0A,6.0531,1,0V0A,3,3,96107,0,0,group4,0,0
1,2,0,0V0A,5.0807,1,0V0A,3,3,96107,0,0,group4,0,0
2,3,0,0V0A,4.0615,1,0V0A,3,3,96107,0,0,group4,0,0
3,4,0,0V0A,-1.9012,0,0V0A,3,3,96107,0,0,group4,0,0
4,5,0,0V0A,5.6357,1,0V0A,3,3,96107,0,0,group4,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17747,14220,163,1V1A,-5.5979,0,1V1A,4,4,77556,1,1,group1,1,1
17748,14221,163,1V1A,3.0446,1,1V1A,3,4,77556,1,1,group1,1,1
17749,14222,163,1V1A,5.3217,1,1V1A,3,4,77556,1,1,group1,1,1
17750,14223,163,1V1A,2.1794,1,1V1A,3,4,77556,1,1,group1,1,1


# FULL Regression Model Fixed Model

In [7]:
# Specify the full HDDM regression model.
# Four decision parameters are included, each regressed on the same design:
#   drift rate: v; decision boundary: a; non-decision time: t; decision bias: z
# p_outlier=0.05 specifies a mixture model in which 5% of the RTs are treated
# as contaminant RTs drawn from a uniform distribution.

# Shared right-hand side: the four main effects plus the valence and the
# arousal interaction terms.
full_design = ("movie_valence + movie_arousal + mood_valence + mood_arousal"
               "+ movie_valence * mood_valence"
               "+ movie_arousal * mood_arousal")
full_formulas = [param + " ~ " + full_design for param in ('v', 'a', 't', 'z')]

m_FULL_hypothesis = hddm.HDDMRegressor(
    data,
    full_formulas,
    include=('v', 'a', 't', 'z'),
    p_outlier=0.05,
)

Adding these covariates:
['v_Intercept', 'v_movie_valence[T.1]', 'v_movie_arousal[T.1]', 'v_mood_valence[T.1]', 'v_mood_arousal[T.1]', 'v_movie_valence[T.1]:mood_valence[T.1]', 'v_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['a_Intercept', 'a_movie_valence[T.1]', 'a_movie_arousal[T.1]', 'a_mood_valence[T.1]', 'a_mood_arousal[T.1]', 'a_movie_valence[T.1]:mood_valence[T.1]', 'a_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['t_Intercept', 't_movie_valence[T.1]', 't_movie_arousal[T.1]', 't_mood_valence[T.1]', 't_mood_arousal[T.1]', 't_movie_valence[T.1]:mood_valence[T.1]', 't_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['z_Intercept', 'z_movie_valence[T.1]', 'z_movie_arousal[T.1]', 'z_mood_valence[T.1]', 'z_mood_arousal[T.1]', 'z_movie_valence[T.1]:mood_valence[T.1]', 'z_movie_arousal[T.1]:mood_arousal[T.1]']


In [None]:
# Find an initial estimate for each parameter before sampling;
# this speeds up the MCMC chain fitting.
m_FULL_hypothesis.find_starting_values()

# Start sampling the MCMC chain:
# draw 10000 samples, with the first 2000 as burn-in samples,
# and save the traces into a pickle-format db file.
m_FULL_hypothesis.sample(10000, burn=2000, dbname='study2_regression_full_hypothesis.db', db='pickle')

# Save the model itself (separately from the trace db file above).
m_FULL_hypothesis.save('study2_regression_full_hypothesis')

### Fixed effect interaction model

In [8]:
# The interaction model gives the effect of movie valence at each level of
# mood valence, and the effect of movie arousal at each level of mood arousal.
# Note: this model is equivalent to m_FULL_hypothesis (same fit, different
# parameterization).
# Check the patsy documentation for more details on the formula syntax:
# https://patsy.readthedocs.io/en/latest/

# Shared right-hand side used for every decision parameter.
interaction_design = "movie_valence:mood_valence + movie_arousal:mood_arousal"
interaction_formulas = [param + " ~ " + interaction_design
                        for param in ('v', 'a', 't', 'z')]

m_FULL_hypothesis_interaction = hddm.HDDMRegressor(
    data,
    interaction_formulas,
    include=('v', 'a', 't', 'z'),
    p_outlier=0.05,
)

Adding these covariates:
['v_Intercept', 'v_mood_valence[T.1]', 'v_movie_valence[T.1]:mood_valence[0]', 'v_movie_valence[T.1]:mood_valence[1]', 'v_mood_arousal[T.1]', 'v_movie_arousal[T.1]:mood_arousal[0]', 'v_movie_arousal[T.1]:mood_arousal[1]']
Adding these covariates:
['a_Intercept', 'a_mood_valence[T.1]', 'a_movie_valence[T.1]:mood_valence[0]', 'a_movie_valence[T.1]:mood_valence[1]', 'a_mood_arousal[T.1]', 'a_movie_arousal[T.1]:mood_arousal[0]', 'a_movie_arousal[T.1]:mood_arousal[1]']
Adding these covariates:
['t_Intercept', 't_mood_valence[T.1]', 't_movie_valence[T.1]:mood_valence[0]', 't_movie_valence[T.1]:mood_valence[1]', 't_mood_arousal[T.1]', 't_movie_arousal[T.1]:mood_arousal[0]', 't_movie_arousal[T.1]:mood_arousal[1]']
Adding these covariates:
['z_Intercept', 'z_mood_valence[T.1]', 'z_movie_valence[T.1]:mood_valence[0]', 'z_movie_valence[T.1]:mood_valence[1]', 'z_mood_arousal[T.1]', 'z_movie_arousal[T.1]:mood_arousal[0]', 'z_movie_arousal[T.1]:mood_arousal[1]']


In [None]:
# Fit the interaction model and save it: find starting values, draw 10000
# samples (first 2000 as burn-in) into a pickle-format trace db, then save
# the model.
m_FULL_hypothesis_interaction.find_starting_values()
m_FULL_hypothesis_interaction.sample(10000, burn=2000, dbname='study2_regression_full_hypothesis_interaction.db', db='pickle')
m_FULL_hypothesis_interaction.save('study2_regression_full_hypothesis_interaction')

# Reduced models
### We fitted several reduced models and tested whether any of them fits our empirical data better than the full model.

In [9]:
# Reduced model: only the drift rate (v) is regressed on the design factors.
# NOTE(review): unlike the v formula of the full model, this formula has no
# "movie_arousal * mood_arousal" term -- confirm that the omission is intended.
m_FULL_hypothesis_only_v = hddm.HDDMRegressor(
    data,
    ["v ~ movie_valence + movie_arousal + mood_valence + mood_arousal"
     + "+ movie_valence * mood_valence"],
    # ('v') is just the string 'v'; the trailing comma makes it a real
    # one-element tuple, matching the tuples passed for the other models.
    include=('v',),
    p_outlier=0.05,
)

Adding these covariates:
['v_Intercept', 'v_movie_valence[T.1]', 'v_movie_arousal[T.1]', 'v_mood_valence[T.1]', 'v_mood_arousal[T.1]', 'v_movie_valence[T.1]:mood_valence[T.1]']


In [10]:
# Reduced model: only the drift rate (v) and the decision boundary (a) are
# regressed on the design.

# Right-hand side shared by both regressions: the four main effects plus the
# valence and the arousal interaction terms.
full_design = ("movie_valence + movie_arousal + mood_valence + mood_arousal"
               "+ movie_valence * mood_valence"
               "+ movie_arousal * mood_arousal")
v_a_formulas = [param + " ~ " + full_design for param in ('v', 'a')]

m_FULL_hypothesis_only_v_a = hddm.HDDMRegressor(
    data,
    v_a_formulas,
    include=('v', 'a'),
    p_outlier=0.05,
)

Adding these covariates:
['v_Intercept', 'v_movie_valence[T.1]', 'v_movie_arousal[T.1]', 'v_mood_valence[T.1]', 'v_mood_arousal[T.1]', 'v_movie_valence[T.1]:mood_valence[T.1]', 'v_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['a_Intercept', 'a_movie_valence[T.1]', 'a_movie_arousal[T.1]', 'a_mood_valence[T.1]', 'a_mood_arousal[T.1]', 'a_movie_valence[T.1]:mood_valence[T.1]', 'a_movie_arousal[T.1]:mood_arousal[T.1]']


In [11]:
# Reduced model: only the drift rate (v), the decision boundary (a) and the
# non-decision time (t) are regressed on the design.

# Right-hand side shared by the three regressions: the four main effects plus
# the valence and the arousal interaction terms.
full_design = ("movie_valence + movie_arousal + mood_valence + mood_arousal"
               "+ movie_valence * mood_valence"
               "+ movie_arousal * mood_arousal")
v_a_t_formulas = [param + " ~ " + full_design for param in ('v', 'a', 't')]

m_FULL_hypothesis_only_v_a_t = hddm.HDDMRegressor(
    data,
    v_a_t_formulas,
    include=('v', 'a', 't'),
    p_outlier=0.05,
)

Adding these covariates:
['v_Intercept', 'v_movie_valence[T.1]', 'v_movie_arousal[T.1]', 'v_mood_valence[T.1]', 'v_mood_arousal[T.1]', 'v_movie_valence[T.1]:mood_valence[T.1]', 'v_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['a_Intercept', 'a_movie_valence[T.1]', 'a_movie_arousal[T.1]', 'a_mood_valence[T.1]', 'a_mood_arousal[T.1]', 'a_movie_valence[T.1]:mood_valence[T.1]', 'a_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['t_Intercept', 't_movie_valence[T.1]', 't_movie_arousal[T.1]', 't_mood_valence[T.1]', 't_mood_arousal[T.1]', 't_movie_valence[T.1]:mood_valence[T.1]', 't_movie_arousal[T.1]:mood_arousal[T.1]']


In [12]:
# Reduced model with only the valence factors: every decision parameter is
# regressed on movie valence, mood valence and their interaction.
valence_design = "movie_valence + mood_valence + movie_valence * mood_valence"
valence_formulas = [param + " ~ " + valence_design
                    for param in ('v', 'a', 't', 'z')]

m_FULL_hypothesis_valence_only = hddm.HDDMRegressor(
    data,
    valence_formulas,
    include=('v', 'a', 't', 'z'),
    p_outlier=0.05,
)

Adding these covariates:
['v_Intercept', 'v_movie_valence[T.1]', 'v_mood_valence[T.1]', 'v_movie_valence[T.1]:mood_valence[T.1]']
Adding these covariates:
['a_Intercept', 'a_movie_valence[T.1]', 'a_mood_valence[T.1]', 'a_movie_valence[T.1]:mood_valence[T.1]']
Adding these covariates:
['t_Intercept', 't_movie_valence[T.1]', 't_mood_valence[T.1]', 't_movie_valence[T.1]:mood_valence[T.1]']
Adding these covariates:
['z_Intercept', 'z_movie_valence[T.1]', 'z_mood_valence[T.1]', 'z_movie_valence[T.1]:mood_valence[T.1]']


In [13]:
# reduced model with only valence
# NOTE(review): this cell was labelled "only arousal", but it re-creates
# m_FULL_hypothesis_valence_only with exactly the valence formulas of the
# previous cell. The arousal-only model is built in the following cell, so
# this cell looks like a copy-paste leftover.
m_FULL_hypothesis_valence_only = hddm.HDDMRegressor(data, ["v ~ movie_valence + mood_valence + movie_valence * mood_valence",
                                   "a ~ movie_valence + mood_valence + movie_valence * mood_valence",
                                   "t ~ movie_valence + mood_valence + movie_valence * mood_valence",
                                   "z ~ movie_valence + mood_valence + movie_valence * mood_valence"],
                            include=('v', 'a', 't', 'z'),
                            p_outlier=0.05)

Adding these covariates:
['v_Intercept', 'v_movie_valence[T.1]', 'v_mood_valence[T.1]', 'v_movie_valence[T.1]:mood_valence[T.1]']
Adding these covariates:
['a_Intercept', 'a_movie_valence[T.1]', 'a_mood_valence[T.1]', 'a_movie_valence[T.1]:mood_valence[T.1]']
Adding these covariates:
['t_Intercept', 't_movie_valence[T.1]', 't_mood_valence[T.1]', 't_movie_valence[T.1]:mood_valence[T.1]']
Adding these covariates:
['z_Intercept', 'z_movie_valence[T.1]', 'z_mood_valence[T.1]', 'z_movie_valence[T.1]:mood_valence[T.1]']


In [14]:
# Reduced model with only the arousal factors: every decision parameter is
# regressed on movie arousal, mood arousal and their interaction.
arousal_formulas = [
    "v ~ movie_arousal + mood_arousal + movie_arousal * mood_arousal",
    "a ~  movie_arousal + mood_arousal + movie_arousal * mood_arousal",
    "t ~  movie_arousal + mood_arousal + movie_arousal * mood_arousal",
    "z ~  movie_arousal + mood_arousal + movie_arousal * mood_arousal",
]

m_FULL_hypothesis_arousal_only = hddm.HDDMRegressor(
    data,
    arousal_formulas,
    include=('v', 'a', 't', 'z'),
    p_outlier=0.05,
)

Adding these covariates:
['v_Intercept', 'v_movie_arousal[T.1]', 'v_mood_arousal[T.1]', 'v_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['a_Intercept', 'a_movie_arousal[T.1]', 'a_mood_arousal[T.1]', 'a_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['t_Intercept', 't_movie_arousal[T.1]', 't_mood_arousal[T.1]', 't_movie_arousal[T.1]:mood_arousal[T.1]']
Adding these covariates:
['z_Intercept', 'z_movie_arousal[T.1]', 'z_mood_arousal[T.1]', 'z_movie_arousal[T.1]:mood_arousal[T.1]']


In [None]:
# Fit all the reduced models and save them.
# Every model gets its OWN trace database ("<save name>.db", the same naming
# scheme as the full models). Sampling all of them into a shared 'traces.db'
# would make each run overwrite the traces of the previous one, so the saved
# models could not be reloaded with their own traces afterwards.
reduced_models = [
    ('m_FULL_hypothesis_only_v', m_FULL_hypothesis_only_v),
    ('m_FULL_hypothesis_only_v_a', m_FULL_hypothesis_only_v_a),
    ('m_FULL_hypothesis_only_v_a_t', m_FULL_hypothesis_only_v_a_t),
    ('m_FULL_hypothesis_valence_only', m_FULL_hypothesis_valence_only),
    ('m_FULL_hypothesis_arousal_only', m_FULL_hypothesis_arousal_only),
]

for save_name, reduced_model in reduced_models:
    # find starting values first to speed up the MCMC chain fitting
    reduced_model.find_starting_values()
    # draw 10000 samples, with the first 2000 as burn-in samples
    reduced_model.sample(10000, burn=2000, dbname=save_name + '.db', db='pickle')
    # save the model under the same name as before
    reduced_model.save(save_name)

## Compare the DIC value of all the models
### A lower DIC value indicates a better model fit

In [145]:
# Print the DIC of each fitted model, one value per line, in this order:
# full, only v, only v & a, only v & a & t, valence only, arousal only.
models_to_compare = (
    m_FULL_hypothesis,
    m_FULL_hypothesis_only_v,
    m_FULL_hypothesis_only_v_a,
    m_FULL_hypothesis_only_v_a_t,
    m_FULL_hypothesis_valence_only,
    m_FULL_hypothesis_arousal_only,
)
for fitted_model in models_to_compare:
    print(fitted_model.dic)

70243.65691810394
70319.80726439007
70279.45989580629
70276.54737259372
70250.0993725725
70316.23216999679
