## Note:
This notebook runs the HDDM model fitting in parallel for experiments '1a', '1b', '1c', '2', and '6a', which share the same experimental design.

In [1]:
import sys
import hddm, IPython
from platform import python_version # further check your python version

# Report the interpreter and the library versions this notebook is running with.
print('Notebook is running:', sys.executable)
# The label used to read "HDDM version" although the value is the Python version.
print('The current Python version is', python_version())
print('The current HDDM version is', hddm.__version__) # 0.8.0
print('The current IPython version is', IPython.__version__)

Notebook is running: /home/hcp4715/miniconda3/envs/hddm/bin/python
The current HDDM version is 3.7.7
The current HDDM version is 0.8.0
The current IPython version is 7.13.0




In [6]:
%matplotlib inline

# Preparation
import os, hddm, time, csv
import kabuki
from kabuki.analyze import gelman_rubin

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import date
import random

In [3]:
# define functions to run the model in parallel for experiments that share the same design
def run_model_1a(id):
    """Fit one chain of the stimulus-coding HDDM for experiment 1a.

    The function is self-contained (it imports ``hddm`` itself and uses no
    notebook-level names) -- presumably so that it can be shipped to ipyparallel
    engines via ``map``; keep it that way unless the engines are set up otherwise.

    Parameters
    ----------
    id : int
        Chain index; only used to build the output file names.

    Returns
    -------
    hddm.HDDMStimCoding
        The model object after sampling.

    Side effects
    ------------
    Reads ``df1a.v.hddm_stim.csv``, writes the trace database
    ``df1a_chain_vtaz_<id>.db`` and saves the model as ``df1a_chain__vtaz_<id>``
    (all relative paths).
    """
    import hddm

    exp_name = '1a'
    # sampler settings: total draws, burn-in and thinning interval
    n_samples, n_burn, n_thin = 10000, 5000, 5

    chain_tag = '%i' % id
    # fixed: the message used to contain a doubled word ("for for exp")
    print('running models ' + chain_tag, 'for exp', exp_name)

    prefix = 'df' + exp_name
    # NOTE(review): the model name has a double underscore ('_chain__vtaz_') while the
    # database name has a single one. Left untouched because other code may load these
    # files by name -- confirm before normalising.
    dbname = prefix + '_chain_vtaz_' + chain_tag + '.db'   # trace database, pickle format
    mname = prefix + '_chain__vtaz_' + chain_tag           # name the model is saved under
    fname = prefix + '.v.hddm_stim.csv'

    df = hddm.load_csv(fname)
    # v, t and a each depend on the same two columns
    depends_on = {param: ['match', 'val'] for param in ('v', 't', 'a')}
    m = hddm.HDDMStimCoding(df,
                            include='z',
                            stim_col='stim',
                            depends_on=depends_on,
                            split_param='v',
                            drift_criterion=False,
                            p_outlier=0.05)
    m.find_starting_values()
    m.sample(n_samples, burn=n_burn, thin=n_thin, dbname=dbname, db='pickle')
    m.save(mname)  # save the model
    return m

def run_model_1b(id):
    """Fit one chain of the stimulus-coding HDDM for experiment 1b.

    The function is self-contained (it imports ``hddm`` itself and uses no
    notebook-level names) -- presumably so that it can be shipped to ipyparallel
    engines via ``map``; keep it that way unless the engines are set up otherwise.

    Parameters
    ----------
    id : int
        Chain index; only used to build the output file names.

    Returns
    -------
    hddm.HDDMStimCoding
        The model object after sampling.

    Side effects
    ------------
    Reads ``df1b.v.hddm_stim.csv``, writes the trace database
    ``df1b_chain_vtaz_<id>.db`` and saves the model as ``df1b_chain__vtaz_<id>``
    (all relative paths).
    """
    import hddm

    exp_name = '1b'
    # sampler settings: total draws, burn-in and thinning interval
    n_samples, n_burn, n_thin = 10000, 5000, 5

    chain_tag = '%i' % id
    # fixed: the message used to contain a doubled word ("for for exp")
    print('running models ' + chain_tag, 'for exp', exp_name)

    prefix = 'df' + exp_name
    # NOTE(review): the model name has a double underscore ('_chain__vtaz_') while the
    # database name has a single one. Left untouched because other code may load these
    # files by name -- confirm before normalising.
    dbname = prefix + '_chain_vtaz_' + chain_tag + '.db'   # trace database, pickle format
    mname = prefix + '_chain__vtaz_' + chain_tag           # name the model is saved under
    fname = prefix + '.v.hddm_stim.csv'

    df = hddm.load_csv(fname)
    # v, t and a each depend on the same two columns
    depends_on = {param: ['match', 'val'] for param in ('v', 't', 'a')}
    m = hddm.HDDMStimCoding(df,
                            include='z',
                            stim_col='stim',
                            depends_on=depends_on,
                            split_param='v',
                            drift_criterion=False,
                            p_outlier=0.05)
    m.find_starting_values()
    m.sample(n_samples, burn=n_burn, thin=n_thin, dbname=dbname, db='pickle')
    m.save(mname)  # save the model
    return m

def run_model_1c(id):
    """Fit one chain of the stimulus-coding HDDM for experiment 1c.

    The function is self-contained (it imports ``hddm`` itself and uses no
    notebook-level names) -- presumably so that it can be shipped to ipyparallel
    engines via ``map``; keep it that way unless the engines are set up otherwise.

    Parameters
    ----------
    id : int
        Chain index; only used to build the output file names.

    Returns
    -------
    hddm.HDDMStimCoding
        The model object after sampling.

    Side effects
    ------------
    Reads ``df1c.v.hddm_stim.csv``, writes the trace database
    ``df1c_chain_vtaz_<id>.db`` and saves the model as ``df1c_chain__vtaz_<id>``
    (all relative paths).
    """
    import hddm

    exp_name = '1c'
    # sampler settings: total draws, burn-in and thinning interval
    n_samples, n_burn, n_thin = 10000, 5000, 5

    chain_tag = '%i' % id
    # fixed: the message used to contain a doubled word ("for for exp")
    print('running models ' + chain_tag, 'for exp', exp_name)

    prefix = 'df' + exp_name
    # NOTE(review): the model name has a double underscore ('_chain__vtaz_') while the
    # database name has a single one. Left untouched because other code may load these
    # files by name -- confirm before normalising.
    dbname = prefix + '_chain_vtaz_' + chain_tag + '.db'   # trace database, pickle format
    mname = prefix + '_chain__vtaz_' + chain_tag           # name the model is saved under
    fname = prefix + '.v.hddm_stim.csv'

    df = hddm.load_csv(fname)
    # v, t and a each depend on the same two columns
    depends_on = {param: ['match', 'val'] for param in ('v', 't', 'a')}
    m = hddm.HDDMStimCoding(df,
                            include='z',
                            stim_col='stim',
                            depends_on=depends_on,
                            split_param='v',
                            drift_criterion=False,
                            p_outlier=0.05)
    m.find_starting_values()
    m.sample(n_samples, burn=n_burn, thin=n_thin, dbname=dbname, db='pickle')
    m.save(mname)  # save the model
    return m

# define a function to run model in parallel for experiment 2
def run_model_2(id):
    """Fit one chain of the stimulus-coding HDDM for experiment 2.

    The function is self-contained (it imports ``hddm`` itself and uses no
    notebook-level names) -- presumably so that it can be shipped to ipyparallel
    engines via ``map``; keep it that way unless the engines are set up otherwise.

    Parameters
    ----------
    id : int
        Chain index; only used to build the output file names.

    Returns
    -------
    hddm.HDDMStimCoding
        The model object after sampling.

    Side effects
    ------------
    Reads ``df2.v.hddm_stim.csv``, writes the trace database
    ``df2_chain_vtaz_<id>.db`` and saves the model as ``df2_chain__vtaz_<id>``
    (all relative paths).
    """
    import hddm

    exp_name = '2'
    # sampler settings: total draws, burn-in and thinning interval
    n_samples, n_burn, n_thin = 10000, 5000, 5

    chain_tag = '%i' % id
    # fixed: the message used to contain a doubled word ("for for exp")
    print('running models ' + chain_tag, 'for exp', exp_name)

    prefix = 'df' + exp_name
    # NOTE(review): the model name has a double underscore ('_chain__vtaz_') while the
    # database name has a single one. Left untouched because other code may load these
    # files by name -- confirm before normalising.
    dbname = prefix + '_chain_vtaz_' + chain_tag + '.db'   # trace database, pickle format
    mname = prefix + '_chain__vtaz_' + chain_tag           # name the model is saved under
    fname = prefix + '.v.hddm_stim.csv'

    df = hddm.load_csv(fname)
    # v, t and a each depend on the same two columns
    depends_on = {param: ['match', 'val'] for param in ('v', 't', 'a')}
    m = hddm.HDDMStimCoding(df,
                            include='z',
                            stim_col='stim',
                            depends_on=depends_on,
                            split_param='v',
                            drift_criterion=False,
                            p_outlier=0.05)
    m.find_starting_values()
    m.sample(n_samples, burn=n_burn, thin=n_thin, dbname=dbname, db='pickle')
    m.save(mname)  # save the model
    return m

def run_model_6a(id):
    """Fit one chain of the stimulus-coding HDDM for experiment 6a.

    The function is self-contained (it imports ``hddm`` itself and uses no
    notebook-level names) -- presumably so that it can be shipped to ipyparallel
    engines via ``map``; keep it that way unless the engines are set up otherwise.

    Parameters
    ----------
    id : int
        Chain index; only used to build the output file names.

    Returns
    -------
    hddm.HDDMStimCoding
        The model object after sampling.

    Side effects
    ------------
    Reads ``df6a.v.hddm_stim.csv``, writes the trace database
    ``df6a_chain_vtaz_<id>.db`` and saves the model as ``df6a_chain__vtaz_<id>``
    (all relative paths).
    """
    import hddm

    exp_name = '6a'
    # sampler settings: total draws, burn-in and thinning interval
    n_samples, n_burn, n_thin = 10000, 5000, 5

    chain_tag = '%i' % id
    # fixed: the message used to contain a doubled word ("for for exp")
    print('running models ' + chain_tag, 'for exp', exp_name)

    prefix = 'df' + exp_name
    # NOTE(review): the model name has a double underscore ('_chain__vtaz_') while the
    # database name has a single one. Left untouched because other code may load these
    # files by name -- confirm before normalising.
    dbname = prefix + '_chain_vtaz_' + chain_tag + '.db'   # trace database, pickle format
    mname = prefix + '_chain__vtaz_' + chain_tag           # name the model is saved under
    fname = prefix + '.v.hddm_stim.csv'

    df = hddm.load_csv(fname)
    # v, t and a each depend on the same two columns
    depends_on = {param: ['match', 'val'] for param in ('v', 't', 'a')}
    m = hddm.HDDMStimCoding(df,
                            include='z',
                            stim_col='stim',
                            depends_on=depends_on,
                            split_param='v',
                            drift_criterion=False,
                            p_outlier=0.05)
    m.find_starting_values()
    m.sample(n_samples, burn=n_burn, thin=n_thin, dbname=dbname, db='pickle')
    m.save(mname)  # save the model
    return m

In [5]:
# Fit every experiment in `exp_list`, running the chains of one experiment in parallel
# on ipyparallel engines and timing each experiment.
# exp_list = ['1a', '1b', '1c', '2', '6a']
exp_list = ['1a', '1b', '1c']

from ipyparallel import Client

N_CHAINS = 4                 # number of chains per experiment
ENGINE_SLICE = slice(4, 8)   # engines used for sampling (equivalent to rc[4:8])

# One runner per experiment. All five are listed so that switching to the full
# exp_list above also fits '2' and '6a'; the former if/elif chain had no branch for
# them and would have skipped those experiments without any message.
runners = {
    '1a': run_model_1a,
    '1b': run_model_1b,
    '1c': run_model_1c,
    '2': run_model_2,
    '6a': run_model_6a,
}

# Fail before any sampling starts rather than silently skipping an experiment.
unknown = [name for name in exp_list if name not in runners]
if unknown:
    raise ValueError('no model runner defined for experiment(s): %s' % ', '.join(unknown))

rc = Client()        # a single connection reused for every experiment
models_by_exp = {}   # exp name -> list of fitted chains, so earlier experiments are not lost

for exp_name in exp_list:
    start_time = time.time()   # the start time of the processing
    print('\nrun model for exp', exp_name)
    jobs = rc[ENGINE_SLICE].map(runners[exp_name], range(N_CHAINS))
    df_models = jobs.get()     # waits for all chains of this experiment
    # `df_models` is rebound on every pass (as before), so also keep a per-experiment copy.
    models_by_exp[exp_name] = df_models
    print("Running %i chains for exp" % N_CHAINS, exp_name, "used: %f seconds." % (time.time() - start_time))


run model for exp 1a
Running 4 chains for exp 1a used: 25711.408953 seconds.

run model for exp 1b
Running 4 chains for exp 1b used: 26624.204552 seconds.

run model for exp 1c
Running 4 chains for exp 1c used: 11342.836805 seconds.


In [7]:
# Gelman-Rubin convergence statistic across the chains held in `df_models`.
# Note: the fitting loop rebinds `df_models` for every experiment, so this call only
# covers the last experiment that was run.
gelman_rubin(df_models)

{'a(Match.Bad)': 1.0005572708625463,
 'a(Match.Good)': 1.0005371625122395,
 'a(Match.Neutral)': 1.0333170101117481,
 'a(Mismatch.Bad)': 1.000289690497579,
 'a(Mismatch.Good)': 1.0006208081794707,
 'a(Mismatch.Neutral)': 1.0016289948313746,
 'a_std': 1.0196730412076918,
 'a_subj(Match.Bad).1215': 0.99963300826818,
 'a_subj(Match.Good).1215': 1.0002900085236002,
 'a_subj(Match.Neutral).1215': 1.0001156100462698,
 'a_subj(Match.Bad).1216': 1.0016492821353016,
 'a_subj(Match.Good).1216': 1.0018406965017423,
 'a_subj(Match.Neutral).1216': 1.0031429656394635,
 'a_subj(Match.Bad).1217': 0.999687545140149,
 'a_subj(Match.Good).1217': 1.0003152961353579,
 'a_subj(Match.Neutral).1217': 1.0004039326875251,
 'a_subj(Match.Bad).1218': 1.0009018983620015,
 'a_subj(Match.Good).1218': 0.9996470324710969,
 'a_subj(Match.Neutral).1218': 1.001449588837532,
 'a_subj(Match.Bad).1219': 0.9998901252157019,
 'a_subj(Match.Good).1219': 1.0037397612658887,
 'a_subj(Match.Neutral).1219': 1.0069067141384187,
 'a_