In [None]:
import os

import hddm
import kabuki
import pandas as pd
import scikits.bootstrap as boot
import statsmodels.api as sm
from IPython.parallel import Client
from scipy import stats
from scipy.stats.stats import pearsonr, spearmanr

from myhddm import mle, vis
from myhddm.parse import parse_stats
from radd.hd import stats_df


In [2]:
# Output directory for the correct-trials model. Only create it when missing so
# this cell can be re-run (a bare os.mkdir raises if the directory exists).
if not os.path.isdir("/home/kyle/Dropbox/PFH/iPFH/msm_allcor"):
    os.mkdir("/home/kyle/Dropbox/PFH/iPFH/msm_allcor")
# Relative paths used by later cells resolve against this directory.
os.chdir("/home/kyle/Dropbox/PFH/iPFH/msm_allcor")

# Trial-level behavioural data and the parameter set passed to HDDM via `include`.
data = pd.read_csv("/home/kyle/Dropbox/PFH/iPFH/HDDM/n19/allsx_ewma_19.csv")
params = ['v', 'z', 't', 'a', 'sv', 'sz', 'st']

In [3]:
# Keep only the trials flagged as correct (acc == 1).
cdata = data.loc[data.acc == 1]

In [9]:
# Sanity check: mean accuracy per cue x stimulus cell of the filtered data.
cdata.groupby(['cue', 'stim'])['acc'].mean()

cue   stim 
a80H  face     1
      house    1
b50N  face     1
      house    1
c80F  face     1
      house    1
Name: acc, dtype: int64

# Fit Subjects with MLE then Read in simulated data and plot

In [None]:
# Switch to the opt_sim directory; the relative read of "simdf_opt.csv" below
# resolves against it.
os.chdir("/home/kyle/Dropbox/PFH/iPFH/HDDM/Final/msm_final/opt_sim")

# Per-subject MLE optimisation (project helper).
# NOTE(review): presumably this is what writes simdf_opt.csv -- confirm in myhddm.mle.
mle.optimize_sx('msm')

# Simulated data set read back from the current directory.
simdf=pd.read_csv("simdf_opt.csv")

# Compare/plot observed `data` against the simulated frame (project helper).
# NOTE(review): `data` here is the frame loaded in the setup cell, while
# mname is 'msm_final' -- verify these are meant to go together.
vis.predict_from_simdfs(data, simdf, mname='msm_final')

# HDDM Model Function

In [None]:
def run_cor_model(jobn):
    """Fit one chain of the HDDM model to correct trials only and save its output.

    All imports are done inside the body so the function is self-contained
    when it is executed in a namespace other than the notebook's (for example
    on an IPython.parallel engine). `os` was previously missing from these
    local imports even though `os.chdir` is called.

    Parameters
    ----------
    jobn : int or str
        Chain identifier; appended to the trace-database and stats file names.

    Returns
    -------
    hddm.HDDM
        The sampled model.

    Side effects
    ------------
    Changes the working directory to the msm_allcor folder and writes
    ``msm_traces<jobn>.db`` and ``stats<jobn>.txt`` there.
    """
    import os

    import hddm
    import pandas as pd

    os.chdir("/home/kyle/Dropbox/PFH/iPFH/msm_allcor")
    data = pd.read_csv("/home/kyle/Dropbox/PFH/iPFH/HDDM/n19/allsx_ewma_19.csv")
    # Restrict the fit to correct responses.
    cdata = data[data.acc == 1]
    params = ['v', 'z', 't', 'a', 'sv', 'sz', 'st']
    # Drift rate varies by stimulus and cue; starting point varies by cue.
    m = hddm.HDDM(cdata, depends_on={'v': ['stim', 'cue'], 'z': 'cue'},
                  informative=False, bias=True, include=params)

    # 8000 samples with the first 3000 discarded as burn-in; traces are
    # stored in a pickle database named after the chain.
    m.sample(8000, burn=3000, dbname="msm_traces" + str(jobn) + ".db", db='pickle')
    m.print_stats("stats" + str(jobn) + ".txt")
    return m

In [None]:
def run_model(jobn):
    """Fit one chain of the HDDM model to all trials and save its output.

    All imports are done inside the body so the function is self-contained
    when it is executed in a namespace other than the notebook's (for example
    on an IPython.parallel engine). `os` was previously missing from these
    local imports even though `os.chdir` is called.

    Parameters
    ----------
    jobn : int or str
        Chain identifier; appended to the trace-database and stats file names.

    Returns
    -------
    hddm.HDDM
        The sampled model.

    Side effects
    ------------
    Changes the working directory to the msm_final folder and writes
    ``msm_traces<jobn>.db`` and ``stats<jobn>.txt`` there.
    """
    import os

    import hddm
    import pandas as pd

    os.chdir("/home/kyle/Dropbox/PFH/iPFH/HDDM/Final/msm_final")
    data = pd.read_csv("/home/kyle/Dropbox/PFH/iPFH/HDDM/Final/allsx_ewma.csv")
    params = ['v', 'z', 't', 'a', 'sv', 'sz', 'st']
    # Drift rate varies by stimulus and cue; starting point varies by cue.
    m = hddm.HDDM(data, depends_on={'v': ['stim', 'cue'], 'z': 'cue'},
                  informative=False, bias=True, include=params)

    # 8000 samples with the first 3000 discarded as burn-in; traces are
    # stored in a pickle database named after the chain.
    m.sample(8000, burn=3000, dbname="msm_traces" + str(jobn) + ".db", db='pickle')
    m.print_stats("stats" + str(jobn) + ".txt")
    return m

## Set up IPython Parallel & Run HDDM Model (3 Chains)

In [None]:
# Connect to the running IPython.parallel cluster.
c = Client()
# Set blocking mode on the client before the view is created below.
c.block = True
cview = c.load_balanced_view()
# Submit one run_model job per engine; the job ids are 0 .. len(c)-1.
# NOTE(review): later cells load exactly msm_traces0/1/2.db, so this presumably
# expects a 3-engine cluster -- confirm before running.
models = cview.map(run_model, range(len(c))) # len(c) is the number of engines in the cluster; range(len(c)) yields the job ids

## Re-Init Model and Load Traces (For all 3) 

In [None]:
# Build three identically specified models (one per sampled chain); each gets
# its own depends_on dict, exactly as when written out three times.
m0, m1, m2 = (
    hddm.HDDM(data, depends_on={'v': ['stim', 'cue'], 'z': 'cue'},
              informative=False, bias=True, include=params)
    for chain in range(3)
)

# Attach the stored traces of chain N (msm_tracesN.db, pickle backend) to model N.
for chain, model in enumerate((m0, m1, m2)):
    model.load_db('msm_traces%d.db' % chain, db='pickle')

## Get Model Stats

In [None]:
# Collect the three loaded chains.
models=pd.Series([m0,m1,m2])
# Output of kabuki.analyze.gelman_rubin across the chains, stored as a
# one-column frame ('gr') indexed by the keys of the returned dict.
grdf=pd.DataFrame.from_dict(kabuki.analyze.gelman_rubin(models), orient='index')
grdf.columns=['gr']
grdf.to_csv("gelman-rubin.csv")

# Merge the chains into a single model, then save posterior plots, the stats
# text file and the combined model itself in the current directory.
allmodels = kabuki.utils.concat_models(models)
allmodels.plot_posteriors(save=True)
# NOTE(review): `plt` is not imported in the visible import cell; this line needs
# matplotlib.pyplot bound to `plt` in the kernel -- confirm how it is provided.
plt.close('all')
allmodels.print_stats("combined_model_stats.txt")
allmodels.save('msm_all')

## Aggregate all 3 model stats into df

In [None]:
# Summary-statistics table for each chain.
df0, df1, df2 = (model.gen_stats() for model in (m0, m1, m2))

# Tag every row with the chain it came from.
for label, frame in zip(('m0', 'm1', 'm2'), (df0, df1, df2)):
    frame['m'] = label

# Stack the three tables and persist them.
dfall = pd.concat([df0, df1, df2])
dfall.to_csv('stats_groupedby_model.csv')

## Re-Format For Subject Grouping

In [None]:
# Expose the stats index as an explicit 'param' column for stats_df.
dfall['param'] = dfall.index
sx = stats_df(dfall)

# Keep subject-level rows only (drop rows whose idx is 'GRP'). Boolean
# indexing already returns a new frame, so no prior copy of `sx` is needed.
sxdf = sx[sx.idx != 'GRP']
# A bare `sxdf[['idx', 'param', 'mean', 'm']]` expression used to sit here; its
# result was never assigned, so all columns were (and still are) written out.

# Reduce each parameter label to the part before the first underscore.
plist = [p.split('_')[0] for p in sxdf.param]
sxdf_parsed = sxdf.copy()
sxdf_parsed['param'] = plist
sxdf_parsed.to_csv('sxdf_all_models.csv')

# PREPARE CORRELATION DF

## Generate Mean Parameters DF Across All 3 Runs

In [None]:
# Parse each chain's stats with the project helper.
m0stats, m1stats, m2stats = (parse_stats(model) for model in (m0, m1, m2))

# Element [1] of each parse result is the per-condition table; coerce its
# object columns to numeric where possible.
condsdf0, condsdf1, condsdf2 = (
    parsed[1].convert_objects(convert_numeric=True)
    for parsed in (m0stats, m1stats, m2stats)
)

# Stack the three tables row-wise and average each (subject, parameter) pair.
alldf = pd.concat([condsdf0, condsdf1, condsdf2], axis=0)
by_subject_param = alldf.groupby(['sub', 'param'])
condsdf_mean_of_allmodels = by_subject_param.mean().reset_index()
condsdf_mean_of_allmodels.to_csv("condsdf_mean_of_allmodels.csv")

## Make zCue DF (Subj x ParamCondition)

In [None]:
cdfall = condsdf_mean_of_allmodels.copy()
# Rows holding boundary separation (a) and starting point (z).
adf = cdfall[cdfall.param == 'a']
zdf = cdfall[cdfall.param == 'z']

# Per-subject boundary separation, taken from the third column of the
# per-subject means.
# NOTE(review): positional selection -- presumably any condition column works
# because `a` has no depends_on entry in the model; confirm the column layout.
a = adf.groupby('sub').mean().reset_index().iloc[:, 2]

# Take an explicit copy before renaming/adding columns so the assignments
# below act on an independent frame rather than on a slice of `zdf`
# (avoids SettingWithCopyWarning and view/copy ambiguity).
zzdf = zdf.loc[:, ['sub', 'param', 'a80H_face', 'b50N_face', 'c80F_face']].copy()
zzdf.columns = ['sub', 'param', 'zH', 'zN', 'zF']
# Positional assignment: relies on `a` and `zzdf` listing subjects in the same order.
zzdf['a'] = a.values

## Make vCueStim DF (Subj x ParamCondition)

In [None]:
# Drift-rate rows; everything from the third column onward is relabelled as
# v<Cue><Stim> below.
vdf = cdfall.loc[cdfall.param == 'v']
vvdf = vdf.iloc[:, 2:]
vvdf.columns = ['vHF', 'vNF', 'vFF', 'vHH', 'vNH', 'vFH']

# Index both frames by subject id so they can be joined column-wise.
vvdf.index = zzdf.index = zzdf['sub'].values

## rho_df --> Concat zCue and vCueStim, Remove Subjects Excluded from fMRI Analysis

In [None]:
# Join the z/a frame and the drift frame side by side, without the 'param' label.
rhodf = pd.concat([zzdf, vvdf], axis=1).drop('param', axis=1)

# Leave out subjects 25 and 28, then work on an independent copy.
is_excluded = rhodf['sub'].isin([25, 28])
rhodf_img_subjects_only = rhodf[~is_excluded]
rho_df = rhodf_img_subjects_only.copy()

## HDDM MSM Model Parameters for Correlations with BOLD data

In [None]:
# Reverse Sign of House Drifts
# The source is the un-negated frame that rho_df was copied from, so this cell
# gives the same result however many times it is run. It previously read
# from `hddm_rho_df`, a name that is never defined in this notebook.
rho_df[['vHH','vNH','vFH']] = -rhodf_img_subjects_only[['vHH','vNH','vFH']]

In [None]:
# Distances from the cue-specific starting point to each bound, for the
# Face (F) and House (H) cues:
#   z<Cue>2a = a - z*a  (distance to bound "a", the face bound)
#   z<Cue>2b = z*a      (distance to bound "b", the house bound)
for cue in ('F', 'H'):
    rho_df['z%s2a' % cue] = rho_df['a'] - (rho_df['z' + cue] * rho_df['a'])
    rho_df['z%s2b' % cue] = rho_df['z' + cue] * rho_df['a']

# Drift rates normalised two ways, per cue and stimulus:
#   v<Cue><Stim>_n_z<Cue>2<bound> : drift / distance to the stimulus' own bound
#   v<Cue><Stim>_n_a              : drift / boundary height
# Face stimuli use bound "a", house stimuli use bound "b".
for cue in ('F', 'H'):
    for stim, bound in (('F', 'a'), ('H', 'b')):
        drift = 'v' + cue + stim
        distance = 'z%s2%s' % (cue, bound)
        rho_df[drift + '_n_' + distance] = rho_df[drift] / rho_df[distance]
        rho_df[drift + '_n_a'] = rho_df[drift] / rho_df['a']

## Save rho_df

In [None]:
# Write the parameter table (including the derived columns) to disk; the frame's
# index is written as the first CSV column.
rho_df.to_csv("/home/kyle/Dropbox/PFH/iPFH/hddm_rho_df.csv")

## Get BOLD Vectors, Format, and Save PPA, FFA, and Diff DFs

In [None]:
# The two BOLD tables are read relative to the project root.
os.chdir("/home/kyle/Dropbox/PFH/iPFH/")
ppa = pd.read_csv('PPA_WinPeakBold.csv')
ffa = pd.read_csv('FFA_WinPeakBold.csv')

# Give both tables the same nine column labels (positional relabelling).
cols = ['hc', 'nc', 'fc', 'hcF', 'ncF', 'fcF', 'hcH', 'ncH', 'fcH']
ffa.columns = cols
ppa.columns = cols

# Difference columns. Each value is the second-named column minus the first
# (e.g. 'fc-fcH' holds fcH - fc); the labels are kept unchanged because they
# end up in the saved CSV headers.
for df in [ffa, ppa]:
    df['fc-fcH'] = df['fcH'] - df['fc']
    df['fc-fcF'] = df['fcF'] - df['fc']
    df['hc-hcH'] = df['hcH'] - df['hc']
    df['hc-hcF'] = df['hcF'] - df['hc']

# FFA minus PPA, computed before the id column is attached so the ids are not
# subtracted from each other.
diff = ffa - ppa

# Attach subject ids. This used to read `hddm_rho_df.idx`, a name that is never
# defined in this notebook; the subject ids of the parameter table are in
# rho_df['sub'].
# NOTE(review): positional assignment -- assumes the BOLD rows are in the same
# subject order (and count) as rho_df; confirm against the BOLD files.
for df in [ffa, ppa, diff]:
    df['idx'] = rho_df['sub'].values

ffa.to_csv("/home/kyle/Dropbox/PFH/iPFH/ffa_rho_df.csv")
ppa.to_csv("/home/kyle/Dropbox/PFH/iPFH/ppa_rho_df.csv")
diff.to_csv("/home/kyle/Dropbox/PFH/iPFH/diff_rho_df.csv")