# Hierarchical Bayesian Logistic Regression using PyStan
**Florian Ott, 2022**

Here we fit the models that best explained behaviour within each strategy class: Planning (PM), Simple (SM) and Hybrid (HM). Further explanation of the models, the model comparison results and information about model validation can be found in the main manuscript.

In [1]:
import numpy as np
import pandas as pd
import pystan
import arviz as az
import glob as glob
import time as time

# Load participant data
# The pattern contains no wildcards, so glob returns either a one-element list
# (the file exists) or an empty list (it does not).
filename = glob.glob('../data/behaviour/data_all_participants_20220215120148.csv')
if not filename:
    # Fail with an explicit message instead of an opaque IndexError on filename[0].
    raise FileNotFoundError(
        "Participant data file not found: "
        "'../data/behaviour/data_all_participants_20220215120148.csv' "
        "(path is relative to the current working directory)."
    )
# The first CSV column is used as the DataFrame index.
dat = pd.read_csv(filename[0], index_col=0)

# SM

#### Specifying the model

In [7]:
# Stan program for the SM model: a hierarchical Bernoulli-logit regression.
#   * theta_basic_1..4 are per-subject coefficients on the is_basic_1..4
#     regressors, each drawn from its own group-level normal(mu_*, sigma_*).
#   * theta_full_energy, theta_low_energy_LC and theta_low_energy_HC are
#     coefficients shared by all subjects.
#   * generated quantities returns the pointwise log-likelihood (log_lik) and
#     simulated responses (response_new) that use per-subject coefficients
#     freshly drawn from the group-level distributions (*_rep).
# vpn is declared with upper=N_subjects so that a subject index outside
# 1..N_subjects is rejected when the data are read, rather than surfacing as
# an out-of-range index error on theta_basic_*[vpn] during sampling.
m01 = '''
data {
  int<lower=0> N;
  int<lower=0,upper=1> response[N];
  vector[N] is_basic_1;
  vector[N] is_basic_2;
  vector[N] is_basic_3;
  vector[N] is_basic_4;
  vector[N] is_full_energy;
  vector[N] is_low_energy_LC;
  vector[N] is_low_energy_HC;
  int<lower=0> N_subjects;
  int<lower=1, upper=N_subjects> vpn[N];
}

parameters {
// hyper paramters 
  real mu_theta_basic_1;
  real mu_theta_basic_2;
  real mu_theta_basic_3;
  real mu_theta_basic_4;
  real<lower=0> sigma_theta_basic_1;
  real<lower=0> sigma_theta_basic_2;
  real<lower=0> sigma_theta_basic_3;
  real<lower=0> sigma_theta_basic_4;

// parameters
  vector[N_subjects] theta_basic_1;
  vector[N_subjects] theta_basic_2;
  vector[N_subjects] theta_basic_3;
  vector[N_subjects] theta_basic_4;
  real theta_full_energy;
  real theta_low_energy_LC;
  real theta_low_energy_HC;
}

model {
//hyper priors
  mu_theta_basic_1 ~ normal(0,2);
  mu_theta_basic_2 ~ normal(0,2);
  mu_theta_basic_3 ~ normal(0,2);
  mu_theta_basic_4 ~ normal(0,2);
  sigma_theta_basic_1 ~ normal(0,2);
  sigma_theta_basic_2 ~ normal(0,2);
  sigma_theta_basic_3 ~ normal(0,2);
  sigma_theta_basic_4 ~ normal(0,2);

// priors
  theta_basic_1 ~ normal(mu_theta_basic_1,sigma_theta_basic_1);
  theta_basic_2 ~ normal(mu_theta_basic_2,sigma_theta_basic_2);
  theta_basic_3 ~ normal(mu_theta_basic_3,sigma_theta_basic_3);
  theta_basic_4 ~ normal(mu_theta_basic_4,sigma_theta_basic_4);
  theta_full_energy ~ normal(0,2);
  theta_low_energy_LC ~ normal(0,2);
  theta_low_energy_HC ~ normal(0,2);

// likelihood 
  response ~ bernoulli_logit(theta_full_energy * is_full_energy + theta_low_energy_LC * is_low_energy_LC + theta_low_energy_HC * is_low_energy_HC + theta_basic_1[vpn] .* is_basic_1 + theta_basic_2[vpn] .* is_basic_2 + theta_basic_3[vpn] .* is_basic_3 + theta_basic_4[vpn] .* is_basic_4);
}

generated quantities {
  vector[N] log_lik;
  vector[N] response_new;
  vector[N_subjects] theta_basic_1_rep;
  vector[N_subjects] theta_basic_2_rep;
  vector[N_subjects] theta_basic_3_rep;
  vector[N_subjects] theta_basic_4_rep;


// pointwise log-likelihood
  for (n in 1:N) {
    log_lik[n] = bernoulli_logit_lpmf(response[n]  |  (theta_full_energy * is_full_energy[n] + theta_low_energy_LC * is_low_energy_LC[n] + theta_low_energy_HC * is_low_energy_HC[n] + theta_basic_1[vpn[n]] * is_basic_1[n] + theta_basic_2[vpn[n]] * is_basic_2[n] + theta_basic_3[vpn[n]] * is_basic_3[n] + theta_basic_4[vpn[n]] * is_basic_4[n]));
    }

// posterior predictive simulation  
  for (n in 1:N_subjects){
    theta_basic_1_rep[n] = normal_rng(mu_theta_basic_1, sigma_theta_basic_1);
    theta_basic_2_rep[n] = normal_rng(mu_theta_basic_2, sigma_theta_basic_2);
    theta_basic_3_rep[n] = normal_rng(mu_theta_basic_3, sigma_theta_basic_3);
    theta_basic_4_rep[n] = normal_rng(mu_theta_basic_4, sigma_theta_basic_4);
    }  

  for (n in 1:N){
    response_new[n] = bernoulli_logit_rng(theta_full_energy * is_full_energy[n] + theta_low_energy_LC * is_low_energy_LC[n] + theta_low_energy_HC * is_low_energy_HC[n] + theta_basic_1_rep[vpn[n]] * is_basic_1[n] + theta_basic_2_rep[vpn[n]] * is_basic_2[n] + theta_basic_3_rep[vpn[n]] * is_basic_3[n] + theta_basic_4_rep[vpn[n]] * is_basic_4[n]);
    } 
}
'''

#### Compiling

In [8]:
# Compile the SM Stan program (m01) into a PyStan model object, with the
# compiler's verbose output switched off.
sm01 = pystan.StanModel(
    model_code=m01,
    verbose=False,
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_e4ff3fb425a2014d4aad18ac4f9e5dff NOW.


#### Specifying the data

In [9]:

# Assemble the data dictionary passed to the SM Stan model (m01).

# Keep only trials without a timeout; filter the frame once instead of once
# per extracted column.
idx = (dat['timeout'] == 0)
valid_trials = dat.loc[idx]

# Columns are pulled out as Series so every array is 1-D even when a single
# trial remains (squeezing a one-column frame would collapse it to a 0-d
# array and break len(response) below).
# The modelled outcome is 1 where the recorded response equals 0, else 0.
response = (valid_trials['response'] == 0).to_numpy(dtype='int')
is_full_energy = valid_trials['is_full_energy'].to_numpy(dtype='int')
is_low_energy_LC = valid_trials['is_low_energy_LC'].to_numpy(dtype='int')
is_low_energy_HC = valid_trials['is_low_energy_HC'].to_numpy(dtype='int')
is_basic_1 = valid_trials['is_basic_1'].to_numpy(dtype='int')
is_basic_2 = valid_trials['is_basic_2'].to_numpy(dtype='int')
is_basic_3 = valid_trials['is_basic_3'].to_numpy(dtype='int')
is_basic_4 = valid_trials['is_basic_4'].to_numpy(dtype='int')

# Participant codes are shifted by 100 to give the subject index that the Stan
# model uses to look up per-subject parameters.
# NOTE(review): this presumes codes run 101, 102, ... without gaps - checked below.
vpn = valid_trials['vpn'].to_numpy() - 100
N_subjects = len(np.unique(vpn))

# The Stan model indexes vectors of length N_subjects with vpn, so the indices
# must be exactly 1..N_subjects. Fail here with a readable message rather
# than inside the sampler.
if N_subjects and (vpn.min() != 1 or vpn.max() != N_subjects):
    raise ValueError(
        "Subject indices (vpn - 100) must cover 1..N_subjects without gaps; "
        f"got min={vpn.min()}, max={vpn.max()}, N_subjects={N_subjects}."
    )

dat_dict = {
    'N': len(response),
    'response': response,
    'is_full_energy': is_full_energy,
    'is_low_energy_LC': is_low_energy_LC,
    'is_low_energy_HC': is_low_energy_HC,
    'is_basic_1': is_basic_1,
    'is_basic_2': is_basic_2,
    'is_basic_3': is_basic_3,
    'is_basic_4': is_basic_4,
    'N_subjects': N_subjects,
    'vpn': vpn,
}

#### Sampling posterior

In [10]:
# Sample the SM posterior: 4 chains, iter=2000 with warmup=1000, no thinning,
# a raised adapt_delta of 0.99 and a fixed seed so the run is repeatable.
res_sm01 = sm01.sampling(
    data=dat_dict,
    iter=2000,
    warmup=1000,
    thin=1,
    chains=4,
    control={'adapt_delta': 0.99},
    seed=101,
    verbose=False,
)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


#### Computing leave-one-out cross-validation information criterion (LOOIC) for model comparison 

In [11]:
# Wrap the SM fit as ArviZ InferenceData, registering the Stan variable
# 'log_lik' as the pointwise log-likelihood.
idata_sm01 = az.from_pystan(
    posterior=res_sm01,
    log_likelihood='log_lik',
)
# Leave-one-out cross-validation on the deviance scale, keeping the
# pointwise values.
looic_sm01 = az.loo(idata_sm01, pointwise=True, scale='deviance')

  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


# PM

#### Specifying the model

In [12]:
# Stan program for the PM model: a hierarchical Bernoulli-logit regression.
#   * theta_basic (coefficient on is_basic) and beta_dv (coefficient on dv)
#     are per-subject, each drawn from a group-level normal(mu_*, sigma_*).
#   * theta_full_energy, theta_low_energy_LC and theta_low_energy_HC are
#     coefficients shared by all subjects.
#   * generated quantities returns the pointwise log-likelihood (log_lik) and
#     simulated responses (response_new) that use per-subject coefficients
#     freshly drawn from the group-level distributions (*_rep).
# vpn is declared with upper=N_subjects so that a subject index outside
# 1..N_subjects is rejected when the data are read, rather than surfacing as
# an out-of-range index error on theta_basic[vpn] / beta_dv[vpn] during sampling.
m02 = '''
data {
  int<lower=0> N;
  int<lower=0,upper=1> response[N];
  vector[N] dv;
  vector[N] is_basic;
  vector[N] is_full_energy;
  vector[N] is_low_energy_LC;
  vector[N] is_low_energy_HC;
  int<lower=0> N_subjects;
  int<lower=1, upper=N_subjects> vpn[N];
}

parameters {
//hyper parameters
  real mu_theta_basic;
  real mu_beta_dv;
  real<lower=0> sigma_theta_basic;
  real<lower=0> sigma_beta_dv;
  
//parameters 
  vector[N_subjects] theta_basic;
  real theta_full_energy;
  real theta_low_energy_LC;
  real theta_low_energy_HC;
  vector[N_subjects] beta_dv;
}

model {
//hyper priors
  mu_theta_basic ~ normal(0,2);
  mu_beta_dv ~ normal(0,2);
  sigma_theta_basic ~ normal(0,2);
  sigma_beta_dv ~ normal(0,2);

// priors 
  theta_basic ~ normal(mu_theta_basic, sigma_theta_basic);
  theta_full_energy ~ normal(0, 2);
  theta_low_energy_LC ~ normal(0, 2);
  theta_low_energy_HC ~ normal(0, 2);
  beta_dv ~ normal(mu_beta_dv,sigma_beta_dv);  

// likelihood 
  response ~ bernoulli_logit(theta_full_energy * is_full_energy + theta_low_energy_LC * is_low_energy_LC + theta_low_energy_HC * is_low_energy_HC + theta_basic[vpn] .* is_basic + beta_dv[vpn] .* dv);
}

generated quantities {
  vector[N] log_lik;
  vector[N] response_new;
  vector[N_subjects] theta_basic_rep;
  vector[N_subjects] beta_dv_rep;

// pointwise log-likelihood
  for (n in 1:N) {
    log_lik[n] = bernoulli_logit_lpmf(response[n]  |  (theta_full_energy * is_full_energy[n] + theta_low_energy_LC * is_low_energy_LC[n] + theta_low_energy_HC * is_low_energy_HC[n] + theta_basic[vpn[n]] * is_basic[n] + beta_dv[vpn[n]] * dv[n]));
    }

// posterior predictive simulation  
  for (n in 1:N_subjects){
    theta_basic_rep[n] = normal_rng(mu_theta_basic, sigma_theta_basic);
    beta_dv_rep[n] = normal_rng(mu_beta_dv, sigma_beta_dv);
    }

  for (n in 1:N){
    response_new[n] = bernoulli_logit_rng(theta_full_energy * is_full_energy[n] + theta_low_energy_LC * is_low_energy_LC[n] + theta_low_energy_HC * is_low_energy_HC[n] + theta_basic_rep[vpn[n]] * is_basic[n] + beta_dv_rep[vpn[n]] * dv[n]);
    } 
}
'''

#### Compiling

In [13]:
# Compile the PM Stan program (m02) into a PyStan model object, with the
# compiler's verbose output switched off.
sm02 = pystan.StanModel(
    model_code=m02,
    verbose=False,
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_430bf6b44e78bc5e7461815b80fab04b NOW.


#### Specifying the data

In [14]:
# Assemble the data dictionary passed to the PM Stan model (m02).

# Keep only trials without a timeout; filter the frame once instead of once
# per extracted column.
idx = (dat['timeout'] == 0)
valid_trials = dat.loc[idx]

# Columns are pulled out as Series so every array is 1-D even when a single
# trial remains (squeezing a one-column frame would collapse it to a 0-d
# array and break len(response) below).
# The modelled outcome is 1 where the recorded response equals 0, else 0.
response = (valid_trials['response'] == 0).to_numpy(dtype='int')
is_full_energy = valid_trials['is_full_energy'].to_numpy(dtype='int')
is_low_energy_LC = valid_trials['is_low_energy_LC'].to_numpy(dtype='int')
is_low_energy_HC = valid_trials['is_low_energy_HC'].to_numpy(dtype='int')
is_basic = valid_trials['is_basic'].to_numpy(dtype='int')

# Participant codes are shifted by 100 to give the subject index that the Stan
# model uses to look up per-subject parameters.
# NOTE(review): this presumes codes run 101, 102, ... without gaps - checked below.
vpn = valid_trials['vpn'].to_numpy() - 100
N_subjects = len(np.unique(vpn))

# The 'dv_planning' column is passed to Stan as the regressor `dv`.
dv = valid_trials['dv_planning'].to_numpy()

# The Stan model indexes vectors of length N_subjects with vpn, so the indices
# must be exactly 1..N_subjects. Fail here with a readable message rather
# than inside the sampler.
if N_subjects and (vpn.min() != 1 or vpn.max() != N_subjects):
    raise ValueError(
        "Subject indices (vpn - 100) must cover 1..N_subjects without gaps; "
        f"got min={vpn.min()}, max={vpn.max()}, N_subjects={N_subjects}."
    )

dat_dict = {
    'N': len(response),
    'response': response,
    'dv': dv,
    'is_full_energy': is_full_energy,
    'is_low_energy_LC': is_low_energy_LC,
    'is_low_energy_HC': is_low_energy_HC,
    'is_basic': is_basic,
    'N_subjects': N_subjects,
    'vpn': vpn,
}

#### Sampling posterior

In [15]:
# Sample the PM posterior: 4 chains, iter=2000 with warmup=1000, no thinning,
# adapt_delta of 0.95 and a fixed seed so the run is repeatable.
res_sm02 = sm02.sampling(
    data=dat_dict,
    iter=2000,
    warmup=1000,
    thin=1,
    chains=4,
    control={'adapt_delta': 0.95},
    seed=101,
    verbose=False,
)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


#### Computing leave-one-out cross-validation information criterion (LOOIC) for model comparison 

In [16]:
# Wrap the PM fit as ArviZ InferenceData, registering the Stan variable
# 'log_lik' as the pointwise log-likelihood.
idata_sm02 = az.from_pystan(
    posterior=res_sm02,
    log_likelihood='log_lik',
)
# Leave-one-out cross-validation on the deviance scale, keeping the
# pointwise values.
looic_sm02 = az.loo(idata_sm02, pointwise=True, scale='deviance')

# HM

#### Specifying the model

In [37]:
# Stan program for the HM model: a hierarchical Bernoulli-logit regression.
#   * theta_basic_1..4 are per-subject coefficients on the is_basic_1..4
#     regressors, each drawn from its own group-level normal(mu_*, sigma_*).
#   * beta_dv_23 and beta_dv_14 are per-subject coefficients on dv, applied
#     only where is_23 / is_14 is non-zero (dv is multiplied by the indicator),
#     each drawn from its own group-level normal(mu_*, sigma_*).
#   * theta_full_energy, theta_low_energy_LC and theta_low_energy_HC are
#     coefficients shared by all subjects.
#   * generated quantities returns the pointwise log-likelihood (log_lik) and
#     simulated responses (response_new) that use per-subject coefficients
#     freshly drawn from the group-level distributions (*_rep*).
# vpn is declared with upper=N_subjects so that a subject index outside
# 1..N_subjects is rejected when the data are read, rather than surfacing as
# an out-of-range index error on the per-subject vectors during sampling.
m03 = '''
data {
  int<lower=0> N;
  int<lower=0,upper=1> response[N];
  vector[N] dv;
  vector[N] is_basic_1;
  vector[N] is_basic_2;
  vector[N] is_basic_3;
  vector[N] is_basic_4;
  vector[N] is_23;
  vector[N] is_14;
  vector[N] is_full_energy;
  vector[N] is_low_energy_LC;
  vector[N] is_low_energy_HC;
  int<lower=0> N_subjects;
  int<lower=1, upper=N_subjects> vpn[N];
}

parameters {
// hyper paramters 
  real mu_theta_basic_1;
  real mu_theta_basic_2;
  real mu_theta_basic_3;
  real mu_theta_basic_4;
  real mu_beta_dv_23;  
  real mu_beta_dv_14;  
  real<lower=0> sigma_theta_basic_1;
  real<lower=0> sigma_theta_basic_2;
  real<lower=0> sigma_theta_basic_3;
  real<lower=0> sigma_theta_basic_4;
  real<lower=0> sigma_beta_dv_23;
  real<lower=0> sigma_beta_dv_14;


// parameters
  vector[N_subjects] theta_basic_1;
  vector[N_subjects] theta_basic_2;
  vector[N_subjects] theta_basic_3;
  vector[N_subjects] theta_basic_4;
  real theta_full_energy;
  real theta_low_energy_LC;
  real theta_low_energy_HC;
  vector[N_subjects] beta_dv_23;
  vector[N_subjects] beta_dv_14;

}

model {
//hyper priors
  mu_theta_basic_1 ~ normal(0,2);
  mu_theta_basic_2 ~ normal(0,2);
  mu_theta_basic_3 ~ normal(0,2);
  mu_theta_basic_4 ~ normal(0,2);
  mu_beta_dv_23 ~ normal(0,2);
  mu_beta_dv_14 ~ normal(0,2);  
  sigma_theta_basic_1 ~ normal(0,2);
  sigma_theta_basic_2 ~ normal(0,2);
  sigma_theta_basic_3 ~ normal(0,2);
  sigma_theta_basic_4 ~ normal(0,2);
  sigma_beta_dv_23 ~ normal(0,2);
  sigma_beta_dv_14 ~ normal(0,2);

// priors
  theta_basic_1 ~ normal(mu_theta_basic_1,sigma_theta_basic_1);
  theta_basic_2 ~ normal(mu_theta_basic_2,sigma_theta_basic_2);
  theta_basic_3 ~ normal(mu_theta_basic_3,sigma_theta_basic_3);
  theta_basic_4 ~ normal(mu_theta_basic_4,sigma_theta_basic_4);
  theta_full_energy ~ normal(0,2);
  theta_low_energy_LC ~ normal(0,2);
  theta_low_energy_HC ~ normal(0,2);
  beta_dv_23 ~ normal(mu_beta_dv_23,sigma_beta_dv_23);  
  beta_dv_14 ~ normal(mu_beta_dv_14,sigma_beta_dv_14);  

// likelihood 
  response ~ bernoulli_logit(theta_full_energy * is_full_energy + theta_low_energy_LC * is_low_energy_LC + theta_low_energy_HC * is_low_energy_HC + theta_basic_1[vpn] .* is_basic_1 + theta_basic_2[vpn] .* is_basic_2 + theta_basic_3[vpn] .* is_basic_3 + theta_basic_4[vpn] .* is_basic_4 + beta_dv_23[vpn] .* dv .* is_23 + beta_dv_14[vpn] .* dv .* is_14);
}

generated quantities {
  vector[N] log_lik;
  vector[N] response_new;
  vector[N_subjects] theta_basic_1_rep;
  vector[N_subjects] theta_basic_2_rep;
  vector[N_subjects] theta_basic_3_rep;
  vector[N_subjects] theta_basic_4_rep;
  vector[N_subjects] beta_dv_rep_23;
  vector[N_subjects] beta_dv_rep_14;


// pointwise log-likelihood
  for (n in 1:N) {
    log_lik[n] = bernoulli_logit_lpmf(response[n]  |  (theta_full_energy * is_full_energy[n] + theta_low_energy_LC * is_low_energy_LC[n] + theta_low_energy_HC * is_low_energy_HC[n] + theta_basic_1[vpn[n]] * is_basic_1[n] + theta_basic_2[vpn[n]] * is_basic_2[n] + theta_basic_3[vpn[n]] * is_basic_3[n] + theta_basic_4[vpn[n]] * is_basic_4[n] + beta_dv_23[vpn[n]] * dv[n] * is_23[n] + beta_dv_14[vpn[n]] * dv[n] * is_14[n]));
    }

// posterior predictive simulation  
  for (n in 1:N_subjects){
    theta_basic_1_rep[n] = normal_rng(mu_theta_basic_1, sigma_theta_basic_1);
    theta_basic_2_rep[n] = normal_rng(mu_theta_basic_2, sigma_theta_basic_2);
    theta_basic_3_rep[n] = normal_rng(mu_theta_basic_3, sigma_theta_basic_3);
    theta_basic_4_rep[n] = normal_rng(mu_theta_basic_4, sigma_theta_basic_4);
    beta_dv_rep_23[n] = normal_rng(mu_beta_dv_23, sigma_beta_dv_23);
    beta_dv_rep_14[n] = normal_rng(mu_beta_dv_14, sigma_beta_dv_14);

    }  

  for (n in 1:N){
    response_new[n] = bernoulli_logit_rng(theta_full_energy * is_full_energy[n] + theta_low_energy_LC * is_low_energy_LC[n] + theta_low_energy_HC * is_low_energy_HC[n] + theta_basic_1_rep[vpn[n]] * is_basic_1[n] + theta_basic_2_rep[vpn[n]] * is_basic_2[n] + theta_basic_3_rep[vpn[n]] * is_basic_3[n] + theta_basic_4_rep[vpn[n]] * is_basic_4[n] + beta_dv_rep_23[vpn[n]] * dv[n] * is_23[n] + beta_dv_rep_14[vpn[n]] * dv[n] * is_14[n]);
    } 
}
'''

#### Compiling

In [38]:
# Compile the HM Stan program (m03) into a PyStan model object, with the
# compiler's verbose output switched off.
sm03 = pystan.StanModel(
    model_code=m03,
    verbose=False,
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_e96711c7616c1e7d3db6e6183c21a3cb NOW.


#### Specifying the data

In [39]:
# Assemble the data dictionary passed to the HM Stan model (m03).

# Keep only trials without a timeout; filter the frame once instead of once
# per extracted column.
idx = (dat['timeout'] == 0)
valid_trials = dat.loc[idx]

# Columns are pulled out as Series so every array is 1-D even when a single
# trial remains (squeezing a one-column frame would collapse it to a 0-d
# array and break len(response) below).
# The modelled outcome is 1 where the recorded response equals 0, else 0.
response = (valid_trials['response'] == 0).to_numpy(dtype='int')
is_full_energy = valid_trials['is_full_energy'].to_numpy(dtype='int')
is_low_energy_LC = valid_trials['is_low_energy_LC'].to_numpy(dtype='int')
is_low_energy_HC = valid_trials['is_low_energy_HC'].to_numpy(dtype='int')
is_basic_1 = valid_trials['is_basic_1'].to_numpy(dtype='int')
is_basic_2 = valid_trials['is_basic_2'].to_numpy(dtype='int')
is_basic_3 = valid_trials['is_basic_3'].to_numpy(dtype='int')
is_basic_4 = valid_trials['is_basic_4'].to_numpy(dtype='int')
is_14 = valid_trials['is_14'].to_numpy(dtype='int')
is_23 = valid_trials['is_23'].to_numpy(dtype='int')

# Participant codes are shifted by 100 to give the subject index that the Stan
# model uses to look up per-subject parameters.
# NOTE(review): this presumes codes run 101, 102, ... without gaps - checked below.
vpn = valid_trials['vpn'].to_numpy() - 100
N_subjects = len(np.unique(vpn))

# The 'dv_planning' column is passed to Stan as the regressor `dv`.
dv = valid_trials['dv_planning'].to_numpy()

# The Stan model indexes vectors of length N_subjects with vpn, so the indices
# must be exactly 1..N_subjects. Fail here with a readable message rather
# than inside the sampler.
if N_subjects and (vpn.min() != 1 or vpn.max() != N_subjects):
    raise ValueError(
        "Subject indices (vpn - 100) must cover 1..N_subjects without gaps; "
        f"got min={vpn.min()}, max={vpn.max()}, N_subjects={N_subjects}."
    )

dat_dict = {
    'N': len(response),
    'response': response,
    'dv': dv,
    'is_full_energy': is_full_energy,
    'is_low_energy_LC': is_low_energy_LC,
    'is_low_energy_HC': is_low_energy_HC,
    'is_basic_1': is_basic_1,
    'is_basic_2': is_basic_2,
    'is_basic_3': is_basic_3,
    'is_basic_4': is_basic_4,
    'is_23': is_23,
    'is_14': is_14,
    'N_subjects': N_subjects,
    'vpn': vpn,
}

#### Sampling posterior

In [40]:
# Sample the HM posterior: 4 chains, iter=8000 with warmup=4000, no thinning,
# a raised adapt_delta of 0.99 and a fixed seed so the run is repeatable.
res_sm03 = sm03.sampling(
    data=dat_dict,
    iter=8000,
    warmup=4000,
    thin=1,
    chains=4,
    control={'adapt_delta': 0.99},
    seed=101,
    verbose=False,
)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


#### Computing leave-one-out cross-validation information criterion (LOOIC) for model comparison 

In [41]:
# Wrap the HM fit as ArviZ InferenceData, registering the Stan variable
# 'log_lik' as the pointwise log-likelihood.
idata_sm03 = az.from_pystan(
    posterior=res_sm03,
    log_likelihood='log_lik',
)
# Leave-one-out cross-validation on the deviance scale, keeping the
# pointwise values.
looic_sm03 = az.loo(idata_sm03, pointwise=True, scale='deviance')

  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "
