In [1]:
import os

import arviz as az
import numpy as np
import pandas as pd
from cmdstanpy import CmdStanModel

In [2]:
def center(vals: np.ndarray) -> np.ndarray:
    """Shift ``vals`` so that its NaN-ignoring mean becomes zero.

    Args:
        vals: Numeric values; NaN entries are skipped when the mean is
            computed and stay NaN in the result.

    Returns:
        ``vals`` with ``np.nanmean(vals)`` subtracted from every element.
    """
    mean_ignoring_nan = np.nanmean(vals)
    return vals - mean_ignoring_nan


def standardize(vals: np.ndarray) -> np.ndarray:
    """Rescale ``vals`` to zero mean and unit standard deviation, ignoring NaNs.

    Args:
        vals: Numeric values to rescale.

    Returns:
        The centered values divided by their ``np.nanstd`` (NumPy's default
        ``ddof=0``, i.e. the population standard deviation).
    """
    deviations = center(vals)
    spread = np.nanstd(deviations)
    return deviations / spread

In [3]:
# Read the semicolon-separated WaffleDivorce table and append standardized
# copies of three of its columns under the short names A, D and M.
d = pd.read_csv('../../data/WaffleDivorce.csv', sep=';', header=0).assign(
    A=lambda frame: standardize(frame['MedianAgeMarriage']),
    D=lambda frame: standardize(frame['Divorce']),
    M=lambda frame: standardize(frame['Marriage']),
)

In [6]:
# Stan program holding two linear regressions that are fitted jointly:
#   M ~ normal(aM + bAM * A, sigma_M)
#   D ~ normal(a + bM * M + bA * A, sigma)
# Priors: intercepts normal(0, 0.2), slopes normal(0, 0.5), scales exponential(1).
# The `generated quantities` block recomputes mu_M and mu; the variables of the
# same name declared inside `model` are local to that block.
# NOTE(review): the loops index A (length N) over 1:N_M and M (length N_M)
# over 1:N, so the program implicitly requires N == N_M -- confirm that the
# data passed in always has equal lengths.
# The literal begins with a newline, so line 1 of the written .stan file is
# blank and Stan's reported line numbers are offset by one from the first
# line of program text.
divorce_code = '''
data{
    int<lower=1> N;
    int<lower=1> N_M;
    vector[N] D;
    vector[N_M] M;
    vector[N] A;
}
parameters{
    real a;
    real bM;
    real bA;
    real<lower=0> sigma;
    real aM;
    real bAM;
    real<lower=0> sigma_M;
}
model{
    vector[N_M] mu_M;
    vector[N] mu;
    sigma_M ~ exponential( 1 );
    bAM ~ normal( 0 , 0.5 );
    aM ~ normal( 0 , 0.2 );
    for ( i in 1:N_M ) {
        mu_M[i] = aM + bAM * A[i];
    }
    M ~ normal( mu_M , sigma_M );
    sigma ~ exponential( 1 );
    bA ~ normal( 0 , 0.5 );
    bM ~ normal( 0 , 0.5 );
    a ~ normal( 0 , 0.2 );
    for ( i in 1:N ) {
        mu[i] = a + bM * M[i] + bA * A[i];
    }
    D ~ normal( mu , sigma );
}
generated quantities{
    vector[N_M] mu_M;
    vector[N] mu;
    for ( i in 1:N_M ) {
        mu_M[i] = aM + bAM * A[i];
    }
    for ( i in 1:N ) {
        mu[i] = a + bM * M[i] + bA * A[i];
    }
}

'''

# Write the Stan program to disk; CmdStanModel is given the path of that file.
stan_file = './stn/waffledivorce.stan'
# open(..., 'w') does not create parent directories, so make sure the target
# folder exists (e.g. on a fresh checkout) instead of failing with
# FileNotFoundError.
os.makedirs(os.path.dirname(stan_file), exist_ok=True)
with open(stan_file, 'w') as f:
    print(divorce_code, file=f)

# force_compile=True asks cmdstanpy to rebuild the executable even when one
# already exists next to the .stan file.
stan_model = CmdStanModel(stan_file=stan_file, force_compile=True)

14:08:29 - cmdstanpy - INFO - compiling stan file /Users/rehabnaeem/Documents/Coding-Projects/bayesian-analysis/references/Stan-Modelling/stn/waffledivorce.stan to exe file /Users/rehabnaeem/Documents/Coding-Projects/bayesian-analysis/references/Stan-Modelling/stn/waffledivorce
14:08:39 - cmdstanpy - INFO - compiled model executable: /Users/rehabnaeem/Documents/Coding-Projects/bayesian-analysis/references/Stan-Modelling/stn/waffledivorce


In [11]:
# Assemble the input dictionary for the Stan program: the three standardized
# columns as plain lists, plus the length of A as N and the length of M as N_M.
data = d.loc[:, ['A', 'M', 'D']].to_dict(orient='list')
data.update(N=len(data['A']), N_M=len(data['M']))

In [13]:
# Fix the sampler's RNG seed so that Restart & Run All reproduces the same
# posterior draws; without it every run yields different samples.
SAMPLER_SEED = 42
# NOTE(review): the sampler may log "normal_lpdf: Scale parameter is 0"
# exceptions for this model -- presumably rejected proposals where sigma
# underflows to zero; confirm with model_sample.diagnose() if they persist.
model_sample = stan_model.sample(data=data, seed=SAMPLER_SEED)

14:11:45 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

14:11:45 - cmdstanpy - INFO - CmdStan done processing.
Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in 'waffledivorce.stan', line 35, column 4 to column 29)
Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in 'waffledivorce.stan', line 35, column 4 to column 29)
Consider re-running with show_console=True if the above output is unclear!





In [None]:
# Convert the CmdStanPy fit to ArviZ's container, attaching the standardized
# D column as the observed data under the key "D".
cmdstanpy_data = az.from_cmdstanpy(
    observed_data={"D": d["D"]},
    posterior=model_sample,
)

