In [2]:
from pathlib import Path

import arviz as az
import numpy as np
import pandas as pd
from cmdstanpy import CmdStanModel

In [3]:
def center(vals: np.ndarray) -> np.ndarray:
    """Subtract the NaN-ignoring mean of ``vals`` from every element.

    Args:
        vals: Numeric values to shift.

    Returns:
        ``vals`` minus ``np.nanmean(vals)``. NaN entries stay NaN because
        subtracting a number from NaN yields NaN.
    """
    nan_aware_mean = np.nanmean(vals)
    return vals - nan_aware_mean


def standardize(vals: np.ndarray) -> np.ndarray:
    """Center ``vals`` and rescale by its NaN-ignoring standard deviation.

    Args:
        vals: Numeric values to standardize.

    Returns:
        The centered values (see ``center``) divided by ``np.nanstd`` of
        those centered values, with ``np.nanstd`` called using its defaults.
    """
    demeaned = center(vals)
    spread = np.nanstd(demeaned)
    return demeaned / spread

In [4]:
# Read the semicolon-delimited table; the first row supplies the column names.
d = pd.read_csv('../../data/WaffleDivorce.csv', sep=';', header=0)

# Attach standardized copies of the three model variables under short names
# (A, D, M), added in that order.
for short_name, source_col in (
    ('A', 'MedianAgeMarriage'),
    ('D', 'Divorce'),
    ('M', 'Marriage'),
):
    d[short_name] = standardize(d[source_col])

In [5]:
# Stan program source, kept as a Python string so it can be written to disk
# and compiled later in the notebook.
#
# The program fits two Gaussian regressions in one model block:
#   M ~ normal(aM + bAM * A, sigma_M)
#   D ~ normal(a + bM * M + bA * A, sigma)
# The intercepts get normal(0, 0.2) priors, the slopes normal(0, 0.5) and the
# scale parameters exponential(1).
#
# The generated quantities block recomputes the two linear predictors
# (mu_M and mu) from the sampled parameters and the supplied data.
#
# NOTE: A (length N) is indexed over 1:N_M and M (length N_M) is indexed over
# 1:N, so the program only runs without an index error when N == N_M.
divorce_code = '''
data{
    int<lower=1> N;
    int<lower=1> N_M;
    vector[N] D;
    vector[N_M] M;
    vector[N] A;
}

parameters{
    real a;
    real bM;
    real bA;
    real<lower=0> sigma;
    real aM;
    real bAM;
    real<lower=0> sigma_M;
}

model{
    vector[N_M] mu_M;
    sigma_M ~ exponential(1);
    bAM ~ normal(0, 0.5);
    aM ~ normal(0, 0.2);
    for ( i in 1:N_M ) {
        mu_M[i] = aM + bAM * A[i];
    }
    M ~ normal(mu_M, sigma_M);
    
    vector[N] mu;
    sigma ~ exponential(1);
    bA ~ normal(0, 0.5);
    bM ~ normal(0, 0.5);
    a ~ normal(0, 0.2);
    for ( i in 1:N ) {
        mu[i] = a + bM * M[i] + bA * A[i];
    }
    D ~ normal(mu, sigma);
}

generated quantities{
    vector[N_M] mu_M;
    vector[N] mu;
    for ( i in 1:N_M ) {
        mu_M[i] = aM + bAM * A[i];
    }
    for (i in 1:N) {
        mu[i] = a + bM * M[i] + bA * A[i];
    }
}

'''

In [9]:
# Persist the Stan program to disk and build a CmdStanModel from it.
stan_file = './stan_models/waffledivorce.stan'
stan_path = Path(stan_file)

# On a fresh checkout the output directory may not exist yet; opening the
# file for writing would then raise FileNotFoundError.
stan_path.parent.mkdir(parents=True, exist_ok=True)

# The previous print(..., file=f) call appended a trailing newline; keep the
# on-disk content identical.
stan_source = divorce_code + '\n'

# Only touch the file when the program text actually changed, so re-running
# this cell does not bump the file's modification time. This should let
# cmdstanpy reuse an up-to-date executable instead of recompiling.
if not stan_path.exists() or stan_path.read_text() != stan_source:
    stan_path.write_text(stan_source)

stan_model = CmdStanModel(stan_file=stan_file)

20:56:07 - cmdstanpy - INFO - compiling stan file /Users/rehabnaeem/Documents/Coding-Projects/bayesian-analysis/references/Stan-Modelling/stan_models/waffledivorce.stan to exe file /Users/rehabnaeem/Documents/Coding-Projects/bayesian-analysis/references/Stan-Modelling/stan_models/waffledivorce
20:56:16 - cmdstanpy - INFO - compiled model executable: /Users/rehabnaeem/Documents/Coding-Projects/bayesian-analysis/references/Stan-Modelling/stan_models/waffledivorce


In [10]:
# Build the Stan data dictionary: one list per standardized column, followed
# by the two length entries declared in the Stan data block.
data = d.loc[:, ['A', 'M', 'D']].to_dict(orient='list')
data.update(N=len(data['A']), N_M=len(data['M']))

In [16]:
# Run CmdStan's sampler on the prepared data.
# A fixed seed makes the draws reproducible across "Restart & Run All";
# without it every run produces slightly different posterior summaries.
fit = stan_model.sample(data=data, seed=42)

21:03:31 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

21:03:32 - cmdstanpy - INFO - CmdStan done processing.
Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in 'waffledivorce.stan', line 28, column 4 to column 30)
Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in 'waffledivorce.stan', line 38, column 4 to column 26)
Consider re-running with show_console=True if the above output is unclear!





In [37]:
# Summarize the fit, reporting the 5.5th and 94.5th percentiles of each
# quantity alongside the default summary columns.
percentile_bounds = (5.5, 94.5)
fit.summary(percentiles=percentile_bounds)

Unnamed: 0,Mean,MCSE,StdDev,MAD,5.5%,94.5%,ESS_bulk,ESS_tail,R_hat
lp__,-27.887000,0.049254,1.960530,1.775340,-31.462400,-25.444600,1708.53,2084.20,1.001090
a,0.000776,0.001288,0.099667,0.097259,-0.161443,0.159908,6046.80,2649.27,0.999834
bM,-0.058046,0.002694,0.157668,0.154681,-0.312126,0.189295,3490.08,2810.22,1.000650
bA,-0.604529,0.002757,0.160781,0.158365,-0.867456,-0.343911,3446.94,3009.61,1.001340
sigma,0.836453,0.001259,0.087902,0.086275,0.707221,0.987352,5195.01,2881.47,1.000880
...,...,...,...,...,...,...,...,...,...
mu[46],-0.175081,0.001661,0.115163,0.114572,-0.363443,0.007722,4898.73,2790.25,1.000440
mu[47],0.056541,0.001446,0.107605,0.106710,-0.117042,0.229302,5573.71,2945.99,0.999933
mu[48],0.486122,0.001863,0.139857,0.138142,0.265113,0.711012,5670.73,3265.24,1.001940
mu[49],-0.075030,0.002100,0.144548,0.139461,-0.306950,0.155971,4815.31,3481.04,1.000940


In [None]:
# Wrap the CmdStanPy fit in an ArviZ object, attaching the observed
# (standardized) outcome column D from the data frame.
observed = {"D": d["D"]}
cmdstanpy_data = az.from_cmdstanpy(posterior=fit, observed_data=observed)
cmdstanpy_data

In [52]:
# Re-run the generated quantities block on new inputs: A swept over an evenly
# spaced grid from -2 to 2, with M and D filled with zeros. N and N_M are set
# to the grid length so the array sizes match the Stan data declarations.
n_grid = 30
new_data = {
    'A': np.linspace(-2, 2, n_grid),
    'N': n_grid,
    'N_M': n_grid,
    'M': np.zeros(n_grid),
    'D': np.zeros(n_grid),
}
new_quant = stan_model.generate_quantities(data=new_data, previous_fit=fit)

21:25:43 - cmdstanpy - INFO - Chain [1] start processing
21:25:43 - cmdstanpy - INFO - Chain [2] start processing
21:25:43 - cmdstanpy - INFO - Chain [3] start processing
21:25:43 - cmdstanpy - INFO - Chain [4] start processing
21:25:43 - cmdstanpy - INFO - Chain [1] done processing
21:25:43 - cmdstanpy - INFO - Chain [3] done processing
21:25:43 - cmdstanpy - INFO - Chain [2] done processing
21:25:43 - cmdstanpy - INFO - Chain [4] done processing
