In [42]:
# Ruochong Liu's hw6q1
import arviz as az
import numpy as np
import pymc as pm

x = [24, 32, 48, 56, np.nan, 70, 72, 75, 80, 96]
y = [102.8, 104.5, 106.5, 107.0, 107.1, 105.1, 103.9, np.nan, 103.2, 102.1]

# create masked data
y = y.copy()
y = np.nan_to_num(y, nan=-1)
y = np.ma.masked_values(y, value=-1)

x = x.copy()
x = np.nan_to_num(x, nan=-1)
x = np.ma.masked_values(x, value=-1)

with pm.Model() as m:

    tau = pm.Gamma("tau", alpha=0.001, beta=0.001)
    beta0 = pm.Normal("beta0", mu=0, tau=0.001)
    beta1 = pm.Normal("beta1", mu=0, tau=0.001)
    beta2 = pm.Normal("beta2", mu=0, tau=0.001)
    variance = 1/tau
    
    # tried a bunch of different imputed priors here but no luck
    x_imputed = pm.TruncatedNormal("x_imputed", 61, 23, lower=0, observed=x)
    # x_imputed = pm.Uniform("x_imputed",lower=0,upper=200,observed=x)

    mu = beta0 + beta1 * x_imputed + beta2 * pm.math.sqr(x_imputed)

    likelihood = pm.Normal("likelihood", mu=mu, tau=tau, observed=y, shape=y.shape[0])
    
    # Bayesian R2 from fat1.odc (U
    sse = (10 - 3) * variance
    cy = y - y.mean()
    sst = pm.math.dot(cy, cy)
    br2 = pm.Deterministic("br2", 1 - sse/sst)
    

    trace = pm.sample(
        10000,
        tune=2000,
        cores=4,
        init="jitter+adapt_diag",
        random_seed=20,
        step=[pm.NUTS(target_accept=0.9)],
    )


Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, beta0, beta1, beta2, x_imputed_missing, likelihood_missing]


Sampling 4 chains for 2_000 tune and 2_807 draw iterations (8_000 + 11_228 draws total) took 51 seconds.


In [41]:
az.summary(trace, hdi_prob=.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,96.397,3.273,89.637,102.548,0.038,0.027,7960.0,7311.0,1.0
beta1,0.354,0.119,0.119,0.588,0.001,0.001,7732.0,7118.0,1.0
beta2,-0.003,0.001,-0.005,-0.001,0.0,0.0,8126.0,7630.0,1.0
likelihood_missing[0],105.04,1.51,102.105,108.12,0.012,0.008,16902.0,14275.0,1.0
tau,0.813,0.477,0.066,1.728,0.01,0.009,3294.0,1490.0,1.0
x_imputed_missing[0],56.723,11.477,34.379,78.682,0.095,0.067,14487.0,13988.0,1.0
br2,0.533,0.509,-0.182,0.915,0.006,0.004,3294.0,1490.0,1.0


In [39]:
ppc = pm.sample_posterior_predictive(trace, model=m)

In [38]:
ppc

In [40]:
# get the mean y_pred across all chains
y_pred = np.array(ppc.posterior_predictive.likelihood.mean(axis=(0, 1)))

az.r2_score(y, y_pred)

r2        0.066541
r2_std    0.000000
dtype: float64

In [45]:
# Hanbit Kim's hw6q1
import arviz as az
import numpy as np
import pymc as pm

x = [24, 32, 48, 56, np.nan, 70, 72, 75, 80, 96]
y = [102.8, 104.5, 106.5, 107.0, 107.1, 105.1, 103.9, np.nan, 103.2, 102.1]

# create masked data
y = y.copy()
y = np.nan_to_num(y, nan=-1)
y = np.ma.masked_values(y, value=-1)

x = x.copy()
x = np.nan_to_num(x, nan=-1)
x = np.ma.masked_values(x, value=-1)

with pm.Model() as m:

    tau = pm.Gamma("tau", alpha=0.001, beta=0.001)
    beta0 = pm.Normal("beta0", mu=0, tau=0.001)
    beta1 = pm.Normal("beta1", mu=0, tau=0.001)
    beta2 = pm.Normal("beta2", mu=0, tau=0.001)
    variance = 1/tau
    
    #x_imputed = pm.TruncatedNormal("x_imputed", 61, 23, lower=0, observed=x)
    x_imputed = pm.Uniform("x_imputed",lower=24,upper=100, observed=x)

    mu = beta0 + beta1 * x_imputed + beta2 * x_imputed**2

    likelihood = pm.Normal("likelihood", mu=mu, tau=tau, observed=y)
    
    # Bayesian R2 from fat1.odc (U
    sse = (10 - 3) * variance
    cy = y - y.mean()
    sst = pm.math.dot(cy, cy)
    br2 = pm.Deterministic("br2", 1 - sse/sst)
    

    trace = pm.sample(
        10000,
        tune=2000,
        cores=4,
        init="jitter+adapt_diag",
        random_seed=20,
        step=[pm.NUTS(target_accept=0.9)],
    )
    ppc = pm.sample_posterior_predictive(trace)

az.summary(trace, hdi_prob=.95)

Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, beta0, beta1, beta2, x_imputed_missing, likelihood_missing]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 2_000 tune and 10_000 draw iterations (8_000 + 40_000 draws total) took 127 seconds.
There were 10 divergences after tuning. Increase `target_accept` or reparameterize.
There were 86 divergences after tuning. Increase `target_accept` or reparameterize.
There were 159 divergences after tuning. Increase `target_accept` or reparameterize.
There were 406 divergences after tuning. Increase `target_accept` or reparameterize.


Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,96.54,3.24,89.964,102.819,0.036,0.026,8090.0,9976.0,1.0
beta1,0.349,0.118,0.12,0.591,0.001,0.001,7793.0,9198.0,1.0
beta2,-0.003,0.001,-0.005,-0.001,0.0,0.0,8074.0,9526.0,1.0
likelihood_missing[0],105.049,1.519,102.06,108.152,0.011,0.008,20444.0,15300.0,1.0
tau,0.805,0.476,0.077,1.751,0.009,0.008,3737.0,1705.0,1.0
x_imputed_missing[0],55.613,12.751,30.423,79.948,0.099,0.07,16078.0,12754.0,1.0
br2,0.526,0.5,-0.226,0.915,0.006,0.004,3737.0,1705.0,1.0
