In [None]:
from pathlib import Path

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt

# Seeded NumPy generator: later cells draw prior jitter from it and pass it
# to PyMC as `random_seed`.
rng = np.random.default_rng(seed=0)

In [None]:
# Tiny positive offset: the default scaling bounds below (and the lower bounds
# of the truncated priors in the model) use it to stay strictly inside (0, 1).
near_zero = 1e-12

def min_max_scale(x, min=near_zero, max=1. - near_zero):
    """Linearly rescale ``x`` along axis 0 into the interval ``[min, max]``.

    Parameters
    ----------
    x : array-like
        Values to rescale. The minimum and maximum are taken along axis 0,
        so a 2-D input is rescaled column by column.
    min, max : float, optional
        Lower and upper bound of the target interval. The names shadow the
        builtins inside this function; they are kept so existing keyword
        callers continue to work.

    Returns
    -------
    Rescaled values with the same shape as ``x``. A column whose entries are
    all equal has zero range; it is mapped to ``min`` instead of producing
    NaN from a 0/0 division.
    """
    lowest = np.min(x, axis=0)
    span = np.max(x, axis=0) - lowest
    # A constant column has span == 0; dividing by 1 instead leaves its
    # (all-zero) numerator untouched, so the column ends up at `min`.
    safe_span = np.where(span == 0, 1., span)
    x_scaled = (x - lowest) / safe_span
    x_scaled = x_scaled * (max - min) + min
    return x_scaled

In [None]:
# Load the indicator table (one row per country) and rescale every column
# into the open unit interval.
gapminder_series = pd.read_csv(
    "./data/gapminder_quality_of_life_2005_dataset.csv",
    index_col="country",
)
gapminder_data = min_max_scale(gapminder_series.values)

# Mirror columns 1 and 2 around 0.5.
# NOTE(review): presumably these two indicators are "lower is better" and are
# flipped so all columns point the same way - confirm against the CSV header.
for flipped_column in (1, 2):
    gapminder_data[:, flipped_column] = 1. - gapminder_data[:, flipped_column]

# The model works on the transposed layout: one row per indicator.
X = gapminder_data.T

In [None]:
num_scores, dims = gapminder_data.shape


def _jittered_prior(center):
    """Return `dims` prior means: `center` plus uniform noise in [-.125, .125)."""
    return center + .25 * (rng.random(dims) - .5)


# Prior means, one entry per indicator. The three calls consume `rng` in the
# order alpha, p_1, p_2.
alpha_prior = _jittered_prior(.75)
p_1_prior = _jittered_prior(.3125)
p_2_prior = _jittered_prior(.6875)

# Prior mean of each latent score: the average of that observation's scaled
# indicators (X holds one row per indicator, so average over axis 0).
s_prior = X.mean(axis=0)

# Prior locations of the three noise scales.
point_sigma_prior_mu = .1
score_sigma_prior_mu = .135
X_reconstruction_sigma_prior_mu = np.full(dims, .125)

# Shared spread of the noise-scale priors, and how far above its prior
# location each noise scale is allowed to go.
sigma_prior_sigma = .1
sigma_prior_upper_bound_delta = .01

In [None]:
# Probabilistic model: every indicator (row of X) is described by a cubic
# Bezier curve evaluated at one latent score per observation, and the observed
# values are Beta-distributed around that curve.
with pm.Model() as model:
    # --- Noise scales -------------------------------------------------------
    # Each scale is a truncated normal bounded below by `near_zero` and above
    # by its prior location plus `sigma_prior_upper_bound_delta`.
    # Shared standard deviation of the Beta priors on alpha, p_1 and p_2.
    point_sigma = pm.TruncatedNormal(
        name="point_sigma", 
        mu=point_sigma_prior_mu, 
        sigma=sigma_prior_sigma, 
        lower=near_zero, 
        upper=point_sigma_prior_mu + sigma_prior_upper_bound_delta - near_zero
        )
    # Standard deviation of the Beta prior on the latent scores `s`.
    score_sigma = pm.TruncatedNormal(
        name="score_sigma", 
        mu=score_sigma_prior_mu,
        sigma=sigma_prior_sigma, 
        lower=near_zero, 
        upper=score_sigma_prior_mu + sigma_prior_upper_bound_delta - near_zero
        )
    # Observation noise; `mu` is an array of length `dims`, so there is one
    # scale per indicator.
    X_reconstruction_sigma = pm.TruncatedNormal(
        name="X_reconstruction_sigma", 
        mu=X_reconstruction_sigma_prior_mu,
        sigma=sigma_prior_sigma,
        lower=near_zero, 
        upper=X_reconstruction_sigma_prior_mu + sigma_prior_upper_bound_delta - near_zero
        )

    # --- Control points (one value per indicator) ---------------------------
    # The end points p_0 and p_3 are not free: they are derived from alpha so
    # that p_0 + p_3 = 1 and p_3 - p_0 = alpha.
    alpha = pm.Beta(
        name="alpha", 
        mu=alpha_prior, 
        sigma=point_sigma
        )
    p_0 = pm.Deterministic(
        name="p_0", var=.5 * (1. - alpha)
        )
    # Interior control points are free Beta variables.
    p_1 = pm.Beta(
        name="p_1", 
        mu=p_1_prior, 
        sigma=point_sigma
        )
    p_2 = pm.Beta(
        name="p_2", 
        mu=p_2_prior, 
        sigma=point_sigma
        )
    p_3 = pm.Deterministic(
        name="p_3", var=.5 * (1. + alpha)
        )

    # --- Latent scores (one per observation) ---------------------------------
    s = pm.Beta(
        name="s", 
        mu=s_prior, 
        sigma=score_sigma
        )

    # Powers of the score, stacked row-wise: rows are 1, s, s^2, s^3.
    Z = pm.math.stack(
        [pt.ones((num_scores,)),
         s,
         s ** 2.,
         s ** 3.],
         axis=0
    )
    # Coefficient matrix: multiplied with Z, its rows expand to the cubic
    # Bernstein polynomials (1-s)^3, 3s(1-s)^2, 3s^2(1-s) and s^3.
    M = pt.as_tensor(
        [[1., -3.,  3., -1.], 
         [0.,  3., -6.,  3.],
         [0.,  0.,  3., -3.], 
         [0.,  0.,  0.,  1.]]
        )
    # Control points stacked column-wise: one row per indicator, columns
    # p_0..p_3.
    P = pm.math.stack(
        [pt.transpose(p_0), 
         pt.transpose(p_1), 
         pt.transpose(p_2), 
         pt.transpose(p_3)], 
        axis=1
        )
    # Curve value for every (indicator, observation) pair: P @ M @ Z.
    X_reconstruction_mu = pm.math.matmul(pm.math.matmul(P, M), Z)

    # One observed Beta variable per indicator, named X_reconstruction_<i>.
    # The Python name `X_reconstruction` is rebound on every iteration; the
    # variables remain registered in the model under their `name`.
    for i in range(dims):
        X_reconstruction = pm.Beta(
            name=f"X_reconstruction_{i}", 
            mu=X_reconstruction_mu[i, :], 
            sigma=X_reconstruction_sigma[i],
            observed=X[i, :]
            )

In [None]:
# Render the model's dependency graph as the cell output.
pm.model_to_graphviz(model=model)

In [None]:
# Draw 1000 samples from the prior predictive distribution and compare them
# with the observed data before fitting.
with model:
    predictions = pm.sample_prior_predictive(
        samples=1000,
        random_seed=rng,
    )

az.plot_ppc(data=predictions, group="prior")

In [None]:
# Start every free variable at the location of its prior.
initvals = {
    "point_sigma": point_sigma_prior_mu,
    "score_sigma": score_sigma_prior_mu,
    "X_reconstruction_sigma": X_reconstruction_sigma_prior_mu,
    "alpha": alpha_prior,
    "p_1": p_1_prior,
    "p_2": p_2_prior,
    "s": s_prior,
}

# NOTE(review): the PyMC docs describe `nuts_sampler_kwargs` as used only with
# an external sampler; with nuts_sampler="pymc" the "chain_method" entry
# appears to have no effect - confirm whether numpyro/blackjax was intended.
sampler_settings = dict(
    draws=1000,
    tune=1000,
    chains=4,
    cores=4,
    target_accept=.99,
    nuts_sampler="pymc",
    nuts_sampler_kwargs={"chain_method": "vectorized"},
)

with model:
    idata1 = pm.sample(
        random_seed=rng,
        initvals=initvals,
        **sampler_settings,
    )

In [None]:
# Add posterior predictive draws to the existing inference data object.
with model:
    idata1 = pm.sample_posterior_predictive(
        trace=idata1,
        random_seed=rng,
        extend_inferencedata=True,
    )

In [None]:
# Run identifier embedded in every output file name written below.
date_time = "2024-05-19_0945"

# Make sure the output directory exists before the first write, so a fresh
# checkout does not lose the sampling run to a missing-directory error.
Path("./results").mkdir(parents=True, exist_ok=True)
az.to_netcdf(idata1, f"./results/rpc_pymc_{date_time}.nc")

In [None]:
# Posterior predictive check; the current figure is written to disk.
ppc_plot = az.plot_ppc(data=idata1)
plt.savefig(fname=f'./results/rpc_pymc_{date_time}_ppc_plot.png')

In [None]:
# Trace plot for `alpha`; the figure is recovered from the first returned axes.
alpha_trace_plot = az.plot_trace(data=idata1, var_names=['alpha'])
fig = alpha_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_alpha_trace_plot.png')

In [None]:
# Trace plot for the derived end point `p_0`.
p_0_trace_plot = az.plot_trace(data=idata1, var_names=['p_0'])
fig = p_0_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_p_0_trace_plot.png')

In [None]:
# Trace plot for the interior control point `p_1`.
p_1_trace_plot = az.plot_trace(data=idata1, var_names=['p_1'])
fig = p_1_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_p_1_trace_plot.png')

In [None]:
# Trace plot for the interior control point `p_2`.
p_2_trace_plot = az.plot_trace(data=idata1, var_names=['p_2'])
fig = p_2_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_p_2_trace_plot.png')

In [None]:
# Trace plot for the derived end point `p_3`.
p_3_trace_plot = az.plot_trace(data=idata1, var_names=['p_3'])
fig = p_3_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_p_3_trace_plot.png')

In [None]:
# Trace plot for the control-point noise scale `point_sigma`.
point_sigma_trace_plot = az.plot_trace(data=idata1, var_names=['point_sigma'])
fig = point_sigma_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_point_sigma_trace_plot.png')

In [None]:
# Trace plot for the score noise scale `score_sigma`.
score_sigma_trace_plot = az.plot_trace(data=idata1, var_names=['score_sigma'])
fig = score_sigma_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_score_sigma_trace_plot.png')

In [None]:
# Trace plot for the per-indicator observation noise `X_reconstruction_sigma`.
X_reconstruction_sigma_trace_plot = az.plot_trace(
    data=idata1,
    var_names=['X_reconstruction_sigma'],
)
fig = X_reconstruction_sigma_trace_plot.flat[0].get_figure()
fig.savefig(fname=f'./results/rpc_pymc_{date_time}_X_reconstruction_sigma_trace_plot.png')

In [None]:
# Merge the chain and draw dimensions into one "sample" dimension, then plot a
# histogram of the posterior samples of the first score (index 0).
posterior = idata1.posterior.stack(sample=("chain", "draw"))
plt.hist(posterior["s"][0], bins=25, color='k', alpha=0.2)
plt.savefig(fname=f'./results/rpc_pymc_{date_time}_s_hist_plot.png')

In [None]:
# Posterior mean of every latent score with its HDI band, ordered by mean score.
posterior_s = idata1.posterior['s']
mean_score = posterior_s.mean(('chain', 'draw'))
# Evenly spaced vertical positions in [0, 1], one per score.
y = np.linspace(0, 1, len(mean_score))
# Sort the interval bounds with the same key as the means so both line up.
hdi = az.hdi(posterior_s).sortby(mean_score)
plt.plot(mean_score.sortby(mean_score), y)
plt.fill_betweenx(y, hdi['s'].values[:, 0], hdi['s'].values[:, 1], alpha=0.3)
# Use the same `./results/rpc_pymc_<run>` prefix as every other figure; the
# path previously lacked it, so this plot was written to the working directory.
plt.savefig(f'./results/rpc_pymc_{date_time}_scores_hdi_plot.png')

In [None]:
# Average the stacked posterior samples (axis 1) to get one mean score per
# observation, and add it to the table as column "score" at position 4.
# Scores are matched to rows by position, so this assumes `gapminder_series`
# still has the row order it was loaded with.
scores_mean = posterior["s"].mean(axis=1)
gapminder_series.insert(loc=4, column="score", value=scores_mean)
gapminder_series

In [None]:
# Rank the table in place, highest score first, then display it.
gapminder_series.sort_values(by="score", ascending=False, inplace=True)
gapminder_series

In [None]:
# Write the scored, ranked table to a CSV file tagged with the run identifier.
gapminder_series.to_csv(
    path_or_buf=f"./data/gapminder_quality_of_life_{date_time}_with_scores.csv"
)