In [None]:
!pip -q install "pymc>=5" arviz pytensor

import os
import numpy as np
import pandas as pd
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt

# Seed NumPy's global generator and pick the ArviZ plotting style used by
# every figure produced below.
np.random.seed(123)
az.style.use("arviz-whitegrid")

# Locate the dataset. If it is not in the working directory, fall back to the
# Colab upload dialog and use the first uploaded file.
csv_path = "date_colesterol.csv"
if not os.path.exists(csv_path):
    missing_msg = "Nu am gasit date_colesterol.csv si nu s-a incarcat niciun fisier."
    try:
        from google.colab import files
    except ImportError as err:
        # Outside Colab there is no upload dialog; report the real problem
        # (missing data file) instead of an unrelated import error.
        raise FileNotFoundError(missing_msg) from err
    uploaded = files.upload()
    if not uploaded:
        raise FileNotFoundError(missing_msg)
    csv_path = next(iter(uploaded))

# Load the dataset. The first column is used as the predictor and the second
# as the response, regardless of the column names.
df = pd.read_csv(csv_path)

cols = list(df.columns)
if len(cols) < 2:
    raise ValueError("CSV trebuie sa aiba cel putin 2 coloane: ore_exercitiu si colesterol.")
x = df[cols[0]].to_numpy(dtype=float)
y = df[cols[1]].to_numpy(dtype=float)

# Missing or non-finite cells would turn x.mean() into NaN and leak NaNs into
# the likelihood, so drop those rows up front and say how many were removed.
valid = np.isfinite(x) & np.isfinite(y)
if not valid.all():
    print(f"Atentie: am eliminat {int((~valid).sum())} randuri cu valori lipsa sau infinite.")
    x = x[valid]
    y = y[valid]
if x.size == 0:
    raise ValueError("CSV nu contine niciun rand valid pentru ore_exercitiu si colesterol.")

# Center the predictor so each intercept is the expected response at the mean x.
x = x - x.mean()

# Subtask 1: for each candidate number of components K, fit a Gaussian mixture
# whose component means are quadratic in the centered predictor, and keep the
# resulting InferenceData keyed by str(K) for the comparison step.
#
# NOTE(review): the captured run for K = 3 reported 1261 divergences and
# ess_bulk values of 5-7, so these posteriors should not be trusted as-is;
# the model likely needs reparameterization or tighter priors - confirm.
K_list = [3, 4, 5]
idatas = {}

summary_vars = ["p", "alpha", "beta", "gamma", "sigma"]
sampler_kwargs = dict(
    draws=1500,
    tune=2500,
    chains=4,
    target_accept=0.95,
    random_seed=123,
    return_inferencedata=True,
    # Pointwise log-likelihood is stored because WAIC/LOO need it later.
    idata_kwargs={"log_likelihood": True},
)

for K in K_list:
    with pm.Model() as model:
        # Mixture weights with a flat Dirichlet prior.
        p = pm.Dirichlet("p", a=np.ones(K))

        # Intercepts start spread over the observed response range; the
        # ordered transform is presumably there to limit label switching.
        alpha_prior_mu = np.linspace(y.min(), y.max(), K)
        alpha = pm.Normal(
            "alpha",
            mu=alpha_prior_mu,
            sigma=20,
            shape=K,
            transform=pm.distributions.transforms.ordered,
        )
        beta = pm.Normal("beta", mu=0, sigma=10, shape=K)
        gamma = pm.Normal("gamma", mu=0, sigma=10, shape=K)
        sigma = pm.HalfNormal("sigma", sigma=20, shape=K)

        x_data = pm.Data("x", x)
        # The added trailing axis broadcasts each observation against the K
        # per-component coefficients.
        linear_term = beta * x_data[:, None]
        quadratic_term = gamma * (x_data**2)[:, None]
        mu = alpha + linear_term + quadratic_term

        y_obs = pm.NormalMixture("y_obs", w=p, mu=mu, sigma=sigma, observed=y)

        idata = pm.sample(**sampler_kwargs)

    idatas[str(K)] = idata

    print("\n=======================")
    print(f"K = {K}")
    print(az.summary(idata, var_names=summary_vars))

# Subtask 2: rank the fitted models by WAIC and by PSIS-LOO on the deviance
# scale (lower is better). az.compare returns the table sorted best-first, so
# index[0] is the preferred K.
#
# BB-pseudo-BMA weights come from a Bayesian bootstrap; a fixed seed keeps the
# weights identical when this cell is re-run.
compare_kwargs = {"scale": "deviance", "method": "BB-pseudo-BMA", "seed": 123}
cmp_waic = az.compare(idatas, ic="waic", **compare_kwargs)
cmp_loo = az.compare(idatas, ic="loo", **compare_kwargs)

print("\n\n=== Comparatie WAIC (mai mic e mai bun) ===")
print(cmp_waic)

print("\n\n=== Comparatie LOO (mai mic e mai bun) ===")
print(cmp_loo)

best_waic = cmp_waic.index[0]
best_loo = cmp_loo.index[0]
print(f"\nBest dupa WAIC: K = {best_waic}")
print(f"Best dupa LOO : K = {best_loo}")

az.plot_compare(cmp_waic)
plt.show()

az.plot_compare(cmp_loo)
plt.show()

# Trace plots for every fitted model. Each figure is titled with its K,
# otherwise the three figures cannot be told apart in the output.
for K in K_list:
    idata = idatas[str(K)]
    axes = az.plot_trace(idata, var_names=["p", "alpha", "beta", "gamma", "sigma"])
    # plot_trace returns an array of matplotlib axes; they share one figure.
    axes.ravel()[0].figure.suptitle(f"K = {K}")
    plt.show()


Output()

ERROR:pymc.stats.convergence:There were 1261 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc.stats.convergence:The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details



K = 3
             mean      sd   hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
p[0]        0.326   0.084    0.158    0.414      0.040    0.023       6.0   
p[1]        0.308   0.048    0.234    0.388      0.019    0.004       7.0   
p[2]        0.365   0.078    0.242    0.522      0.036    0.018       5.0   
alpha[0]  182.454   1.709  180.495  186.204      0.768    0.480       7.0   
alpha[1]  204.377  10.200  186.356  216.268      5.055    2.698       5.0   
alpha[2]  219.526   2.070  215.521  222.331      0.914    0.374       6.0   
beta[0]    -4.395   2.361   -8.968   -2.716      1.167    0.686       7.0   
beta[1]    -1.804   3.106   -5.084    2.825      1.545    0.494       5.0   
beta[2]    -4.493   1.865   -7.652   -2.545      0.918    0.330       5.0   
gamma[0]    0.473   0.488    0.082    1.444      0.239    0.142       7.0   
gamma[1]    0.278   0.606   -0.370    1.316      0.299    0.130       5.0   
gamma[2]   -0.009   0.613   -0.956    0.726      0.302    0.088      

Output()