# Data Extraction

In [16]:
import arviz as az
import numpy as np
import pymc3 as pm

time_after_injection = np.array([24, 32, 48, 56, np.nan, 70, 72, 75, 80, 96])
temperature = np.array([102.8, 104.5, 106.5, 107.0, 107.2, 105.1, 103.9, np.nan, 103.2, 102.1])

# Model

In [29]:
with pm.Model() as model:
    # Priors for unknown model parameters
    alpha = pm.Normal('alpha', mu=0, sigma=100)
    beta = pm.Normal('beta', mu=0, sigma=100)
    tau = pm.Gamma("tau", alpha=0.001, beta=0.001)

    # Impute missing 'x' data
    time_imputed = pm.Normal('time_imputed', mu=np.nanmean(time_after_injection), sigma=np.nanstd(time_after_injection), observed=time_after_injection)

    # Linear regression equation
    mu = alpha + beta * time_imputed

    # Likelihood (sampling distribution) of observations with missing 'y' data handled automatically
    likelihood = pm.Normal('likelihood', mu=mu, tau=tau, observed=temperature)

    # Inference
    trace = pm.sample(3000, target_accept=0.95)
    ppc = pm.sample_posterior_predictive(trace)
    inference_data = az.from_pymc3(trace=trace, posterior_predictive=ppc)

  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [likelihood_missing, time_imputed_missing, tau, beta, alpha]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 11 seconds.


# Show Statistics

In [31]:
az.summary(trace)

Got error No model on context stack. trying to find log_likelihood in translation.


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha,105.858,2.205,101.452,109.855,0.034,0.024,4124.0,4479.0,1.0
beta,-0.02,0.035,-0.086,0.046,0.001,0.0,4017.0,4700.0,1.0
time_imputed_missing[0],55.899,22.14,16.327,98.775,0.266,0.191,6942.0,7539.0,1.0
likelihood_missing[0],104.366,2.47,99.689,109.126,0.031,0.022,6497.0,5214.0,1.0
tau,0.277,0.152,0.04,0.549,0.002,0.001,4802.0,5244.0,1.0


# Calculate R²

In [30]:
# Get temperatures where data is available.
observed_temps = temperature[~np.isnan(temperature)]

# Convert trace and ppc to ArviZ InferenceData and stack samples.
y_pred = inference_data.posterior_predictive.stack(sample=("chain", "draw"))["likelihood"].values.T

# Reshape y_pred and filter to match non-missing observed data.
y_pred_reshaped = y_pred.reshape(-1, temperature.shape[0])
non_missing_indices = np.where(~np.isnan(temperature))[0]
y_pred_filtered = y_pred_reshaped[:, non_missing_indices]

# Calculate and print Bayesian R-squared.
r2_score = az.r2_score(observed_temps, y_pred_filtered)
print(f'Bayesian R-squared:\n{r2_score}')


Bayesian R-squared:
r2        0.361737
r2_std    0.116677
dtype: float64


# Conslusion and Answers

### 1. Bayesian R²
    A linear regression with one predictor (time) gives relatively low Bayesian R²: 0.3617

### 2. Missing Data Estimators
    Missing time estimator (mean): 55.899 hours
    Missing temperature estimator (mean): 104.366 degrees F

### 3. Slope Credible Set Implications
    The 90% credible set for the slope (beta) is [-0.086, 0.046]. This range includes 0, implying that there is likely not a strong linear relationship between time after injection and temperature.