# Title: Uncertainty Quantification and Model Evaluation in Atmospheric Data


Description: Hands-on Python notebook for quantifying uncertainty, propagating errors, evaluating models, and visualizing prediction confidence.


In [None]:
# =======================
# 1. Import Required Libraries
# =======================
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score

In [None]:
# =======================
# 2. Simulate Observed and Predicted Atmospheric Data
# =======================
# Small synthetic ozone data set used throughout the notebook.
#   y_obs  - observed ozone measurements (ppb)
#   y_pred - values predicted by a model for the same samples
#   y_unc  - uncertainty attached to each observation
y_obs = np.asarray([50.2, 49.8, 51.1, 50.5, 50.0], dtype=float)
y_pred = np.asarray([50.0, 50.2, 50.8, 50.3, 49.9], dtype=float)
y_unc = np.asarray([0.3, 0.25, 0.4, 0.3, 0.35], dtype=float)

In [None]:
# =======================
# 3. Measurement Uncertainty
# =======================
# Summarise the observed series by its mean and a 95% confidence interval
# on that mean.
n_obs = len(y_obs)
mean_val = np.mean(y_obs)
std_val = np.std(y_obs, ddof=1)  # sample standard deviation (n - 1 denominator)
std_err = std_val / np.sqrt(n_obs)  # standard error of the mean

# The standard deviation is estimated from only a handful of samples, so the
# normal quantile (1.96) would give an interval that is too narrow. Use the
# two-sided Student-t quantile with n - 1 degrees of freedom instead.
t_crit = stats.t.ppf(0.975, df=n_obs - 1)
conf_interval = t_crit * std_err  # half-width of the 95% confidence interval

print(f"Observed mean: {mean_val:.2f} ± {conf_interval:.2f} ppb (95% CI)")


In [None]:
# =======================
# 4. Model Evaluation Metrics
# =======================
# Score the predictions against the observations with three metrics:
# mean absolute error, root-mean-square error (square root of the MSE)
# and the coefficient of determination R².
mse = mean_squared_error(y_obs, y_pred)
mae, rmse, r2 = (
    mean_absolute_error(y_obs, y_pred),
    np.sqrt(mse),
    r2_score(y_obs, y_pred),
)

print(f"MAE: {mae:.3f}, RMSE: {rmse:.3f}, R²: {r2:.3f}")

In [None]:
# =======================
# 5. Cross-Validation Example
# =======================
# Build a synthetic regression problem: 100 samples of 3 uniform random
# predictors, and a response that is a fixed linear combination of them
# plus Gaussian noise with standard deviation 0.5.
np.random.seed(42)  # fixed seed so the simulated data are reproducible
n_samples, n_features = 100, 3
true_coefs = np.array([1.5, -2.0, 0.5])

X = np.random.rand(n_samples, n_features)
noise = np.random.normal(0, 0.5, n_samples)
y = X @ true_coefs + noise

# 5-fold cross-validation of a linear regression, scored by R².
model = LinearRegression()
scores = cross_val_score(model, X, y, scoring='r2', cv=5)
print("Cross-validated R² scores:", scores)
print("Mean R²:", scores.mean())

In [None]:
# =======================
# 6. Monte Carlo Simulation for Prediction Uncertainty
# =======================
# Monte Carlo loop: on every draw, add Gaussian noise (standard deviation 0.1)
# to the predictors, refit the regression on the noisy predictors, and record
# the prediction for the first (noisy) sample.
n_sim = 1000
predictions = np.empty(n_sim)

for draw in range(n_sim):
    X_perturbed = X + np.random.normal(0, 0.1, X.shape)
    model.fit(X_perturbed, y)
    first_row = X_perturbed[:1]  # slice keeps the 2-D (1, n_features) shape
    predictions[draw] = model.predict(first_row)[0]

# Summarise the simulated predictions by their mean and the central 95% range
# (2.5th to 97.5th percentile).
mean_pred = predictions.mean()
ci_lower, ci_upper = np.percentile(predictions, [2.5, 97.5])

print(f"Monte Carlo Prediction: {mean_pred:.3f} [{ci_lower:.3f}, {ci_upper:.3f}] 95% CI")

In [None]:
# =======================
# 7. Visualizing Measurement and Prediction Uncertainty
# =======================
# x-axis positions: one integer index per observation.
time = np.arange(len(y_obs))
# Half-width of the shaded band: a fixed ±0.5 around the predicted values,
# not derived from the model or the data.
band_half_width = 0.5

fig, ax = plt.subplots(figsize=(8, 6))
# Observations drawn as points with their uncertainty as error bars.
ax.errorbar(time, y_obs, yerr=y_unc, fmt='o', label='Observed ± uncertainty')
# Model predictions as a red line with the shaded band around them.
ax.plot(time, y_pred, 'r-', label='Predicted')
ax.fill_between(
    time,
    y_pred - band_half_width,
    y_pred + band_half_width,
    color='red',
    alpha=0.2,
    label='Prediction interval',
)
ax.set_xlabel('Time')
ax.set_ylabel('O3 [ppb]')
ax.set_title('Observations and Model Predictions with Uncertainty')
ax.legend()
plt.show()

In [None]:
# =======================
# 8. Summary
# =======================
# Closing recap of the steps carried out in the notebook, printed verbatim.
summary_text = """
Summary:
- Quantified measurement uncertainty using standard deviation and confidence intervals.
- Evaluated model performance using MAE, RMSE, and R².
- Applied cross-validation for robust model evaluation.
- Used Monte Carlo simulation to propagate input uncertainty and generate prediction intervals.
- Visualized both measurement and prediction uncertainty for atmospheric observations.
"""
print(summary_text)