# Model calibration via MCMC sampling

This notebook implements Metropolis-Hastings MCMC to infer the posterior distributions of wall decay coefficients in the Bristol Water Field Lab. It uses a Gaussian process (GP) surrogate model in place of the (expensive) EPANET water quality simulator (see `04-GP_surrogate_modelling.ipynb`), together with the field data organized in `01-data-period-features.ipynb`.

In [2]:
import pandas as pd
import numpy as np
import joblib
from scipy.stats import truncnorm, triang, uniform, norm
from tqdm.notebook import tqdm

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

from bayesian_wq_calibration.constants import TIMESERIES_DIR, RESULTS_DIR
# from bayesian_wq_calibration.calibration import

### Load data

Load operational data for the selected sensing period.

In [11]:
# --- Data selection settings -------------------------------------------------
data_period = 18 # 20 calibration events (as at 30 October 2024)
wq_sensors_used = 'kiosk only' # 'kiosk only', 'kiosk + hydrant'
demand_resolution = 'wwmd' # 'dma', 'wwmd'

# Sensor IDs that are always dropped from the chlorine data used below.
source_ids = ['BW1', 'BW4']
# Sensor IDs retained when `wq_sensors_used == 'kiosk only'` (minus `source_ids`).
kiosk_ids = ['BW1', 'BW2', 'BW4', 'BW5', 'BW9', 'BW12']

# Fail fast on a mistyped option instead of silently falling back to the
# "all non-source sensors" branch.
if wq_sensors_used not in ('kiosk only', 'kiosk + hydrant'):
    raise ValueError(
        f"Unknown wq_sensors_used={wq_sensors_used!r}; "
        "expected 'kiosk only' or 'kiosk + hydrant'."
    )

# --- Load water quality time series -------------------------------------------
wq_path = TIMESERIES_DIR / f"processed/{str(data_period).zfill(2)}-wq.csv"
try:
    # Only the read can raise FileNotFoundError, so only the read is guarded.
    wq_df = pd.read_csv(wq_path, low_memory=False)
except FileNotFoundError as err:
    # Re-raise rather than print-and-continue: otherwise `wq_df`/`cl_df` stay
    # undefined (or stale from an earlier run) and later cells fail or, worse,
    # silently use data from a different period.
    raise FileNotFoundError(
        f"Data period {data_period} does not exist (expected file: {wq_path})."
    ) from err

# --- Select chlorine readings for the chosen sensor set -----------------------
cl_df = wq_df[wq_df['data_type'] == 'chlorine']
if wq_sensors_used == 'kiosk only':
    cl_df = cl_df[(cl_df['bwfl_id'].isin(kiosk_ids)) & (~cl_df['bwfl_id'].isin(source_ids))]
else:
    cl_df = cl_df[~cl_df['bwfl_id'].isin(source_ids)]

Set modelling parameters.

In [13]:
# Simulation horizon.
# NOTE: `sim_days` must match the number of days used to train the GP model.
sim_days = 5
time_steps = 4 * 24 * sim_days  # 4 steps per hour, 24 hours per day
time_range = range(time_steps)

# Distinct sensor IDs present in `cl_df`, in order of first appearance.
sensor_names = cl_df['bwfl_id'].unique()

# First `time_steps` distinct timestamps in `cl_df` (order of appearance, not sorted).
datetime = cl_df['datetime'].unique()[time_range]