In [None]:
# If running from Colab, please uncomment and run the following line to install summer2 and estival.
#!pip install estival[pymc,nevergrad]==0.5.1 summerepi2==1.3.5

In [None]:
# Basic imports for summer2 compartmental modelling
import numpy as np
import pandas as pd
from datetime import datetime
from summer2 import CompartmentalModel
from summer2.parameters import Parameter
# Use plotly as the backend for pandas `.plot()` calls in this notebook
pd.options.plotting.backend = 'plotly'

## Model construction and execution
The following cells constitute the epidemiological analysis of this notebook.
The code is intended to be very simple and adaptable.
Please feel free to adjust any of the data sources or parameters;
experimenting with them is the purpose of this notebook.
### Country selection
First choose your country using its ISO code,
with options only available for Malaysia, the Philippines and Vietnam.
This will determine the Our World in Data (OWID) case data
that can be used to compare your model results against.
You can use our approximate estimate of the total population of the country,
or replace the value as per your preference.
### Dates
Feel free to edit the datetime objects that specify
the start and end dates for the analysis, the reference ("zero") date
and the left limit for the x-axis in the final plot.
### Model
The model is a parameterised summer2 SEIR model
with frequency-dependent transmission and
partial observation of incidence (termed notifications).
### Results
The SEIR model provides a poor-to-moderate fit to the target data
for the Omicron waves in each of the three countries.
Please adjust parameters and dates to achieve a better fit for your country.

In [None]:
# ISO code of the country to analyse; it selects the column of the case data
# and the entry of the approximate population lookup
iso = 'MYS'

In [None]:
# Approximate total population of each supported country, keyed by ISO code
approx_pops = dict(MYS=33e6, PHL=114e6, VNM=97e6)

# Reported new cases for the selected country, re-indexed by datetime
cases_data = pd.read_csv(
    'https://github.com/monash-emu/wpro_working/raw/main/data/new_cases.csv',
    index_col=0,
).loc[:, iso]
cases_data.index = pd.to_datetime(cases_data.index)

In [None]:
# Start and end of the period over which the model is run
analysis_start_date = datetime(2022, 1, 1)
analysis_end_date = datetime(2022, 6, 1)
# SEIR model: four compartments, with 'infectious' passed as the infectious compartment list;
# ref_date is the reference ("zero") date for the model's time axis
epi_model = CompartmentalModel(
    [analysis_start_date, analysis_end_date],
    ['susceptible', 'exposed', 'infectious', 'recovered'],
    ['infectious'],
    ref_date=datetime(2019, 12, 31),
)
# Infection: susceptible -> exposed, frequency-dependent, driven by the 'contact_rate' parameter
epi_model.add_infection_frequency_flow('infection', Parameter('contact_rate'), 'susceptible', 'exposed')
# Progression: exposed -> infectious at rate 1 / incubation_period
epi_model.add_transition_flow('progression', 1.0 / Parameter('incubation_period'), 'exposed', 'infectious')
# Recovery: infectious -> recovered at rate 1 / infectious_period
epi_model.add_transition_flow('recovery', 1.0 / Parameter('infectious_period'), 'infectious', 'recovered')
# Initial state: the approximate country population is susceptible and 'initial_infected' people are infectious
# NOTE(review): the seeded infections are added on top of approx_pops[iso], so the total model
# population slightly exceeds the approximate population - confirm this is intended
epi_model.set_initial_population({'susceptible': approx_pops[iso], 'infectious': Parameter('initial_infected')})
# Incidence is taken from the 'progression' flow (save_results=False: presumably kept only as an intermediate - confirm)
incidence = epi_model.request_output_for_flow('incidence', 'progression', save_results=False)
# Notifications are incidence scaled by the detection proportion; the trailing ';' suppresses the cell output
epi_model.request_function_output('notifications', incidence * Parameter('detection_prop'));

In [None]:
# Parameter values for a single manual run of the model; edit these to explore the fit
parameters = dict(
    contact_rate=0.5,
    incubation_period=5.0,
    infectious_period=5.0,
    detection_prop=0.07,
    initial_infected=100000.0,
)
epi_model.run(parameters)

In [None]:
# Left limit of the x-axis for the comparison plot
plot_start_date = datetime(2021, 10, 1)
# Build the comparison column by column so that modelled and reported values are
# aligned on a shared date index (one row per date), rather than stacking the
# model rows on top of the data rows and renaming the columns by position
comparison_df = pd.DataFrame({
    'modelled': epi_model.get_derived_outputs_df()['notifications'],
    'reported': cases_data,
})
comparison_plot = comparison_df.plot()
comparison_plot.update_xaxes(range=(plot_start_date, analysis_end_date))

## Model calibration

### Import calibration tools and create calibration functions

In [None]:
# Estival is our calibration/optimization framework - for connecting models and parameters to data
# The following imports are the 'building blocks' of estival models

# Targets represent data we are trying to fit to
from estival import targets as est

# We specify parameters using (Bayesian) priors
from estival import priors as esp

# Finally we combine these with our summer2 model in a BayesianCompartmentalModel (BCM)
from estival.model import BayesianCompartmentalModel

In [None]:
# import nevergrad tool using estival. nevergrad is a python package for optimisation
from estival.wrappers.nevergrad import optimize_model

# PyMC imports - PyMC is a widely used probabilistic programming framework for Python
from estival.wrappers import pymc as epm
import pymc as pm

# This is required for pymc parallel evaluation in notebooks
import multiprocessing as mp
import platform

# Leave the default start method untouched on Windows (where 'forkserver' is not available);
# elsewhere switch to 'forkserver'. The second argument is `force=True`, which allows this
# cell to be re-run after a start method has already been set.
if platform.system() != "Windows":
    mp.set_start_method('forkserver', True)

In [None]:
def get_calibration_model(model, data):
    """
    Build the estival calibration object (a BayesianCompartmentalModel) for a model and its target data.

    Args:
        model: the model we are calibrating
        data: the observed data against which the model's 'notifications' output is evaluated

    Returns:
        The BayesianCompartmentalModel combining the model, fixed parameters, priors and target
    """
    # Parameters without a prior: these values are used as-is for every model evaluation
    constant_params = dict(
        incubation_period=5.0,
        infectious_period=5.0,
        initial_infected=100000.0,
    )

    # Calibrated parameters and the bounds of their uniform priors, i.e. the ranges
    # in which we believe the parameter values might (or should) lie
    prior_bounds = {
        "contact_rate": (0.01, 1.0),
        "detection_prop": (0.01, 0.5),
    }
    calibrated_priors = [esp.UniformPrior(name, bounds) for name, bounds in prior_bounds.items()]

    # The single target: observed data compared with the 'notifications' output, using a
    # normal distribution truncated to (0, inf); the last argument (3000.) is presumably
    # its standard deviation - confirm against the estival documentation
    notification_target = est.TruncatedNormalTarget("notifications", data, (0.0, np.inf), 3000.)

    # The BayesianCompartmentalModel is the entry point to estival's optimisation and
    # calibration methods: model, default (fixed) parameters, priors, then targets
    return BayesianCompartmentalModel(model, constant_params, calibrated_priors, [notification_target])


def calibrate_model_with_optimisation(bcm, budget=4000, num_workers=4):
    """
    This function performs a model calibration using optimisation.
    Calibration is performed using the estival package, which implements a wrapper for optimisation methods provided by the nevergrad package.

    Args:
        bcm: the calibration model object (type BayesianCompartmentalModel)
        budget: the maximum number of model evaluations allowed for the optimisation
        num_workers: the number of workers passed to the nevergrad optimiser

    Returns:
        The optimised parameter values
    """
    # Local import of the differential-evolution optimiser class used below
    from nevergrad.optimization.differentialevolution import TwoPointsDE

    # Create a nevergrad optimisation runner; the same budget is given to the
    # optimiser and to the minimisation call so that the two cannot drift apart
    orunner = optimize_model(bcm, opt_class=TwoPointsDE, num_workers=num_workers, budget=budget)
    # Perform the optimisation, allowing for up to `budget` model evaluations
    rec = orunner.minimize(budget)
    # Retrieve the optimised parameter values: element [1] of the recommendation's value
    # (presumably the keyword-argument part of nevergrad's (args, kwargs) pair - confirm)
    optimised_params = rec.value[1]

    return optimised_params


def calibrate_model_with_sampling(bcm, draws=2000, tune=2000, cores=4, chains=4, random_seed=None):
    """
    This function performs a model calibration using Bayesian sampling.
    Calibration is performed using the estival package, which implements a wrapper for sampling methods provided by the PyMC package.

    Args:
        bcm: the calibration model object (type BayesianCompartmentalModel)
        draws: number of samples to draw per chain
        tune: number of tuning iterations per chain
        cores: number of chains to run in parallel
        chains: number of chains to sample
        random_seed: optional seed forwarded to the sampler so that a run can be reproduced;
            the default (None) leaves the sampler unseeded

    Returns:
        The inference data object returned by pm.sample
    """
    # The sampler only needs the model as the active context, so it is not bound to a name
    with pm.Model():
        # This is all you need - a single call to use_model
        variables = epm.use_model(bcm)

        # Now call a sampler using the variables from use_model
        # In this case we use the Differential Evolution Metropolis(Z) sampler
        # See the PyMC docs for more details
        idata = pm.sample(
            step=[pm.DEMetropolisZ(variables)],
            draws=draws,
            tune=tune,
            cores=cores,
            chains=chains,
            random_seed=random_seed,
        )

    return idata

### Create a calibration model object, which encapsulates our model and the data used for calibration

In [None]:
# Restrict the case data to the period used for model evaluation during calibration.
# This is a label-based slice on the date index (January 2022 to May 2022).
cases_data_select = cases_data.loc["jan 2022":"may 2022"]
cases_data_select.plot()

In [None]:
# Combine the model with the selected target data into a single calibration object
bcm = get_calibration_model(model=epi_model, data=cases_data_select)

### Perform calibration using parameter optimisation

In [None]:
# Run the optimisation and display the resulting parameter values
optimised_params = calibrate_model_with_optimisation(bcm)
optimised_params

In [None]:
# Run the model with the optimised parameter set
res = bcm.run(optimised_params)

In [None]:
# Plot the optimal model fit against the reported cases
plot_start_date = datetime(2022, 1, 1)
# Build the comparison column by column so that modelled and reported values are
# aligned on a shared date index (one row per date), rather than stacking the
# model rows on top of the data rows and renaming the columns by position
comparison_df = pd.DataFrame({
    'modelled': res.derived_outputs['notifications'],
    'reported': cases_data,
})
comparison_plot = comparison_df.plot()
comparison_plot.update_xaxes(range=(plot_start_date, analysis_end_date))

### Perform calibration using parameter sampling

In [None]:
# Sample the calibrated parameters with PyMC; idata holds the sampler's output
idata = calibrate_model_with_sampling(bcm=bcm)

In [None]:
# Display the posterior samples as a table
idata.posterior.to_dataframe()

In [None]:
# Arviz is a library for examining MCMC outputs and producing plots/summary statistics/wrangling data
# It supports a variety of frameworks (including PyMC, which we use above for producing our samples)
import arviz as az

# Pairwise plot of the sampled parameters
az.plot_pair(idata)

In [None]:
# Run the model for all parameter samples
# Estival includes a variety of tools for working with parameter sets
from estival.sampling import tools as esamptools
# mres.results holds the model outputs for the samples in idata
mres = esamptools.model_results_for_samples(idata, bcm)

# Note that we have run the model for every sample here; in practice, we often select a subset of samples.

In [None]:
# Compare model output against data for a single selected sample.
# Index of the sample to display
sample_number = 220 

# Left limit of the x-axis for the plot
plot_start_date = datetime(2021, 10, 1)
# Building the frame from a dict aligns the two series on their shared date index.
# The results column is selected by ('notifications', 0, sample_number) -
# presumably (output name, chain, draw); confirm against estival
comparison_plot = pd.DataFrame({
    "modelled": mres.results['notifications', 0, sample_number],
    "reported": cases_data
}).plot()
comparison_plot.update_xaxes(range=(plot_start_date, analysis_end_date))

In [None]:
# Plot the uncertainty quantiles of the above sampled model results, against case data.
# The matplotlib backend is enabled only inside the context manager, so the previous
# plotting backend is restored afterwards even if one of the plotting calls raises.
with pd.option_context("plotting.backend", "matplotlib"):
    quantile_ax = esamptools.quantiles_for_results(
        mres.results, [0.025, 0.25, 0.5, 0.75, 0.975]
    )["notifications"].plot()
    # Draw the case data on the same axes explicitly rather than relying on
    # matplotlib's implicit "current axes" state
    cases_data_select.plot(ax=quantile_ax, style='.', color='black')

#### Exploring more details about the parameter samples

In [None]:
# Get summary stats for the calibration (one row per sampled parameter)
# These are useful to give a rough assessment of the quality of the outputs
az.summary(idata)

In [None]:
# Plot the traces (the values of the parameters at each sampling iteration)
# The figure height scales with len(idata.posterior) - presumably the number of sampled variables;
# the trailing ';' suppresses the text output of the cell
az.plot_trace(idata, figsize=(16,3.2*len(idata.posterior)),compact=False);

In [None]:
# Plot the parameters' posterior distributions
# (the trailing ';' suppresses the text output of the cell)
az.plot_posterior(idata);