In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly
import plotly.figure_factory as ff
import scipy.stats
from scipy.integrate import odeint
import pymc3 as pm
import arviz as az
import sunode
import sunode.wrappers.as_theano
# sunode object to customise solver configs
lib = sunode._cvodes.lib
import warnings
warnings.filterwarnings('ignore')

Possible replacement data source, in case I prefer to use data from Brazilian cities in the hierarchical model.

In [2]:
#pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv", date_parser=True).dtypes

In [3]:
# Global confirmed-cases time series from the CSSEGISandData/COVID-19 repository.
confirmed_cases_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
# A comma is already pandas' default delimiter, so it does not need to be spelled out.
confirmed_cases = pd.read_csv(confirmed_cases_url)

In [4]:
# Show the loaded table to inspect its layout (region metadata columns followed by one column per date).
confirmed_cases

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/29/22,1/30/22,1/31/22,2/1/22,2/2/22,2/3/22,2/4/22,2/5/22,2/6/22,2/7/22
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,161290,162111,162926,163555,164190,164727,165358,165711,166191,166924
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,254126,255741,258543,258543,261240,261240,263172,263172,264624,264875
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,249310,250774,252117,253520,254885,255836,256806,257598,257976,258478
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,35556,35556,35958,35958,36315,36470,36599,36599,36599,36808
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,98057,98076,98116,98226,98267,98319,98340,98351,98364,98409
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
276,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,504992,504992,524716,535160,546176,556550,556550,556550,556550,578424
277,,Winter Olympics 2022,39.904200,116.407400,0,0,0,0,0,0,...,177,211,248,272,304,359,380,425,435,459
278,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,10942,10942,11019,11061,11113,11145,11145,11149,11166,11266
279,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,304656,304922,305047,305557,305959,306347,306777,306777,307206,307317


In [5]:
# Sanity check: Brazil's rows restricted to the date columns 7/11/20 through 7/20/20 (label slice, inclusive).
confirmed_cases.loc[confirmed_cases["Country/Region"] == 'Brazil', '7/11/20':'7/20/20']

Unnamed: 0,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20,7/16/20,7/17/20,7/18/20,7/19/20,7/20/20
30,1844318,1867976,1890869,1933791,1973933,2017616,2052234,2077987,2101399,2124152


In [6]:
class covid_data_etl():
     
    def __init__(self, date_begin='7/11/20', date_end='7/20/20'):
 
        confirmed_cases_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
        self.confirmed_cases = pd.read_csv(confirmed_cases_url, sep=',')
        self.date_begin = date_begin 
        self.date_end = date_end
        self.countries = list()
        self.pops = dict()
        self.cases_obs = dict()
 
    def get_country_data(self, country:str='Brazil', population:float = 212.6e6):
 
        self.countries.append(country)
        self.countries = list(set(self.countries)) # remove duplicates
        self.pops[country] = population
        self.cases_obs[country] = np.array(
            self.confirmed_cases.loc[
                self.confirmed_cases["Country/Region"] == country, self.date_begin:self.date_end
            ]
        )[0]
        print(f"------------ COVID Data for {country}, from {self.date_begin} to {self.date_end}, Loaded ----------- ")

# Continue from the sunode ODE equation

In [7]:
class SIR_model_sunode():
    """SIR epidemic model fitted with PyMC3, with the ODE system solved by sunode.

    The class wraps a COVID data object (anything exposing ``cases_obs`` and
    ``pops`` dictionaries keyed by country name) and offers:

    * ``build_pymc_sir_model`` - a single-country model;
    * ``build_pymc_sir_hier_model`` - a hierarchical model over several
      countries sharing hyper-priors;
    * helpers to sample the posterior and to summarise / plot the trace.

    Both builders store the resulting model in ``self.pymc_model``.
    """

    # Values accepted for the likelihood distribution name.
    _SUPPORTED_LIKELIHOODS = ('lognormal', 'normal', 'students-t')

    def __init__(self, covid_data_obj) :
        """Keep a reference to the data object providing ``cases_obs`` and ``pops``."""
        self.covid_data = covid_data_obj

    def _SIR_sunode_rhs_ode(self, t, y, p):
        """Right-hand side of the SIR equations in the form sunode expects.

        Args:
            t: time (unused, the system is autonomous).
            y: state with attributes ``S`` and ``I``.
            p: parameters with attributes ``lam`` and ``mu``.

        Returns:
            Dict with the time derivatives of ``S`` and ``I``.
        """
        new_infections = p.lam * y.S * y.I
        return {
            'S': -new_infections,
            'I': new_infections - p.mu * y.I,
            # 'R' is not tracked: 'R': p.f * p.mu * y.I, with f taken as 1 for this exercise
        }

    def _check_likelihood_name(self, distribution):
        """Raise ValueError unless ``distribution`` is a supported likelihood name."""
        if distribution not in self._SUPPORTED_LIKELIHOODS:
            raise ValueError(
                f"Unknown likelihood distribution {distribution!r}; "
                f"expected one of {self._SUPPORTED_LIKELIHOODS}"
            )

    def _configure_sunode_solver(self, solver):
        """Tighten tolerances and raise step limits on the sunode solver.

        Uses the raw sundials functions exposed by sunode
        (see https://github.com/jmuhlich/pysundials/tree/master/doc).
        """
        # Forward solve tolerances, then the backward (adjoint) solve and its quadratures.
        lib.CVodeSStolerances(solver._ode, 1e-10, 1e-10)
        lib.CVodeSStolerancesB(solver._ode, solver._odeB, 1e-8, 1e-8)
        lib.CVodeQuadSStolerancesB(solver._ode, solver._odeB, 1e-8, 1e-8)
        # Allow more internal steps before the solver gives up.
        lib.CVodeSetMaxNumSteps(solver._ode, 5000)
        lib.CVodeSetMaxNumStepsB(solver._ode, solver._odeB, 5000)

    def _add_likelihood(self, distribution, likelihood, predicted, sigma, observed):
        """Add the observed variable 'I' to the model on the context stack.

        Args:
            distribution: one of ``_SUPPORTED_LIKELIHOODS``.
            likelihood: likelihood settings; ``'nu'`` is read for 'students-t'.
            predicted: infected fraction predicted by the SIR ODE.
            sigma: scale random variable of the likelihood.
            observed: observed infected fraction, same layout as ``predicted``.
        """
        self._check_likelihood_name(distribution)
        if distribution == 'lognormal':
            # NOTE(review): pm.Lognormal's `mu` is the mean of log(x); the ODE
            # solution is passed as-is (not its log), as in the original code - confirm.
            return pm.Lognormal('I', mu=predicted, sigma=sigma, observed=observed)
        if distribution == 'normal':
            return pm.Normal('I', mu=predicted, sigma=sigma, observed=observed)
        return pm.StudentT(
            "I",
            nu=likelihood['nu'],  # degrees of freedom
            mu=predicted,         # likelihood mean: the predictions from SIR
            sigma=sigma,
            observed=observed,
        )

    def _setup_SIR_model_data(self, country):
        """Prepare the single-country inputs and store them in ``self.country_sir_input``.

        Cases are divided by the population; the first observation is used as
        the initial infected fraction and ``1 - I_init`` as the susceptible one.
        """
        cases_obs = np.array(self.covid_data.cases_obs[country])
        pop_n = self.covid_data.pops[country]

        scaled_cases_obs = cases_obs / pop_n
        time_range = np.arange(0,len(cases_obs))
        I_init = scaled_cases_obs[0]
        S_init = 1 - I_init
        self.country_sir_input = {
            'country':country,
            'I_init': I_init,
            'S_init': S_init,
            'time_range': time_range,
            'scaled_cases_obs':scaled_cases_obs
        }

    def build_pymc_sir_model(self, country, prior, likelihood):
        """Build the single-country model and store it in ``self.pymc_model``.

        Args:
            country: key of the country in the data object.
            prior: dict with 'lam', 'lambda_std', 'mu' and 'mu_std'
                (parameters of the Lognormal priors on lambda and mu).
            likelihood: dict with 'distribution' ('lognormal', 'normal' or
                'students-t'), 'sigma' (HalfCauchy parameter) and, for
                'students-t', 'nu'.

        Raises:
            ValueError: if the likelihood distribution name is not supported.
        """
        distribution = likelihood['distribution']
        # Fail before building anything instead of silently producing a model
        # without a likelihood.
        self._check_likelihood_name(distribution)
        self._setup_SIR_model_data(country)
        sir_input = self.country_sir_input

        with pm.Model() as model:
            ## pymc RVs - Priors
            sigma = pm.HalfCauchy('sigma', likelihood['sigma'], shape=1)
            lam = pm.Lognormal('lambda', prior['lam'], prior['lambda_std'])
            mu = pm.Lognormal('mu', prior['mu'], prior['mu_std'])

            ## sunode ODE equation and gradients computed using sunode
            res, _, problem, solver, _, _ = sunode.wrappers.as_theano.solve_ivp(
                y0={'S': (sir_input['S_init'], ()),
                    'I': (sir_input['I_init'], ()),},
                params={'lam': (lam, ()), 'mu': (mu, ()), '_dummy': (np.array(1.), ())},
                rhs=self._SIR_sunode_rhs_ode,
                tvals=sir_input['time_range'],
                t0=sir_input['time_range'][0]
            )
            self._configure_sunode_solver(solver)

            ## pymc RVs - likelihood
            self._add_likelihood(
                distribution, likelihood,
                predicted=res['I'], sigma=sigma,
                observed=sir_input['scaled_cases_obs'],
            )

            ## pymc deterministic params
            R0 = pm.Deterministic('R0',lam/mu)

        self.pymc_model = model

    def _setup_SIR_hier_model_data(self, specific_country_list = None):
        """Prepare the multi-country inputs and store them in ``self.countries_hier_model_input``.

        Args:
            specific_country_list: optional collection of country names; only
                loaded countries found in it are kept. ``None`` keeps every
                loaded country. Countries keep the order in which they were loaded.

        Raises:
            ValueError: if no loaded country is selected.
        """
        # Read from the data object held by this instance, not a notebook global.
        cases_by_country = self.covid_data.cases_obs
        pops = self.covid_data.pops

        if specific_country_list is not None:
            country_list = [
                name for name in cases_by_country if name in specific_country_list
            ]
        else:
            country_list = list(cases_by_country)
        if not country_list:
            raise ValueError("No loaded country matches the requested country list")

        # One column per country, one row per time point.
        scaled_cases_obs_array = np.array(
            [np.asarray(cases_by_country[name]) / pops[name] for name in country_list]
        ).T
        time_range_hier = np.arange(0,len(scaled_cases_obs_array))
        I_init_array = scaled_cases_obs_array[0]
        S_init_array = 1 - I_init_array

        self.countries_hier_model_input = {
            'country_list':country_list,
            'scaled_cases_obs_array': scaled_cases_obs_array,
            'time_range_hier': time_range_hier,
            'I_init_array': I_init_array,
            'S_init_array':S_init_array
        }

    def build_pymc_sir_hier_model(self, hyper_prior, prior,
                                  likelihood, specific_country_list = None):
        """Build the hierarchical multi-country model and store it in ``self.pymc_model``.

        Every country gets its own lambda and mu, drawn from Lognormal priors
        whose location is shared through the hyper-priors.

        Args:
            hyper_prior: dict with 'prior_lam_mu_mu', 'prior_lam_mu_std',
                'prior_mu_mu_mu' and 'prior_mu_mu_std'.
            prior: dict with 'lam_std' and 'mu_std' (HalfNormal parameters).
            likelihood: dict with 'sigma' and optionally 'distribution'
                (defaults to 'normal') and 'nu' (for 'students-t').
            specific_country_list: optional subset of the loaded countries;
                ``None`` uses all of them.

        Raises:
            ValueError: if the likelihood name is unsupported, no country is
                selected, or fewer than two time points are available.
        """
        distribution = likelihood.get('distribution', 'normal')
        self._check_likelihood_name(distribution)

        self._setup_SIR_hier_model_data(specific_country_list)
        hier_input = self.countries_hier_model_input
        # Derived from the prepared data so it also works when no list is given.
        n_countries = len(hier_input['country_list'])
        time_range = hier_input['time_range_hier']
        if len(time_range) < 2:
            raise ValueError("At least two time points are needed to fit the model")

        with pm.Model() as model:
            ## pymc RVs - Hyper priors
            prior_lam_mu = pm.Lognormal(
                'prior_lam_mu', hyper_prior['prior_lam_mu_mu'], hyper_prior['prior_lam_mu_std']
            )
            prior_mu_mu = pm.Lognormal(
                'prior_mu_mu', hyper_prior['prior_mu_mu_mu'], hyper_prior['prior_mu_mu_std']
            )

            ## pymc RVs - Priors
            lam_std = pm.HalfNormal('lam_std', prior['lam_std'])
            mu_std = pm.HalfNormal('mu_std', prior['mu_std'])
            lam = pm.Lognormal('lambda', prior_lam_mu, lam_std, shape=n_countries)
            mu = pm.Lognormal('mu', prior_mu_mu, mu_std, shape=n_countries)

            ## sunode ODE equation and gradients computed using sunode
            res, _, problem, solver, _, _ = sunode.wrappers.as_theano.solve_ivp(
                # Each entry carries its shape; an empty tuple is a scalar value.
                y0={
                    'S': (hier_input['S_init_array'], (n_countries,)),
                    'I': (hier_input['I_init_array'], (n_countries,)),
                },
                params={
                    'lam': (lam, (n_countries,)),
                    'mu': (mu, (n_countries,)),
                    '_dummy': (np.array(1.), ())
                },
                rhs=self._SIR_sunode_rhs_ode,
                # The first time point only provides the initial state, so the
                # solution is requested (and compared to data) from the second one.
                tvals=time_range[1:],
                t0=time_range[0],
            )
            self._configure_sunode_solver(solver)

            ## pymc RVs - likelihood
            sigma = pm.HalfCauchy('sigma', likelihood['sigma'], shape=1)
            self._add_likelihood(
                distribution, likelihood,
                predicted=res['I'], sigma=sigma,
                observed=hier_input['scaled_cases_obs_array'][1:],
            )

            ## pymc deterministic params
            R0 = pm.Deterministic('R0',lam/mu)

        self.pymc_model = model

    #def plot_pymc_model_dag(self):
    #
    #    dag_fig = pm.model_to_graphviz(self.pymc_model)
    #    return dag_fig

    def sample_posterior_pymc_model(
        self, n_samples, n_tune,
        n_chains, n_cores
    ):
        """Sample the posterior of the built model and store the trace.

        Args:
            n_samples: number of draws per chain.
            n_tune: number of tuning steps per chain.
            n_chains: number of chains.
            n_cores: number of cores used for sampling.

        Raises:
            RuntimeError: if no model has been built yet.
        """
        self.n_samples = n_samples
        self.n_tune = n_tune
        self.n_chains = n_chains
        self.n_cores = n_cores

        if getattr(self, 'pymc_model', None) is None:
            raise RuntimeError(
                'pymc3 model instance not found; call build_pymc_sir_model or '
                'build_pymc_sir_hier_model first'
            )

        with self.pymc_model:
            trace = pm.sample(self.n_samples, tune=self.n_tune,
                              chains=self.n_chains, cores=self.n_cores)

        self.pymc_model_trace = trace

    def pymc_model_posterior_summary(self):
        """Return the ArviZ summary table of the stored trace."""
        return az.summary(self.pymc_model_trace)

    def pymc_model_plot_posterior(self):
        """Plot the posterior of each variable (mode as point estimate, 95% HDI)."""
        data = az.from_pymc3(trace=self.pymc_model_trace)
        az.plot_posterior(data, round_to=2, point_estimate='mode', hdi_prob=0.95)

    def pymc_model_plot_traces(self):
        """Draw the ArviZ trace plot of the stored trace."""
        az.plot_trace(self.pymc_model_trace)

    def pymc_model_plot_interactive_trace(self, trace='R0'):
        """Return a plotly distribution plot of one variable of the stored trace.

        Args:
            trace: name of the variable to plot.
        """
        fig = ff.create_distplot([self.pymc_model_trace[trace]], bin_size=0.5, group_labels=['x'])
        # Add title
        fig.update_layout(title_text='Curve and Rug Plot')
        fig.update_xaxes(range=[0,7])
        return fig

In [8]:
# -------- COVID Data --------#
covid_obj = covid_data_etl(date_begin='3/1/20', date_end='9/28/20')
covid_obj.get_country_data(country='Brazil', population=212.6e6)
# US resident population from the 2020 Census (about 331.4 million);
# the previous value was a copy of Brazil's population.
covid_obj.get_country_data(country='US', population=331.4e6)

------------ COVID Data for Brazil, from 3/1/20 to 9/28/20, Loaded ----------- 
------------ COVID Data for US, from 3/1/20 to 9/28/20, Loaded ----------- 


### Single Country Sample

In [9]:
# -------- SIR Params --------#
# Likelihood family and the parameter of the HalfCauchy prior on its scale.
likelihood = dict(distribution='lognormal', sigma=2)
# Parameters of the Lognormal priors on lambda and mu.
prior = dict(lam=1.0, mu=0.5, lambda_std=1.0, mu_std=0.2)

# -------- SIR Obj Loading --------#
sir_model = SIR_model_sunode(covid_obj)
sir_model.build_pymc_sir_model(country='Brazil', likelihood=likelihood, prior=prior)
# TODO: understand why the image didn't keep python-graphviz installed, then re-enable:
#sir_model.plot_pymc_model_dag()

In [10]:
# Draw 2000 samples per chain after 1000 tuning steps, using 4 chains on 4 cores.
# NOTE(review): the saved output of this cell ends with
# "ValueError: Not enough samples to build a trace." - the run did not complete.
sir_model.sample_posterior_pymc_model(n_samples=2000, n_tune=1000, n_chains=4, n_cores=4)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, lambda, sigma]


ValueError: Not enough samples to build a trace.

In [None]:
# ArviZ summary table of the sampled trace (requires a completed sampling run).
sir_model.pymc_model_posterior_summary()

In [None]:
# Posterior plots with the mode as point estimate and a 95% HDI.
sir_model.pymc_model_plot_posterior()

In [None]:
# ArviZ trace plot of the sampled trace.
sir_model.pymc_model_plot_traces()

In [None]:
# Interactive plotly distribution plot of the default variable, 'R0'.
sir_model.pymc_model_plot_interactive_trace()

### Hier SIR

In [None]:
# Settings intended for the hierarchical SIR model.
# NOTE(review): nsamples, ntune, Hyperprior, Prior and Likelihood are defined here but
# not used by the call below, and their keys differ from the ones read by
# SIR_model_sunode.build_pymc_sir_hier_model ('prior_lam_mu_mu', 'prior_lam_mu_std',
# 'prior_mu_mu_mu', 'prior_mu_mu_std', 'lam_std', 'mu_std', 'sigma').
nsamples = 8000
ntune = 4000
Hyperprior = {"Lambda mean": 0.75, "Lambda std": 2, "Mu mean": 0.75, "Mu std": 2}
Prior = {"Lambda std": 1.0, "Mu std": 1.0}
Likelihood = {"Name": "Normal", "Parameters": {"std": 0.01}}

# NOTE(review): this rebuilds the single-country model for Brazil with the earlier
# `likelihood` / `prior` dicts; presumably build_pymc_sir_hier_model was intended - confirm.
sir_model.build_pymc_sir_model(country='Brazil', likelihood=likelihood, prior=prior)