In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly
import plotly.figure_factory as ff
import scipy.stats
from scipy.integrate import odeint
import pymc3 as pm
import arviz as az
import sunode
import sunode.wrappers.as_theano
# sunode object to customise solver configs
lib = sunode._cvodes.lib
import warnings
warnings.filterwarnings('ignore')

Possible replacement data source, in case I prefer to use data from Brazilian cities in the hierarchical model:

In [2]:
#pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv", date_parser=True).dtypes

In [5]:
# Global confirmed-case time series published in the CSSEGISandData/COVID-19
# GitHub repository; downloaded straight into a DataFrame.
confirmed_cases_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
)
confirmed_cases = pd.read_csv(filepath_or_buffer=confirmed_cases_url, sep=',')

In [15]:
# Inspect the raw table: one row per country/province (with Lat/Long) and one
# column per reporting date.
confirmed_cases

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/22/22,1/23/22,1/24/22,1/25/22,1/26/22,1/27/22,1/28/22,1/29/22,1/30/22,1/31/22
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,159548,159649,159896,160252,160692,161004,161057,161290,162111,162926
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,246412,248070,248070,248859,251015,252577,254126,254126,255741,258543
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,234536,236670,238885,241406,243568,245698,247568,249310,250774,252117
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,33025,33025,33025,34701,35028,35028,35556,35556,35556,35958
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,95902,96582,97263,97594,97812,97901,98029,98057,98076,98116
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,,Vietnam,14.058324,108.277199,0,2,2,2,2,2,...,2126444,2141422,2155784,2171527,2187481,2203208,2218137,2233287,2263053,2275727
276,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,480583,480583,484979,488599,492694,500444,504992,504992,504992,524716
277,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,10585,10585,10585,10585,10821,10888,10942,10942,10942,11019
278,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,301203,301630,301924,302569,303266,304002,304353,304656,304922,305047


In [14]:
# Sanity check: Brazil's rows restricted to the 7/11/20 - 7/20/20 date columns
# (label slices on columns include both end points).
confirmed_cases[confirmed_cases["Country/Region"] == 'Brazil'].loc[:, '7/11/20':'7/20/20']

Unnamed: 0,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20,7/16/20,7/17/20,7/18/20,7/19/20,7/20/20
30,1844318,1867976,1890869,1933791,1973933,2017616,2052234,2077987,2101399,2124152


In [3]:
class covid_data_etl():
    """Download the global confirmed-case table and extract per-country series.

    The table is read from the CSSEGISandData/COVID-19 GitHub repository when
    the object is created. Each call to ``get_country_data`` stores one
    country's case counts (restricted to the configured date window) together
    with the population supplied by the caller.

    Attributes
    ----------
    confirmed_cases : pandas.DataFrame
        Raw table as downloaded (one row per country/province, one column per date).
    date_begin, date_end : str
        First and last date column labels (e.g. ``'7/11/20'``), both inclusive.
    countries : list of str
        Countries loaded so far, in the order they were first requested.
    pops : dict
        Population per loaded country.
    cases_obs : dict
        1-D numpy array of case counts per loaded country.
    """

    def __init__(self, date_begin='7/11/20', date_end='7/20/20'):
        """Fetch the raw table and remember the date window.

        Parameters
        ----------
        date_begin : str
            Label of the first date column to keep.
        date_end : str
            Label of the last date column to keep (inclusive).
        """
        confirmed_cases_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
        self.confirmed_cases = pd.read_csv(confirmed_cases_url, sep=',')
        self.date_begin = date_begin
        self.date_end = date_end
        self.countries = list()
        self.pops = dict()
        self.cases_obs = dict()

    def get_country_data(self, country:str='Brazil', population:float = 212.6e6):
        """Store the case series and population of one country.

        Parameters
        ----------
        country : str
            Value to match in the ``Country/Region`` column.
        population : float
            Population of the country, stored as given in ``pops``.

        Raises
        ------
        ValueError
            If no row matches ``country`` or the date window selects no
            column. Nothing is stored in that case.
        """
        country_rows = self.confirmed_cases.loc[
            self.confirmed_cases["Country/Region"] == country, self.date_begin:self.date_end
        ]
        # Validate before touching any state so a typo does not leave a
        # half-registered country behind.
        if country_rows.empty:
            raise ValueError(
                f"No data found for country {country!r} between {self.date_begin} and {self.date_end}"
            )

        # Keep insertion order while still avoiding duplicates.
        if country not in self.countries:
            self.countries.append(country)
        self.pops[country] = population
        # Some countries are reported as several province rows; add them up so
        # the series is the national total instead of just the first province.
        # For single-row countries this equals that row.
        self.cases_obs[country] = country_rows.to_numpy().sum(axis=0)
        print(f"------------ COVID Data for {country}, from {self.date_begin} to {self.date_end}, Loaded ----------- ")

In [12]:
# Case-count arrays loaded so far, keyed by country name (filled by
# get_country_data). NOTE: covid_obj is created in the "COVID Data" cell
# further down, so that cell has to run before this one.
covid_obj.cases_obs

{'Brazil': array([      2,       2,       2,       4,       4,      13,      13,
             20,      25,      31,      38,      52,     151,     151,
            162,     200,     321,     372,     621,     793,    1021,
           1546,    1924,    2247,    2554,    2985,    3417,    3904,
           4256,    4579,    5717,    6836,    8044,    9056,   10360,
          11130,   12161,   14034,   16170,   18092,   19638,   20727,
          22192,   23430,   25262,   28320,   30425,   33682,   36658,
          38654,   40743,   43079,   45757,   50036,   54043,   59324,
          63100,   67446,   73235,   79685,   87187,   92202,   97100,
         101826,  108620,  115455,  126611,  135773,  146894,  156061,
         162699,  169594,  178214,  190137,  203165,  220291,  233511,
         241080,  255368,  271885,  294624,  312761,  334568,  351565,
         366033,  378840,  395167,  415366,  443703,  470593,  502914,
         516137,  531286,  562963,  589048,  621002,  653103,  6768

In [14]:
class SIR_model_sunode():
    """SIR epidemic model fitted with PyMC3, with the ODE solved by sunode.

    The ODE tracks the susceptible (S) and infected (I) population fractions.
    The solved I(t) curve is compared with the observed case counts divided by
    the population size.

    Parameters
    ----------
    covid_data_obj
        Data container. It must expose either ``cases_obs`` / ``pops``
        dictionaries keyed by country (as ``covid_data_etl`` does) or the
        legacy ``single_cases_obs`` / ``single_pop_n`` attributes.
    country : str, optional
        Country to model. When omitted, the legacy single-country attributes
        are used if present; otherwise the first country stored in
        ``covid_data_obj.cases_obs`` is used.
    """

    def __init__(self, covid_data_obj, country=None):
        # ------------------------- Covid_data object -----------------------#
        self.covid_data = covid_data_obj
        self.country = country
        # Filled by build_pymc_sir_model / sample_posterior_pymc_model.
        self.pymc_model = None
        self.pymc_model_trace = None
        # ------------------------- Prepare the scaled data; the model itself is built explicitly ------------------------#
        self._setup_SIR_model_data()

    def _SIR_sunode_rhs_ode(self, t, y, p):
        """Right-hand side of the SIR ODE in the dict form expected by sunode.

        ``y`` carries the states ``S`` and ``I``; ``p`` carries the infection
        rate ``lam`` and the recovery rate ``mu``. ``t`` is unused.
        """
        new_infections = p.lam * y.S * y.I
        rhs_ode_dict = {
            'S': -new_infections,
            'I': new_infections - p.mu * y.I,
            #'R': p.f * p.mu * y.I ## f is considered as 1 for this exercise
        }

        return rhs_ode_dict

    def build_pymc_sir_model(self, likelihood, prior):
        """Build the PyMC3 model and store it in ``self.pymc_model``.

        Parameters
        ----------
        likelihood : dict
            ``'distribution'`` is one of ``'lognormal'``, ``'normal'`` or
            ``'students-t'``; ``'sigma'`` is the scale of the HalfCauchy prior
            on the observation noise; ``'nu'`` is required for ``'students-t'``.
        prior : dict
            ``'lam'`` / ``'lambda_std'`` and ``'mu'`` / ``'mu_std'``: location
            and scale of the Lognormal priors on the two rates.

        Raises
        ------
        ValueError
            If ``likelihood['distribution']`` is not a supported name.
        """
        # ------------------------- Metadata --------------------------------#
        self.likelihood = likelihood
        self.prior = prior

        distribution = likelihood['distribution']
        supported = ('lognormal', 'normal', 'students-t')
        if distribution not in supported:
            # Without this check the model would be built with no likelihood
            # at all and sampling would silently ignore the data.
            raise ValueError(
                f"Unsupported likelihood distribution {distribution!r}; expected one of {supported}"
            )

        with pm.Model() as model:
            ## pymc RVs - Priors
            sigma = pm.HalfCauchy('sigma', likelihood['sigma'], shape=1)
            lam = pm.Lognormal('lambda', prior['lam'], prior['lambda_std'])
            mu = pm.Lognormal('mu', prior['mu'], prior['mu_std'])
            ## ODE solution and its gradients, computed by sunode
            res, _, _, solver, _, _ = sunode.wrappers.as_theano.solve_ivp(
                y0={'S': (self.S_init, ()), 'I': (self.I_init, ()),},
                params={'lam': (lam, ()), 'mu': (mu, ()), '_dummy': (np.array(1.), ())},
                rhs=self._SIR_sunode_rhs_ode,
                tvals=self.time_range,
                t0=self.time_range[0]
            )
            ## raw sundials functions to customise sunode solver options
            ## powered by pysundials https://github.com/jmuhlich/pysundials/tree/master/doc
            lib.CVodeSStolerances(solver._ode, 1e-10, 1e-10)
            lib.CVodeSStolerancesB(solver._ode, solver._odeB, 1e-8, 1e-8)
            lib.CVodeQuadSStolerancesB(solver._ode, solver._odeB, 1e-8, 1e-8)
            lib.CVodeSetMaxNumSteps(solver._ode, 5000)
            lib.CVodeSetMaxNumStepsB(solver._ode, solver._odeB, 5000)
            ## pymc RVs - likelihood
            if distribution == 'lognormal':
                # Lognormal's `mu` is the mean of log(X), so the ODE solution
                # (which lives on the linear scale) has to be log-transformed.
                pm.Lognormal('I', mu=pm.math.log(res['I']), sigma=sigma,
                             observed=self.cases_obs_scaled)
            elif distribution == 'normal':
                pm.Normal('I', mu=res['I'], sigma=sigma, observed=self.cases_obs_scaled)
            else:  # 'students-t'
                pm.StudentT('I', nu=likelihood['nu'], mu=res['I'], sigma=sigma,
                            observed=self.cases_obs_scaled)
            pm.Deterministic('R0', lam/mu)

        self.pymc_model = model

    def _setup_SIR_model_data(self):
        """Derive time grid, initial conditions and scaled observations.

        Sets ``time_range``, ``I0``, ``S0``, ``S_init``, ``I_init`` and
        ``cases_obs_scaled`` from the selected country's data.

        Raises
        ------
        ValueError
            If no country data is available, the requested country has not
            been loaded, or its case series is empty.
        """
        data = self.covid_data
        has_legacy = hasattr(data, 'single_cases_obs') and hasattr(data, 'single_pop_n')

        if self.country is None and has_legacy:
            # Backward compatibility with data objects exposing one country.
            cases = data.single_cases_obs
            population = data.single_pop_n
        else:
            country = self.country
            if country is None:
                if not data.cases_obs:
                    raise ValueError('covid data object holds no country data; load a country first')
                # dicts preserve insertion order: this is the first loaded country
                country = next(iter(data.cases_obs))
            if country not in data.cases_obs:
                raise ValueError(f"Country {country!r} has not been loaded into the covid data object")
            cases = data.cases_obs[country]
            population = data.pops[country]
            self.country = country

        if len(cases) == 0:
            raise ValueError('case series is empty; cannot set up the SIR model')

        self.time_range = np.arange(0, len(cases), 1)
        self.I0 = cases[0]
        self.S0 = population - self.I0
        self.S_init = self.S0 / population
        self.I_init = self.I0 / population
        self.cases_obs_scaled = cases / population

    def build_pymc_sir_hier_model(self):
        """Placeholder for the hierarchical model (not implemented yet)."""
        pass

    def _setup_SIR_hier_model_data(self):
        """Placeholder for the hierarchical model data setup (not implemented yet)."""
        pass

    #def plot_pymc_model_dag(self):
    #
    #    dag_fig = pm.model_to_graphviz(self.pymc_model)
    #    return dag_fig

    def sample_posterior_pymc_model(self, n_samples, n_tune, n_chains, n_cores, random_seed=None):
        """Sample the posterior and store the trace in ``self.pymc_model_trace``.

        Parameters
        ----------
        n_samples, n_tune, n_chains, n_cores : int
            Forwarded to ``pm.sample`` as draws, ``tune``, ``chains`` and ``cores``.
        random_seed : optional
            Forwarded to ``pm.sample``; set it for reproducible runs.

        Raises
        ------
        RuntimeError
            If ``build_pymc_sir_model`` has not been called yet.
        """
        self.n_samples = n_samples
        self.n_tune = n_tune
        self.n_chains = n_chains
        self.n_cores = n_cores

        if getattr(self, 'pymc_model', None) is None:
            raise RuntimeError('pymc3 model instance not found; call build_pymc_sir_model() first')

        with self.pymc_model:
            trace = pm.sample(self.n_samples, tune=self.n_tune,
                              chains=self.n_chains, cores=self.n_cores,
                              random_seed=random_seed)

        self.pymc_model_trace = trace

    def _require_trace(self):
        """Return the stored trace, or raise RuntimeError if sampling has not run."""
        trace = getattr(self, 'pymc_model_trace', None)
        if trace is None:
            raise RuntimeError('no posterior trace found; call sample_posterior_pymc_model() first')
        return trace

    def pymc_model_posterior_summary(self):
        """Return the ArviZ summary table of the sampled trace."""
        trace_summary = az.summary(self._require_trace())
        return trace_summary

    def pymc_model_plot_posterior(self):
        """Plot the posterior densities (mode as point estimate, 95% HDI)."""
        data = az.from_pymc3(trace=self._require_trace())
        az.plot_posterior(data, round_to=2, point_estimate='mode', hdi_prob=0.95)

    def pymc_model_plot_traces(self):
        """Draw the ArviZ trace plots of the sampled trace."""
        az.plot_trace(self._require_trace())

    def pymc_model_plot_interactive_trace(self, trace='R0', bin_size=0.5, x_range=(0, 7)):
        """Return a plotly distribution plot of one sampled variable.

        Parameters
        ----------
        trace : str
            Name of the variable in the trace (default ``'R0'``).
        bin_size : float
            Histogram bin width passed to ``create_distplot``.
        x_range : sequence of two numbers
            Lower and upper limit of the x axis.
        """
        samples = self._require_trace()[trace]
        fig = ff.create_distplot([samples], bin_size=bin_size, group_labels=['x'])
        # Add title
        fig.update_layout(title_text='Curve and Rug Plot')
        fig.update_xaxes(range=list(x_range))
        return fig

In [4]:
# -------- COVID Data --------#
# Download the table once, then register each country with its own population
# (populations are used later to scale case counts into fractions).
covid_obj = covid_data_etl(date_begin='3/1/20', date_end='9/28/20')
covid_obj.get_country_data(country='Brazil', population= 212.6e6)
covid_obj.get_country_data(country='US', population= 331.4e6) # 2020 US Census resident population (~331.4 million)

------------ COVID Data for Brazil, from 3/1/20 to 9/28/20, Loaded ----------- 
------------ COVID Data for US, from 3/1/20 to 9/28/20, Loaded ----------- 


In [6]:
# -------- SIR Params --------#
# Observation model: which likelihood to use and the scale of the HalfCauchy
# prior placed on its noise term.
likelihood = dict(distribution='lognormal', sigma=2)
# Location / scale of the Lognormal priors on the two SIR rates.
prior = dict(lam=1.0, mu=0.5, lambda_std=1.0, mu_std=0.2)
# -------- SIR Obj Loading --------#
sir_model = SIR_model_sunode(covid_obj)
sir_model.build_pymc_sir_model(likelihood=likelihood, prior=prior)
# sir_model.plot_pymc_model_dag()  # TODO: understand why the image didn't keep python-graphviz installed

In [19]:
# Draw posterior samples: 2000 draws after 1000 tuning steps, 4 chains on 4 cores.
sir_model.sample_posterior_pymc_model(n_samples=2000, n_tune=1000, n_chains=4, n_cores=4)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, lambda, sigma]


ValueError: Not enough samples to build a trace.

In [None]:
# Summary table of the sampled trace (az.summary); needs the sampling cell to have succeeded.
sir_model.pymc_model_posterior_summary()

In [None]:
# Posterior density plots with the mode as point estimate and a 95% HDI.
sir_model.pymc_model_plot_posterior()

In [None]:
# ArviZ trace plots of the sampled trace.
sir_model.pymc_model_plot_traces()

In [None]:
# Interactive plotly distribution plot of the sampled 'R0' values (the method's default variable).
sir_model.pymc_model_plot_interactive_trace()