# COVID19 - District Region

Install necessary packages for parallel computation:

```
pip install ipyparallel
ipcluster nbextension enable
pip install parallel-execute
```

To install for all users on JupyterHub, as root:
```
jupyter nbextension install --sys-prefix --py ipyparallel
jupyter nbextension enable --sys-prefix --py ipyparallel
jupyter serverextension enable --sys-prefix --py ipyparallel
```

Start the cluster from the Jupyter notebook interface.

In [1]:
import urllib.request
import pandas as pd
import numpy as np

In [2]:
# Download data
# get_data is a project-local module. Its get_data() function is only called
# when LoadData is switched to True; with the default False this cell does nothing
# beyond the import.
# NOTE(review): presumably get_data() refreshes the files under data/ — confirm.
import get_data
LoadData=False

if LoadData:
    get_data.get_data()

In [3]:
# Load the table stored in data/dados_municipios_SP.csv into dfSP.
dfSP = pd.read_csv("data/dados_municipios_SP.csv")
# Bare expression: the notebook renders the DataFrame as the cell output.
dfSP

Unnamed: 0,date,state,city,place_type,confirmed,deaths,order_for_place,is_last,popEst,city_ibge_code,confirmed_per_100k_inhabitants,death_rate,DRS
0,2020-04-19,SP,TOTAL,state,14267,1015,53,True,45919049.0,35.0,31.06989,0.0711,Indefinido
1,2020-04-18,SP,TOTAL,state,13894,991,52,False,45919049.0,35.0,30.25760,0.0713,Indefinido
2,2020-04-17,SP,TOTAL,state,12841,928,51,False,45919049.0,35.0,27.96443,0.0723,Indefinido
3,2020-04-16,SP,TOTAL,state,11568,853,50,False,45919049.0,35.0,25.19216,0.0737,Indefinido
4,2020-04-15,SP,TOTAL,state,11043,778,49,False,45919049.0,35.0,24.04884,0.0705,Indefinido
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3263,2020-04-14,SP,Águas de São Pedro,city,1,0,5,False,3451.0,3500600.0,28.97711,,DRS 10 - Piracicaba
3264,2020-04-13,SP,Águas de São Pedro,city,1,0,4,False,3451.0,3500600.0,28.97711,,DRS 10 - Piracicaba
3265,2020-04-12,SP,Águas de São Pedro,city,1,0,3,False,3451.0,3500600.0,28.97711,,DRS 10 - Piracicaba
3266,2020-04-11,SP,Águas de São Pedro,city,1,0,2,False,3451.0,3500600.0,28.97711,,DRS 10 - Piracicaba


In [4]:
# Model

In [5]:
# List of the DRS labels present in dfSP, in order of first appearance.
# The placeholder label "Indefinido" is left out. Filtering (instead of
# list.remove) gives the same list when the label is present and avoids a
# ValueError when the table contains no "Indefinido" rows.
DRS = [drs for drs in dfSP["DRS"].unique() if drs != "Indefinido"]
# Bare expression: the notebook shows the list as the cell output.
DRS

['DRS 09 - Marília',
 'DRS 06 - Bauru',
 'DRS 16 - Sorocaba',
 'DRS 07 - Campinas',
 'DRS 03 - Araraquara',
 'DRS 02 - Araçatuba',
 'DRS 17 - Taubaté',
 'DRS 10 - Piracicaba',
 'DRS 01 - Grande São Paulo',
 'DRS 15 - São José do Rio Preto',
 'DRS 12 - Registro',
 'DRS 05 - Barretos',
 'DRS 13 - Ribeirão Preto',
 'DRS 11 - Presidente Prudente',
 'DRS 04 - Baixada Santista',
 'DRS 14 - São João da Boa Vista',
 'DRS 08 - Franca']

# SEAIR-D Model Equations

$$\begin{array}{l}\frac{d s}{d t}=-[\beta i(t) + \beta_2 a(t)-\mu] \cdot s(t)\\ 
\frac{d e}{d t}=[\beta i(t) + \beta_2 a(t)] \cdot s(t) -(\sigma+\mu) \cdot e(t)\\ 
\frac{d a}{d t}=\sigma e(t) \cdot (1-p)-(\gamma+\mu) \cdot a(t) \\
\frac{d i}{d t}=\sigma e(t) \cdot p - (\gamma + \sigma_2 + \sigma_3 + \mu) \cdot i(t)\\ 
\frac{d r}{d t}=(b + \sigma_2) \cdot i(t) + \gamma \cdot a(t) - \mu \cdot r(t)\\
\frac{d k}{d t}=(a + \sigma_3 - \mu) \cdot d(t)
\end{array}$$

The last equation does not need to be solved because:

$$\frac{d k}{d t}=-(\frac{d s}{d t}+\frac{d e}{d t}+\frac{d a}{d t}+\frac{d i}{d t}+\frac{d r}{d t})$$

The sum of all the rates is equal to zero! This equation is important because it keeps the total of all compartments conserved.


## Parameters

$\beta$: Effective contact rate [1/min]
    
$\gamma$: Recovery(+Mortality) rate $\gamma=(a+b)$ [1/min]

$a$: mortality of healed  [1/min]

$b$: recovery rate  [1/min]

$\sigma$: is the rate at which individuals move from the exposed to the infectious classes. Its reciprocal ($1/\sigma$) is the average latent (exposed) period.

$\sigma_2$: is the rate at which individuals move from the infectious to the healed classes. Its reciprocal ($1/\sigma_2$) is the average time an infectious individual takes to move to the healed class.

$\sigma_3$: is the rate at which individuals move from the infectious to the dead classes. Its reciprocal ($1/\sigma_3$) is the average time an infectious individual takes to move to the dead class.
    
$p$: is the fraction of the exposed sub-population that becomes symptomatic infectious.

$(1-p)$: is the fraction of the exposed sub-population that becomes asymptomatic infectious.

In [6]:
#objective function Odeint solver
from scipy.integrate import odeint
import math

#objective function Odeint solver
def lossOdeint(point, data, death, s_0, e_0, a_0, i_0, r_0, d_0, startNCases, ratioRecovered, weigthCases, weigthRecov):
    """Weighted log-scale RMS error between the SEAIR-D solution and observed data.

    The model is integrated with odeint over len(data) unit time steps starting
    from [s_0, e_0, a_0, i_0, r_0, d_0]. Only the time steps whose observed
    value in ``data`` exceeds ``startNCases`` contribute to the error.

    Args:
        point: sequence of 8 rates, unpacked as
            (beta, beta2, sigma, sigma2, sigma3, gamma, b, mu).
        data: object supporting len() and exposing ``.values``; compared with
            the model's I compartment.
        death: object exposing ``.values``; compared with the D compartment.
        s_0, e_0, a_0, i_0, r_0, d_0: initial values of the six compartments.
        startNCases: observations not above this value are ignored.
        ratioRecovered: the recovered target is data * ratioRecovered,
            capped at 1e6, and is compared with the R compartment.
        weigthCases: weight of the cases error.
        weigthRecov: weight of the recovered error.

    Returns:
        weigthCases * rms_cases + weight_deaths * rms_deaths
        + weigthRecov * rms_recovered, where
        weight_deaths = max(0, 1 - weigthCases - weigthRecov).
    """
    size = len(data)
    beta, beta2, sigma, sigma2, sigma3, gamma, b, mu = point
    p = 0.2  # fixed fraction of the exposed that becomes symptomatic

    def rhs(state, t):
        # Right-hand side of the SEAIR-D system for odeint.
        S, E, A, I, R, _ = state
        dS = -(beta2*A+beta*I)*S+mu*S
        dE = (beta2*A+beta*I)*S-sigma*E-mu*E
        dA = sigma*E*(1-p)-gamma*A-mu*A
        dI = sigma*E*p-gamma*I-sigma2*I-sigma3*I-mu*I
        dR = b*I+gamma*A+sigma2*I-mu*R
        # D closes the balance so that the six rates add up to zero.
        dD = (-(dS+dE+dA+dI+dR))
        return [dS, dE, dA, dI, dR, dD]

    def sqLogGap(modelValue, observedValue):
        # Squared difference of log10(x + 1), with a floor that keeps log10 defined.
        return (math.log10(max(modelValue+1,1e-12)) - math.log10(max(observedValue+1,1e-12)))**2

    initialState = [s_0, e_0, a_0, i_0, r_0, d_0]
    timeGrid = np.arange(0, size, 1)
    solution = odeint(rhs, initialState, timeGrid, hmax=0.01)

    errCases = 0
    errDeaths = 0
    errRecov = 0
    counted = 0
    for idx, observedCases in enumerate(data.values):
        if not observedCases > startNCases:
            continue
        errCases = errCases + sqLogGap(solution[idx,3], observedCases)
        errDeaths = errDeaths + sqLogGap(solution[idx,5], death.values[idx])
        recoveredTarget = min(1e6, observedCases*ratioRecovered)
        errRecov = errRecov + sqLogGap(solution[idx,4], recoveredTarget)
        counted += 1

    # Root mean square per series; max(1, ...) avoids dividing by zero when nothing was counted.
    divisor = max(1, counted)
    rmsCases = np.sqrt(errCases/divisor)
    rmsDeaths = np.sqrt(errDeaths/divisor)
    rmsRecov = np.sqrt(errRecov/divisor)

    # Deaths take whatever weight is left over, never less than zero.
    weightDeaths = max(0, 1. - weigthCases - weigthRecov)

    return weigthCases*rmsCases + weightDeaths*rmsDeaths + weigthRecov*rmsRecov

In [7]:
# Initial parameters
# Read the per-DRS settings stored in data/param.csv; the cell output below
# lists the columns that were loaded.
dfparam = pd.read_csv("data/param.csv")
# Bare expression: the notebook renders the DataFrame as the cell output.
dfparam

Unnamed: 0,DRS,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,RATIO,WCASES,WREC
0,DRS 01 - Grande São Paulo,2020-03-15,60,280000.0,0.0001,0.0001,0.0001,0.0001,80.0,1500,0.1,0.6,0.1
1,DRS 02 - Araçatuba,2020-04-15,60,200.0,0.0001,0.0001,0.0001,0.0001,0.0001,0,0.1,0.4,0.1
2,DRS 03 - Araraquara,2020-04-15,60,600.0,0.0001,0.0001,0.0001,0.0001,0.0001,0,0.1,0.4,0.1
3,DRS 04 - Baixada Santista,2020-04-15,60,2000.0,0.0001,0.0001,0.0001,0.0001,0.0001,0,0.1,0.4,0.1
4,DRS 05 - Barretos,2020-04-22,60,300.0,0.0001,0.0001,0.0001,0.0001,0.0001,0,0.1,0.4,0.1
5,DRS 06 - Bauru,2020-04-15,60,4000.0,0.0001,0.0001,4.0,0.0001,0.0001,0,0.1,0.4,0.1
6,DRS 07 - Campinas,2020-04-09,60,5000.0,0.0001,0.0001,40.0,0.0001,0.0001,0,0.1,0.4,0.1
7,DRS 08 - Franca,2020-04-17,60,300.0,0.0001,0.0001,0.0001,0.0001,0.0001,0,0.1,0.4,0.1
8,DRS 09 - Marília,2020-04-17,60,2000.0,0.0001,0.0001,0.0001,0.0001,0.0001,0,0.1,0.4,0.1
9,DRS 10 - Piracicaba,2020-04-09,60,1000.0,0.0001,0.0001,0.0001,0.0001,1.0,0,0.1,0.4,0.1


In [8]:
# Initial parameter optimization

In [9]:
# Load solver
# GlobalOptimization selects which project-local module is bound to the name
# `Learner` for the rest of the notebook:
#   True  -> LearnerGlobalOpt (basinhopping global optimization, max 100 times minimize)
#   False -> Learner (minimize)
GlobalOptimization=False
# ray is imported once, unconditionally: ray.get is called further down
# whichever solver is selected.
import ray

if GlobalOptimization:
    import LearnerGlobalOpt as Learner
else:
    import Learner

2020-04-24 02:29:41,441	INFO resource_spec.py:205 -- Starting Ray with 127.59 GiB memory available for workers and up to 18.63 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


In [None]:
# True: queue one remote train() call per region in DRS.
# False: train a single, hard-coded region directly in this process.
allDistricts=True

# Handles returned by learner.train.remote(); stays empty in the single-region branch.
results=[]
if allDistricts:
    for districtRegion in DRS:
        # Rows of dfparam for this region; reset_index() turns the old row index into a leading column.
        query = dfparam.query('DRS == "{}"'.format(districtRegion)).reset_index()
        # First matching row, from column position 2 onward, unpacked below as
        # the remaining positional arguments of Learner.
        # NOTE(review): [0] raises IndexError when dfparam has no row for this region.
        parameters = np.array(query.iloc[:, 2:])[0]
        learner = Learner.Learner.remote(districtRegion, lossOdeint, *parameters)
        #learner.train()
        #add function evaluation to the queue
        results.append(learner.train.remote())
else:
    districtRegion="DRS 01 - Grande São Paulo"
    query = dfparam.query('DRS == "{}"'.format(districtRegion)).reset_index()
    parameters = np.array(query.iloc[:, 2:])[0]
    # NOTE(review): this branch instantiates Learner without .remote(); confirm the
    # class supports direct construction.
    learner = Learner.Learner(districtRegion, lossOdeint, *parameters)
    learner.train()

# Wait for and fetch the outcome of every queued train() call.
results = ray.get(results)

[2m[36m(pid=38119)[0m 
[2m[36m(pid=38119)[0m  running model for DRS 09 - Marília
[2m[36m(pid=38119)[0m 
[2m[36m(pid=38119)[0m        fun: 5.756753475988391
[2m[36m(pid=38119)[0m  hess_inv: <8x8 LbfgsInvHessProduct with dtype=float64>
[2m[36m(pid=38119)[0m       jac: array([-3.57935903e-05, -3.63264974e-05, -1.52766688e-05,  1.28785871e-05,
[2m[36m(pid=38119)[0m         1.72306613e-05,  1.27897692e-05, -4.26325641e-06,  2.15827356e-05])
[2m[36m(pid=38119)[0m   message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
[2m[36m(pid=38119)[0m      nfev: 18
[2m[36m(pid=38119)[0m       nit: 1
[2m[36m(pid=38119)[0m    status: 0
[2m[36m(pid=38119)[0m   success: True
[2m[36m(pid=38119)[0m         x: array([0.00103579, 0.00103633, 0.00626492, 0.00625   , 0.00625   ,
[2m[36m(pid=38119)[0m        0.0009873 , 0.00100426, 0.00097851])
[2m[36m(pid=38120)[0m 
[2m[36m(pid=38120)[0m  running model for DRS 07 - Campinas
[2m[36m(pid=38121)[0m 
[2m[36m(



[2m[36m(pid=38118)[0m 
[2m[36m(pid=38118)[0m        fun: 0.5020253151167708
[2m[36m(pid=38118)[0m  hess_inv: <8x8 LbfgsInvHessProduct with dtype=float64>
[2m[36m(pid=38118)[0m       jac: array([ 0.00067187,  0.00069189, -0.00463799,  0.03446556,  0.0383808 ,
[2m[36m(pid=38118)[0m         0.26803528, -0.00390946,  0.17469218])
[2m[36m(pid=38118)[0m   message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
[2m[36m(pid=38118)[0m      nfev: 1098
[2m[36m(pid=38118)[0m       nit: 51
[2m[36m(pid=38118)[0m    status: 0
[2m[36m(pid=38118)[0m   success: True
[2m[36m(pid=38118)[0m         x: array([1.61119850e+01, 3.90456479e-01, 7.15660919e-02, 6.25000000e-03,
[2m[36m(pid=38118)[0m        6.25000000e-03, 1.00000000e-16, 8.63895600e-02, 3.01895172e-03])
[2m[36m(pid=38178)[0m 
[2m[36m(pid=38178)[0m  running model for DRS 02 - Araçatuba
[2m[36m(pid=38121)[0m 
[2m[36m(pid=38121)[0m        fun: 0.31370728114295043
[2m[36m(pid=38121)[0m  hess_inv:

2020-04-24 02:30:04,956	ERROR worker.py:1719 -- Possible unhandled error from worker: [36mray_Learner:train()[39m (pid=38219, host=jedha)
  File "/home/ats4i/Desktop/corona/dataAndModelsCovid19/regionsSP/Learner.py", line 139, in train
    self.death, self.districtRegion, self.s_0, self.e_0, self.a_0, self.i_0, self.r_0, self.d_0)
  File "/home/ats4i/Desktop/corona/dataAndModelsCovid19/regionsSP/Learner.py", line 86, in predict
    new_index = self.extend_index(data.index, self.predict_range)
  File "/home/ats4i/Desktop/corona/dataAndModelsCovid19/regionsSP/Learner.py", line 77, in extend_index
    current = datetime.strptime(index[-1], '%Y-%m-%d')
  File "/home/ats4i/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3930, in __getitem__
    return getitem(key)
IndexError: index -1 is out of bounds for axis 0 with size 0



[2m[36m(pid=38175)[0m 
[2m[36m(pid=38175)[0m        fun: 0.19217082280785514
[2m[36m(pid=38175)[0m  hess_inv: <8x8 LbfgsInvHessProduct with dtype=float64>
[2m[36m(pid=38175)[0m       jac: array([ 9.74257341e-03, -3.47244455e-04,  1.63091954e-01, -7.20009774e-02,
[2m[36m(pid=38175)[0m        -8.53523113e-02,  3.88751212e-01,  2.25457708e-02, -6.63316346e+00])
[2m[36m(pid=38175)[0m   message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
[2m[36m(pid=38175)[0m      nfev: 1044
[2m[36m(pid=38175)[0m       nit: 36
[2m[36m(pid=38175)[0m    status: 0
[2m[36m(pid=38175)[0m   success: True
[2m[36m(pid=38175)[0m         x: array([3.90291955e+01, 3.81265709e+01, 4.08777352e-02, 1.21637405e-02,
[2m[36m(pid=38175)[0m        6.25524779e-03, 3.25072278e-03, 9.43678893e-03, 2.25152059e-04])
[2m[36m(pid=38251)[0m 
[2m[36m(pid=38251)[0m  running model for DRS 04 - Baixada Santista


In [None]:
# Save data as csv
import glob
import os

def drs_name_from_path(file_path):
    """Return the DRS name encoded in a result file name.

    The name is the text after the last " - " in the file name, up to the
    first dot. Only the file name is inspected, never the directory part, so
    a file without " - " yields its own stem instead of an empty string
    (the relative path starts with "./", which used to be cut at that dot).

    Args:
        file_path: path of a result CSV file.

    Returns:
        The DRS name as a string.
    """
    return os.path.basename(file_path).split(" - ")[-1].split(".")[0]


path = './results/data'
# glob returns matches in arbitrary order; sorting keeps the row order of the
# combined file reproducible between runs.
files = sorted(glob.glob(os.path.join(path, "*.csv")))

if files:
    # One DataFrame per result file, each tagged with the DRS name taken from its file name.
    df = (pd.read_csv(f).assign(DRS = drs_name_from_path(f)) for f in files)
    df_all_drs = pd.concat(df, ignore_index=True)
    df_all_drs.index.name = 'index'
    df_all_drs.to_csv('./data/SEAIRD_sigmaOpt_AllDRS'+'.csv', sep=",")
else:
    # pd.concat raises on an empty sequence; report the real cause instead.
    print("No CSV files found in {}; nothing was saved.".format(path))

# Plots

In [None]:
import matplotlib.pyplot as plt
import covid_plots

In [None]:
def loadDataFrame(filename):
    """Read a pickled DataFrame and normalise its column labels.

    Every label is lower-cased, spaces are turned into underscores and
    parentheses are removed.

    Args:
        filename: path of the pickle file to read.

    Returns:
        The DataFrame with the renamed columns.
    """
    # NOTE(review): unpickling can run code stored in the file; only load files you trust.
    frame = pd.read_pickle(filename)
    # ' ' -> '_', and '(' / ')' are deleted, in a single pass per label.
    labelCleanup = str.maketrans(" ", "_", "()")
    frame.columns = [label.lower().translate(labelCleanup) for label in frame.columns]
    return frame

In [None]:
#DRS 01 - Grande São Paulo
#DRS 02 - Araçatuba
#DRS 03 - Araraquara
#DRS 04 - Baixada Santista
#DRS 05 - Barretos
#DRS 06 - Bauru
#DRS 07 - Campinas
#DRS 08 - Franca
#DRS 09 - Marília
#DRS 10 - Piracicaba
#DRS 11 - Presidente Prudente
#DRS 12 - Registro
#DRS 13 - Ribeirão Preto
#DRS 14 - São João da Boa Vista
#DRS 15 - São José do Rio Preto
#DRS 16 - Sorocaba
#DRS 17 - Taubaté

#select districts for plotting
districts4Plot=['DRS 01 - Grande São Paulo',
               'DRS 04 - Baixada Santista',
               'DRS 07 - Campinas',
               'DRS 05 - Barretos',
               'DRS 15 - São José do Rio Preto']

#main district region for analysis
districtRegion = "DRS 01 - Grande São Paulo"

#Choose here your options
#opt=0 all plots
#opt=1 corona log plot
#opt=2 logistic model prediction
#opt=3 bar plot with growth rate
#opt=4 log plot + bar plot
#opt=5 SEAIR-D Model
opt = 0

#version label used to identify the resulting png file
version = "1"

#parameters for plotting
#rows of dfparam for the main district; [0] below takes the first matching row
query = dfparam.query('DRS == "{}"'.format(districtRegion)).reset_index()
startdate = query['start-date'][0]
predict_range = query['prediction-range'][0]

In [None]:
#do not allow the scrolling of the plots

In [None]:
%%javascript
// Replace the output area's scroll check with one that always answers false,
// so the plots are never placed inside a scrolling box.
IPython.OutputArea.prototype._should_scroll = function(lines){
    return false;
}

In [None]:
#number of cases to start plotting model in log graph - real data = 100
#passed to covid_plots.covid_plots for the selected-region plot
startCase=1

In [None]:
#make plots for every region in DRS
# A dedicated loop variable is used on purpose: looping with `districtRegion`
# would leave it set to the last entry of DRS, and the "selected DRS" plot that
# follows would silently switch to that region.
# The literals 1, 5, 1 sit in the positions that the selected-region call fills
# with startCase, opt and version (opt=5 is the SEAIR-D model).
# NOTE(review): startdate and predict_range are the main district's values for
# every region — confirm this is intended.
for region in DRS:
    covid_plots.covid_plots(region, districts4Plot, startdate,predict_range, 1, 5, 1, show=True)

In [None]:
#make plots for selected DRS
#uses the current values of districtRegion, startCase, opt and version
covid_plots.covid_plots(districtRegion, districts4Plot, startdate,predict_range, startCase, opt, version, show=True)