# COVID19 - Countries

In [1]:
import os
import ray

In [2]:
import urllib.request
import pandas as pd
import numpy as np
from datetime import datetime,timedelta,date
from tabulate import tabulate
from IPython.display import HTML

In [3]:
# Download data
# get_data is a project-local helper module. gd.get_data() is invoked only when
# LoadData is True, so set the flag to False to skip that call on re-runs.
# NOTE(review): presumably get_data() refreshes the case data files used by the
# later cells — confirm in get_data.py.
import get_data as gd
LoadData=True

if LoadData:
    gd.get_data()

# SEAIR-D Model Equations

$$\begin{array}{l}\frac{d s}{d t}=-[\beta i(t) + \beta_2 a(t)-\mu] \cdot s(t)\\ 
\frac{d e}{d t}=[\beta i(t) + \beta_2 a(t)] \cdot s(t) -(\sigma+\mu) \cdot e(t)\\ 
\frac{d a}{d t}=\sigma e(t) \cdot (1-p)-(\gamma+\mu) \cdot a(t) \\
\frac{d i}{d t}=\sigma e(t) \cdot p - (\gamma + \sigma_2 + \sigma_3 + \mu) \cdot i(t)\\ 
\frac{d r}{d t}=(b + \sigma_2) \cdot i(t) + \gamma \cdot a(t) - \mu \cdot r(t)\\
\frac{d k}{d t}=(a + \sigma_3 - \mu) \cdot i(t)
\end{array}$$

The sum of all compartments is equal to the population considered:

$$s + e + a + i + r + k = N$$

Taking the time derivative of this sum gives:

$$\frac{d s}{d t}+\frac{d e}{d t}+\frac{d a}{d t}+\frac{d i}{d t}+\frac{d r}{d t}+\frac{d k}{d t} = 0$$

The last equation does not need to be solved explicitly, because:

$$\frac{d k}{d t}=-(\frac{d s}{d t}+\frac{d e}{d t}+\frac{d a}{d t}+\frac{d i}{d t}+\frac{d r}{d t})$$

The sum of all rates is equal to zero! This equation is important because it guarantees that the total population is conserved.


## Parameters

N: the population considered

$\beta$: Effective contact rate [1/min]
    
$\gamma$: Recovery(+Mortality) rate $\gamma=(a+b)$ [1/min]

$a$: mortality of healed  [1/min]

$b$: recovery rate  [1/min]

$\sigma$: is the rate at which individuals move from the exposed to the infectious classes. Its reciprocal ($1/\sigma$) is the average latent (exposed) period.

$\sigma_2$: is the rate at which individuals move from the infectious to the healed classes. Its reciprocal ($1/\sigma_2$) is the average time an infectious individual takes to move to the healed class.

$\sigma_3$: is the rate at which individuals move from the infectious to the dead classes. Its reciprocal ($1/\sigma_3$) is the average time an infectious individual takes to move to the dead class.
    
$p$: is the fraction of the exposed that becomes the symptomatic infectious sub-population.

$(1-p)$: is the fraction of the exposed which becomes asymptomatic infectious sub-population.

## Options: Param.CSV file, changeCSV, Adjust Param, Save CSV, Select Specific Country

In [4]:
from IPython.display import display, HTML, Markdown

# Notebook-wide switches read by the cells below.
changeCSV=False        # when True (and paramOpt==1) the s0 values of selected countries are rescaled
adjustParam=True       # NOTE(review): no active code in the visible cells reads this flag — confirm it is still needed
paramSave=False        # when True the parameter table is written to a CSV named "new_"+paramFile
selectCountries=False  # when False every country listed in the parameter table is used

## Different Options to Load Param.CSV file

In [5]:
# Select which saved parameter file this run starts from and which tag
# (version2) is appended to the result file name written later.
paramOpt=5
model = "YaboxIC"

# paramOpt -> (parameter CSV path, result tag)
PARAM_OPTIONS = {
    0: ("data/param_optimized_Yabox_HistMin-Copy3.csv", "003"),
    1: ("data/param_optimized_Yabox_HistMin-Copy4.csv", "003"),
    2: ("data/param_optimized_Yabox_HistMin.csv", "004"),
    3: ("data/param_optimized_Yabox_HistMin.csv", "005"),
    4: ("data/param_optimized_Yabox_HistMin.csv", "006"),
    5: ("data/param_optimized_Yabox_HistMin.csv", "007"),
}

# Fail immediately on an unsupported option instead of leaving paramFile and
# version2 undefined (or silently reusing values from an earlier kernel run).
if paramOpt not in PARAM_OPTIONS:
    raise ValueError(
        "Unsupported paramOpt={!r}; expected one of {}".format(paramOpt, sorted(PARAM_OPTIONS))
    )

paramFile, version2 = PARAM_OPTIONS[paramOpt]

In [6]:
# Load the initial parameters, drop incomplete rows and reset the e0 and a0
# columns to zero.
# The columns are set through .assign() rather than attribute access
# (dfparam.e0 = 0): attribute assignment only updates a column that already
# exists and otherwise just attaches a plain Python attribute to the object.
dfparam = pd.read_csv(paramFile).dropna().assign(e0=0, a0=0)

display(Markdown("## Original Initial Parameters"))
display(dfparam)

## Original Initial Parameters

Unnamed: 0,country,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,WCASES,WREC
0,Brazil,03/03/20,200,21305180,0,0,139,129,101,97,0.083,0.2333
1,China,01/28/20,200,318315310,0,0,200,100,50,50,0.112,0.0269
2,Italy,02/27/20,200,941213,0,0,346,31,39,243,0.1295,0.317
3,US,02/20/20,200,98541764,0,0,19,49,122,110,0.0637,0.0168
4,India,03/09/20,200,459980359,0,0,340,61,98,48,0.11,0.0013


## Modify param.csv file to fit better the data

In [7]:
# Optional manual rescaling of s0; only applied to the parameter set selected
# by paramOpt == 1.
if changeCSV and (paramOpt==1):
    # Coerce the columns to float first; unparsable entries become NaN.
    for col in ('s0', 'i0', 'd0'):
        dfparam[col] = pd.to_numeric(dfparam[col], errors='coerce').astype(float)
    # country -> (numerator, denominator) applied to s0. Kept as a ratio so the
    # floating-point result is exactly s0*num/den.
    s0Scale = {'Italy': (1.8, 1), 'China': (1.5, 1), 'India': (12, 100), 'US': (8, 10)}
    for name, (num, den) in s0Scale.items():
        isCountry = dfparam.country == name
        dfparam.loc[isCountry, 's0'] = dfparam.loc[isCountry, 's0'] * num / den

# Choose the countries to process. `countries` is always bound here, so a
# missing or stale value can never reach the cells that iterate over it.
# The hand-picked subset is only defined for paramOpt == 0; every other
# combination falls back to all countries in the parameter table.
if selectCountries and paramOpt==0:
    countries=['Brazil',
         'US']
else:
    countries=dfparam.country

# These three columns are stored as integers.
for col in ('s0', 'i0', 'd0'):
    dfparam[col] = dfparam[col].astype(int)

if paramSave:
    # NOTE(review): the prefix is prepended to the whole relative path, so with
    # paramFile = "data/..." this writes to "new_data/..." rather than
    # "data/new_..." — confirm that this is the intended location.
    dfparam.to_csv("new_"+paramFile)

display(Markdown("## Modified Initial Parameters"))
display(dfparam)

## Modified Initial Parameters

Unnamed: 0,country,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,WCASES,WREC
0,Brazil,03/03/20,200,21305180,0,0,139,129,101,97,0.083,0.2333
1,China,01/28/20,200,318315310,0,0,200,100,50,50,0.112,0.0269
2,Italy,02/27/20,200,941213,0,0,346,31,39,243,0.1295,0.317
3,US,02/20/20,200,98541764,0,0,19,49,122,110,0.0637,0.0168
4,India,03/09/20,200,459980359,0,0,340,61,98,48,0.11,0.0013


# Solver Loading and Version

In [8]:
# Reload edited modules automatically before each cell execution.
%reload_ext autoreload
%autoreload 2

# Switches selecting which Learner implementation is imported below:
#   splitUnder=False                 -> LearnerYabox_v3
#   splitUnder=True, newModel=False  -> LearnerYabox_v4
#   splitUnder=True, newModel=True   -> LearnerYabox_v5
splitUnder=True
underNotif=True   # forwarded to the Learner constructor as underNotif=...
newModel=True     # also selects the table headers used when the fitted values are displayed

if splitUnder:
    if newModel:
        import LearnerYabox_v5 as Learner 
    else:
        import LearnerYabox_v4 as Learner 
else:
    import LearnerYabox_v3 as Learner


Yabox version:  1.1.0


2020-10-19 20:07:16,018	INFO resource_spec.py:212 -- Starting Ray with 229.98 GiB memory available for workers and up to 72.13 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-10-19 20:07:16,352	INFO services.py:1170 -- View the Ray dashboard at [1m[32mlocalhost:8267[39m[22m


In [9]:
# Run configuration for the training loop.
allCountries, cleanRecovered, version = True, False, "2"

# Echo what is about to be trained: a single country, or the whole list.
if not allCountries:
    countrySelected = "Italy"
    display(countrySelected)
else:
    display(countries)

0    Brazil
1     China
2     Italy
3        US
4     India
Name: country, dtype: object

In [10]:
%%javascript
// Override the output area's scroll predicate so that it answers true
// regardless of the number of output lines passed in.
IPython.OutputArea.prototype._should_scroll = function(lines){
    return true;}

<IPython.core.display.Javascript object>

# Main Loop

In [None]:
# Queue one remote training task per country, then wait for all of them.
# Both modes run the same per-country steps; they differ only in the list of
# countries and in the hard-coded end date used for Italy.
if allCountries:
    countriesToTrain = list(countries)
    fixedEndDates = {'Italy': "10/15/20"}
else:
    countriesToTrain = [countrySelected]
    # NOTE(review): Italy ends on 9/1/20 here but on 10/15/20 in the
    # all-countries mode; both values are kept as found — confirm which is intended.
    fixedEndDates = {'Italy': "9/1/20"}

results=[]
for country in countriesToTrain:
    query = dfparam.query('country == "{}"'.format(country)).reset_index()
    # Columns from position 2 onward are forwarded positionally to the Learner;
    # the first of them is parsed below as the start date.
    parameters = np.array(query.iloc[:, 2:])[0]
    current = datetime.strptime(parameters[0], '%m/%d/%y')
    # '%-m/%-d/%y' drops the zero padding ('-' is a platform-specific strftime flag).
    parameters[0] = datetime.strftime(current, '%-m/%-d/%y')
    if country in fixedEndDates:
        endDate = datetime.strptime(fixedEndDates[country], '%m/%d/%y')
    else:
        # Train up to two days before today (midnight of that day).
        endDate = datetime.combine(date.today() - timedelta(days=2), datetime.min.time())
    end_date = endDate.strftime('%-m/%-d/%y')
    f=Learner.Learner.remote(country, *parameters, end_date,
                             cleanRecovered, version,underNotif=underNotif)
    result = f.train.remote()
    results.append(result)

# Block until every queued training task has finished and collect the outputs.
results = ray.get(results)



  0%|          | 0/21125000.0 [00:00<?, ?it/s]
  0%|          | 0/21125000.0 [00:00<?, ?it/s]




  0%|          | 0/21125000.0 [00:00<?, ?it/s]
  0%|          | 0/21125000.0 [00:00<?, ?it/s]
  0%|          | 0/21125000.0 [00:00<?, ?it/s]
  0%|          | 0/21125000.0 [00:03<?, ?it/s]
  0%|          | 0/21125000.0 [00:03<?, ?it/s]
  0%|          | 0/21125000.0 [00:04<?, ?it/s]
  0%|          | 0/21125000.0 [00:03<?, ?it/s]
  0%|          | 1/21125000.0 [00:05<10052:33:29,  1.71s/it]
  0%|          | 1/21125000.0 [00:04<8693:16:52,  1.48s/it]
  0%|          | 0/21125000.0 [00:04<?, ?it/s]
  0%|          | 1/21125000.0 [00:06<11628:14:01,  1.98s/it]
  0%|          | 3/21125000.0 [00:06<7399:59:56,  1.26s/it]
  0%|          | 3/21125000.0 [00:06<8524:18:07,  1.45s/it] 
  0%|          | 1/21125000.0 [00:05<12128:04:19,  2.07s/it]
  0%|          | 1/21125000.0 [00:05<11365:42:45,  1.94s/it]
  0%|          | 6/21125000.0 [00:07<6093:52:01,  1.04s/it]
  0%|          | 3/21125000.0 [00:08<9895:55:42,  1.69s/it] 
  0%|          | 6/21125000.0 [00:08<6954:04:19,  1.19s/it]
  0%|          | 3

  0%|          | 435/21125000.0 [00:50<354:24:43, 16.56it/s]
  0%|          | 595/21125000.0 [00:49<223:41:30, 26.23it/s]
  0%|          | 276/21125000.0 [00:50<569:10:20, 10.31it/s]
  0%|          | 300/21125000.0 [00:51<537:31:25, 10.92it/s]
  0%|          | 630/21125000.0 [00:51<210:34:48, 27.87it/s]
  0%|          | 465/21125000.0 [00:52<337:32:19, 17.38it/s]
  0%|          | 300/21125000.0 [00:50<536:42:04, 10.93it/s]
  0%|          | 666/21125000.0 [00:52<202:49:11, 28.93it/s]
  0%|          | 325/21125000.0 [00:52<509:32:39, 11.52it/s]
  0%|          | 300/21125000.0 [00:52<540:25:38, 10.86it/s]
  0%|          | 496/21125000.0 [00:53<321:44:34, 18.24it/s]
  0%|          | 703/21125000.0 [00:53<192:04:41, 30.55it/s]
  0%|          | 325/21125000.0 [00:52<509:18:52, 11.52it/s]
  0%|          | 741/21125000.0 [00:54<183:57:35, 31.90it/s]
  0%|          | 351/21125000.0 [00:54<479:46:53, 12.23it/s]
  0%|          | 528/21125000.0 [00:55<314:13:30, 18.67it/s]
  0%|          | 325/211

  0%|          | 946/21125000.0 [01:28<276:34:40, 21.22it/s]
  0%|          | 2701/21125000.0 [01:29<82:16:28, 71.31it/s]
  0%|          | 1953/21125000.0 [01:29<104:44:15, 56.02it/s]
  0%|          | 1035/21125000.0 [01:30<254:00:41, 23.10it/s]
  0%|          | 2775/21125000.0 [01:30<81:21:00, 72.12it/s]
  0%|          | 1035/21125000.0 [01:29<252:30:25, 23.24it/s]
  0%|          | 2016/21125000.0 [01:31<103:59:38, 56.42it/s]
  0%|          | 990/21125000.0 [01:30<269:31:30, 21.77it/s]
  0%|          | 2850/21125000.0 [01:31<80:37:13, 72.78it/s]
  0%|          | 2080/21125000.0 [01:32<101:57:08, 57.55it/s]
  0%|          | 1081/21125000.0 [01:31<247:28:17, 23.71it/s]
  0%|          | 1081/21125000.0 [01:31<245:57:39, 23.86it/s]
  0%|          | 2926/21125000.0 [01:32<81:09:41, 72.29it/s]
  0%|          | 2145/21125000.0 [01:33<100:22:26, 58.46it/s]
  0%|          | 1035/21125000.0 [01:32<262:48:19, 22.33it/s]
  0%|          | 3003/21125000.0 [01:33<79:35:19, 73.72it/s]
  0%|          

  0%|          | 2145/21125000.0 [02:06<171:38:18, 34.18it/s]
  0%|          | 6105/21125000.0 [02:07<54:57:57, 106.73it/s]
  0%|          | 2278/21125000.0 [02:07<138:59:28, 42.21it/s]
  0%|          | 4753/21125000.0 [02:08<68:32:52, 85.59it/s]
  0%|          | 2080/21125000.0 [02:07<175:52:55, 33.36it/s]
  0%|          | 6216/21125000.0 [02:08<55:01:57, 106.60it/s]
  0%|          | 2211/21125000.0 [02:07<166:44:45, 35.19it/s]
  0%|          | 4851/21125000.0 [02:09<67:27:17, 86.97it/s]
  0%|          | 2346/21125000.0 [02:09<135:44:04, 43.23it/s]
  0%|          | 6328/21125000.0 [02:09<54:57:35, 106.74it/s]
  0%|          | 4950/21125000.0 [02:10<66:46:18, 87.86it/s]
  0%|          | 2145/21125000.0 [02:09<170:41:57, 34.37it/s]
  0%|          | 2415/21125000.0 [02:10<134:06:02, 43.75it/s]
  0%|          | 6441/21125000.0 [02:10<54:35:09, 107.47it/s]
  0%|          | 2278/21125000.0 [02:09<164:31:43, 35.66it/s]
  0%|          | 5050/21125000.0 [02:11<65:27:08, 89.63it/s]
  0%|       

In [None]:
# Shut Ray down; the training results were already fetched with ray.get() in the previous cell.
ray.shutdown()

# Optimized Parameters and Curves Adjust

In [None]:
# Build the table of fitted values: one row per training result.
cols=['beta0', 'beta01', 'startT', 'beta2', 'sigma', 'sigma2',
      'sigma3', 'gamma', 'b', 'gamma2', 'd', 'mu', 'sub', 'subRec', 'subDth']
dfVars = pd.DataFrame(data=results, columns=cols)
if allCountries:
    # Label the rows by position with the countries that were trained.
    # Assigning the dfparam.country Series would align on index labels instead,
    # which mislabels rows whenever dfparam's index is not 0..n-1 (e.g. after
    # dropna() removed a row) or when only a subset of countries was trained.
    dfVars['country']=list(countries)
else:
    dfVars['country']=countrySelected
    display(countrySelected)

In [None]:
# Place the country label right before the sub / subRec / subDth columns.
fitCols = ['beta0', 'beta01', 'startT', 'beta2', 'sigma', 'sigma2',
           'sigma3', 'gamma', 'b', 'gamma2', 'd', 'mu']
underNotifCols = ['country', 'sub', 'subRec', 'subDth']
cols = fitCols + underNotifCols
dfVars = dfVars.loc[:, cols]

display(Markdown("## Under Notification and Adjusts"))
# These four columns sit at positions 12..15 of the reordered table.
display(dfVars[underNotifCols])

In [None]:
# Country first, then every fitted value; saved under the tag held in version2.
cols = ['country'] + ['beta0', 'beta01', 'startT', 'beta2', 'sigma', 'sigma2',
                      'sigma3', 'gamma', 'b', 'gamma2', 'd', 'mu', 'sub', 'subRec', 'subDth']
dfVars = dfVars.loc[:, cols]
dfVars.to_csv('./results/underNotif_vars_'+version2+'.csv', sep=",")

display(Markdown("## Simulation Parameters"))

# Only two header names depend on which model generation is in use.
rateA, rateC = ('a', 'c') if newModel else ('$\\gamma$', '$\\gamma_2$')
cols = ['index', 'country', '$\\beta_0$', '$\\beta_{01}$', 'startT', '$\\beta_2$',
        '$\\sigma$', '$\\sigma_2$', '$\\sigma_3$', rateA, 'b', rateC, 'd', '$\\mu$',
        'sub', 'subRec', 'subDth']
# The table shows the index plus the first 13 columns (country .. mu).
HTML(tabulate(dfVars.iloc[:, :13], headers=cols, floatfmt='.3e', tablefmt='html'))

# Plots

In [None]:
# Plotting setup: inline matplotlib figures rendered in 'retina' format.
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import pandas as pd
#%load_ext autoreload
# Keep autoreload active so edits to imported modules are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2
# Project-local plotting module; cp.covid_plots() is called in the plot loop.
import covid_plots_v2 as cp

In [None]:
def loadDataFrame(filename):
    """Read a pickled DataFrame and normalise its column names.

    Column names are lower-cased, spaces become underscores and
    parentheses are removed.

    Parameters
    ----------
    filename : path of the pickle file, passed to ``pd.read_pickle``.

    Returns
    -------
    The loaded DataFrame with its columns renamed.

    Notes
    -----
    Unpickling can execute arbitrary code; only load files from a trusted source.
    """
    frame = pd.read_pickle(filename)
    # One translation table handles all three character substitutions at once.
    cleanup = str.maketrans({' ': '_', '(': None, ')': None})
    frame.columns = [name.lower().translate(cleanup) for name in frame.columns]
    return frame

## Plot Parameters

In [None]:
# Main country for analysis; it is also inserted into the comparison list below.
countryMain = "Brazil"

# Countries passed to cp.covid_plots() as the second argument.
countries4Plot=['China',
               'US',
               'India',
                countryMain,
               'Italy'
               ]

# Plot selection (passed to cp.covid_plots() as `opt`):
#opt=0 all plots
#opt=1 corona log plot
#opt=2 logistic model prediction
#opt=3 bar plot with growth rate
#opt=4 log plot + bar plot
#opt=5 SEAIR-D Model
opt = 5

# Number of cases to start plotting model in log graph - real data = 100.
# Note: the plot loop overwrites startCase with each country's START value.
startCase=100

# All countries for plotting. This rebinds `countries`, replacing whatever
# selection was made before training.
countries=dfparam.country

In [None]:
#do not allow the scrolling of the plots

In [None]:
%%javascript
// Override the output area's scroll predicate so that it answers false
// regardless of the number of output lines (goal: plots are not put in a scroll box).
IPython.OutputArea.prototype._should_scroll = function(lines){
    return false;}

## Plot Loop

In [None]:
# Plot every country for the SEAIR-D model (opt == 5), otherwise a single country.
allCountries = (opt == 5)

# Maximum date shown in the plots: 200 days after lastDate.
# (200 days is used as the maximum for the death-derivative calculations.)
# The value does not depend on the country, so it is computed once.
lastDate='9/22/20'
maxDate= datetime.strptime(lastDate, "%m/%d/%y") + timedelta(days = 200)
maxDateStr = maxDate.strftime("%-m/%-d/%y")

# In single-country mode plot countryMain explicitly rather than relying on a
# `country` value left behind by an earlier loop.
countriesToPlot = list(countries) if allCountries else [countryMain]

for country in countriesToPlot:
    query = dfparam.query('country == "{}"'.format(country)).reset_index()
    # Per-country plot settings come from the parameter table.
    startCase = query['START'][0]
    startdate = query['start-date'][0]
    predict_range = query['prediction-range'][0]
    cp.covid_plots(country, countries4Plot, startdate, predict_range,
                   startCase, opt, version, show=True, maxDate=maxDateStr, model=model)