In [1]:
#import libraries necessary for parallelization
import multiprocessing as mp
import sys
#add the path where the models are
sys.path.append("../main/")
import pandas as pd
import numpy as np
from plotnine import *
from mizani.breaks import date_breaks
from mizani.formatters import date_format
from modelos import SIR_EDO
from itertools import repeat
import time

#set the default theme for all plots
theme_set(theme_linedraw())

In [2]:
#Load the per-state case table (path is relative to the notebook) and preview the first rows
data = pd.read_csv("../data/estados.csv")
data.head()

Unnamed: 0,date,state,newCases,mortes,TOTAL,totalCasesPred,sucetivel,Recuperado
0,2020-02-25,TOTAL,1.0,0.0,1.0,1.0,217026000.0,0.0
1,2020-02-26,TOTAL,0.0,0.0,1.0,1.356124,217026000.0,0.079351
2,2020-02-27,TOTAL,0.0,0.0,1.0,1.844646,217026000.0,0.188203
3,2020-02-28,TOTAL,0.0,0.0,1.0,2.371169,217026000.0,0.305522
4,2020-02-29,TOTAL,1.0,0.0,2.0,3.07546,217026000.0,0.462452


In [3]:
#Keep only the rows whose state is Sao Paulo ("SP") and preview them
sp = data.loc[data["state"].eq("SP")]
sp.head()

Unnamed: 0,date,state,newCases,mortes,TOTAL,totalCasesPred,sucetivel,Recuperado
41,2020-02-25,SP,1.0,0.0,1.0,1.0,45919050.0,0.0
42,2020-02-26,SP,0.0,0.0,1.0,1.253639,45919050.0,0.140394
43,2020-02-27,SP,0.0,0.0,1.0,1.571138,45919050.0,0.316136
44,2020-02-28,SP,0.0,0.0,1.0,1.96871,45919050.0,0.5362
45,2020-02-29,SP,1.0,0.0,2.0,2.468231,45919050.0,0.812695


In [4]:
#Drop rows with missing values so the interval computation does not crash
sp = sp.dropna()

In [5]:
#Series with the cumulative number of cases
y = sp["TOTAL"]

#Day counter: 1 for the first row up to the number of rows in the series
x = range(1, len(y) + 1)

In [6]:
#Module-level SIR model that predictSIR refits on every bootstrap round.
#NOTE(review): N is presumably the Sao Paulo population - confirm against the data source
model = SIR_EDO(N = 45919049)

In [7]:
def predictSIR(x,y,days,bootstrap, lb = 0.0275, ub = 0.975):
    
    """
    Bootstrap a SIR projection and summarise it per position.

    Every round resamples `y` with replacement, refits the module-level
    `model` on the resample and stores `model.predict(range(1, days))`.
    The stored predictions are then aggregated position by position.

    x: Number of days since the first case; passed unchanged to model.fit,
       and its length sets the size of every resample
    y: An array with the cumulative cases
    days: the model is asked to predict for range(1, days)
    bootstrap: how many simulations should be run to compute the interval
    lb: Quantile used as the lower limit of the interval. Default is 0.0275
        NOTE(review): 0.025 would be the symmetric counterpart of
        ub = 0.975 - confirm whether 0.0275 is intentional
    ub: Quantile used as the upper limit of the interval. Default is 0.975

    Returns a DataFrame with the columns x, mean, std, q1 (lb quantile)
    and q2 (ub quantile).
    Raises ValueError when bootstrap is smaller than 1.
    """
    if bootstrap < 1:
        # Without this check pd.concat fails later with an unrelated message
        raise ValueError("bootstrap must be at least 1")

    # The function names q1/q2 become the column labels in the aggregated frame
    def q1(x):
        return x.quantile(lb)
    
    def q2(x):
        return x.quantile(ub)
    
    df = []
    newx = range(1,days)
    
    #fit model using data
    for _ in range(bootstrap):
        # NOTE(review): sampling with replacement does not keep the time order
        # of the cumulative series - confirm this is the intended bootstrap
        newy = np.random.choice(a = y, size = len(x), replace = True)
        model.fit(x = x, y = newy)
        predicted = model.predict(newx)
        # Number the positions from the prediction's own length instead of
        # assuming a fixed offset from len(newx)
        df.append(pd.DataFrame.from_dict({"Predicted": predicted,
                                          "x": np.arange(start = 1, stop = len(predicted) + 1)}))
        
    dfs = pd.concat(df)
    
    f = ['mean', 'std', q1, q2]
    dfs = dfs.groupby(["x"])["Predicted"].agg(f).reset_index()
    
    return dfs

In [8]:
#Time the sequential bootstrap (bootstrap = 100, days = 50); the returned frame is discarded
#and only the elapsed wall-clock seconds are printed
start = time.time()
predictSIR(x = x, y = y, days = 50, bootstrap = 100)
end = time.time()
print(end - start)

64.0041675567627


In [9]:
#Pre-draw 100 bootstrap resamples of y (with replacement), each as long as x
lists = [np.random.choice(a = y, size = len(x), replace = True) for _ in range(100)]

In [10]:
def runSir(y, x, pop, ndays):
    """
    Fit a fresh SIR_EDO model and return its prediction.

    y: values handed to the fit as `y`
    x: values handed to the fit as `x`
    pop: first positional argument of SIR_EDO
         (presumably the population size - confirm)
    ndays: the model predicts for range(1, ndays)
    """
    horizon = range(1, ndays)
    sir = SIR_EDO(pop)
    sir.fit(y = y, x = x)
    return sir.predict(horizon)

In [11]:
#Sequential baseline: one SIR fit per bootstrap resample, timed end to end
start = time.time()
results1 = [runSir(sample, x, 50000000, 50) for sample in lists]
end = time.time()
print(end - start)

60.876667499542236


In [12]:
#Parallel version of the sequential loop: one SIR fit per bootstrap resample,
#spread over 8 worker processes and timed end to end
start = time.time()
if __name__ == '__main__':
    # The context manager shuts the worker processes down once starmap has
    # returned, so repeated runs of this cell do not pile up idle workers
    with mp.Pool(processes = 8) as pool:
        # Iterate over every resample (the first one included) so the workload
        # matches the sequential run and the timings are comparable
        results2 = pool.starmap(runSir, [(sample, x, 50000000, 50) for sample in lists])
end = time.time()
print(end - start)

16.73759365081787
