In [9]:
# libraries
import itertools
import jedi
import joblib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pandas_profiling
import pygments
import re
import seaborn as sns
from scipy import stats as st
import sklearn
from tqdm import tqdm
import this

In [2]:
# display for notebooks
%matplotlib inline

In [7]:
# Versions
print("jedi version:", jedi.__version__)
print("numpy version:", np.__version__)
print("pandas version:", pd.__version__)
print("Pygments version:", pygments.__version__)
print("re version:", re.__version__)
print("Seaborn version:", sns.__version__)
print("sklearn version:", sklearn.__version__)

jedi version: 0.16.0
numpy version: 1.18.1
pandas version: 1.0.1
Pygments version: 2.5.2
re version: 2.2.1
Seaborn version: 0.10.0
sklearn version: 0.22.1


In [66]:
# loading the dataset
url = "https://raw.githubusercontent.com/dssg-pt/covid19pt-data/master/data.csv"

In [67]:
# creating a datafr
covid_pt = pd.read_csv(url)

In [68]:
covid_pt.head()
covid_pt.tail()

Unnamed: 0,data,data_dados,confirmados,confirmados_arsnorte,confirmados_arscentro,confirmados_arslvt,confirmados_arsalentejo,confirmados_arsalgarve,confirmados_acores,confirmados_madeira,...,confirmados_f,confirmados_m,obitos_arsnorte,obitos_arscentro,obitos_arslvt,obitos_arsalentejo,obitos_arsalgarve,obitos_acores,obitos_madeira,obitos_estrangeiro
23,20/03/2020,20/03/2020 00:00,1020,506,106,361,2,29,3,1,...,506.0,514.0,1,2,2,0,1,0,0,0
24,21/03/2020,21/03/2020 00:00,1280,644,137,448,3,31,3,5,...,649.0,631.0,4,4,3,0,1,0,0,0
25,22/03/2020,22/03/2020 00:00,1600,825,180,534,5,35,4,7,...,821.0,779.0,5,4,4,0,1,0,0,0
26,23/03/2020,23/03/2020 00:00,2060,1007,238,737,5,42,11,9,...,1080.0,980.0,9,5,8,0,1,0,0,0
27,24/03/2020,24/03/2020 00:00,2362,1130,293,852,6,46,12,11,...,1229.0,1133.0,9,11,8,0,1,1,0,0


In [69]:
covid_pt.dtypes

data                                  object
data_dados                            object
confirmados                            int64
confirmados_arsnorte                   int64
confirmados_arscentro                  int64
confirmados_arslvt                     int64
confirmados_arsalentejo                int64
confirmados_arsalgarve                 int64
confirmados_acores                     int64
confirmados_madeira                    int64
confirmados_estrangeiro              float64
confirmados_novos                      int64
recuperados                            int64
obitos                                 int64
internados                           float64
internados_uci                       float64
lab                                  float64
suspeitos                              int64
vigilancia                           float64
n_confirmados                        float64
cadeias_transmissao                  float64
transmissao_importada                float64
confirmado

In [70]:
covid_pt.shape

(28, 56)

In [71]:
round(covid_pt.describe(), 2)

Unnamed: 0,confirmados,confirmados_arsnorte,confirmados_arscentro,confirmados_arslvt,confirmados_arsalentejo,confirmados_arsalgarve,confirmados_acores,confirmados_madeira,confirmados_estrangeiro,confirmados_novos,...,confirmados_f,confirmados_m,obitos_arsnorte,obitos_arscentro,obitos_arslvt,obitos_arsalentejo,obitos_arsalgarve,obitos_acores,obitos_madeira,obitos_estrangeiro
count,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,13.0,28.0,...,6.0,6.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0
mean,405.57,197.86,44.18,146.07,0.89,10.43,1.5,1.25,6.92,84.36,...,779.17,738.67,1.0,0.96,1.04,0.0,0.18,0.04,0.0,0.0
std,664.16,325.3,78.63,235.6,1.77,14.3,3.09,2.95,3.5,124.59,...,327.91,281.26,2.55,2.41,2.22,0.0,0.39,0.19,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,390.0,395.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.5,2.75,1.0,1.75,0.0,0.0,0.0,0.0,5.0,2.0,...,541.75,543.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,50.0,31.5,2.5,13.5,0.0,2.5,0.0,0.0,9.0,13.5,...,735.0,705.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,496.5,219.25,56.75,195.75,0.5,15.75,1.5,0.25,9.0,123.5,...,1015.25,929.75,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
max,2362.0,1130.0,293.0,852.0,6.0,46.0,12.0,11.0,11.0,460.0,...,1229.0,1133.0,9.0,11.0,8.0,0.0,1.0,1.0,0.0,0.0


In [72]:
covid_pt.columns

Index(['data', 'data_dados', 'confirmados', 'confirmados_arsnorte',
       'confirmados_arscentro', 'confirmados_arslvt',
       'confirmados_arsalentejo', 'confirmados_arsalgarve',
       'confirmados_acores', 'confirmados_madeira', 'confirmados_estrangeiro',
       'confirmados_novos', 'recuperados', 'obitos', 'internados',
       'internados_uci', 'lab', 'suspeitos', 'vigilancia', 'n_confirmados',
       'cadeias_transmissao', 'transmissao_importada', 'confirmados_0_9_f',
       'confirmados_0_9_m', 'confirmados_10_19_f', 'confirmados_10_19_m',
       'confirmados_20_29_f', 'confirmados_20_29_m', 'confirmados_30_39_f',
       'confirmados_30_39_m', 'confirmados_40_49_f', 'confirmados_40_49_m',
       'confirmados_50_59_f', 'confirmados_50_59_m', 'confirmados_60_69_f',
       'confirmados_60_69_m', 'confirmados_70_79_f', 'confirmados_70_79_m',
       'confirmados_80_plus_f', 'confirmados_80_plus_m', 'sintomas_tosse',
       'sintomas_febre', 'sintomas_dificuldade_respiratoria',
     

In [77]:
covid19_pt = covid_pt.filter(['data', 'data_dados', 'confirmados', 'confirmados_novos', 
                           'recuperados', 'obitos', 'internados', 'internados_uci', 'lab',
                           'suspeitos', 'vigilancia', 'n_confirmados', 'cadeias_transmissao',
                           'transmissao_importada'])

In [78]:
covid19_pt.shape

(28, 14)

In [79]:
covid19_pt.dtypes

data                      object
data_dados                object
confirmados                int64
confirmados_novos          int64
recuperados                int64
obitos                     int64
internados               float64
internados_uci           float64
lab                      float64
suspeitos                  int64
vigilancia               float64
n_confirmados            float64
cadeias_transmissao      float64
transmissao_importada    float64
dtype: object

In [80]:
covid19_pt.tail()

Unnamed: 0,data,data_dados,confirmados,confirmados_novos,recuperados,obitos,internados,internados_uci,lab,suspeitos,vigilancia,n_confirmados,cadeias_transmissao,transmissao_importada
23,20/03/2020,20/03/2020 00:00,1020,235,5,6,126.0,26.0,850.0,7732,9008.0,5862.0,24.0,95.0
24,21/03/2020,21/03/2020 00:00,1280,260,5,12,156.0,35.0,1059.0,9854,13155.0,7515.0,,104.0
25,22/03/2020,22/03/2020 00:00,1600,320,5,14,169.0,41.0,1152.0,11779,12562.0,9027.0,,114.0
26,23/03/2020,23/03/2020 00:00,2060,460,14,23,201.0,47.0,1402.0,13674,11842.0,10212.0,,142.0
27,24/03/2020,24/03/2020 00:00,2362,302,22,30,203.0,48.0,1783.0,15474,11842.0,11329.0,,142.0


In [81]:
covid19_pt[["data", "data_dados"]] = covid19_pt[["data", "data_dados"]].apply(pd.to_datetime)

In [82]:
covid19_pt.dtypes

data                     datetime64[ns]
data_dados               datetime64[ns]
confirmados                       int64
confirmados_novos                 int64
recuperados                       int64
obitos                            int64
internados                      float64
internados_uci                  float64
lab                             float64
suspeitos                         int64
vigilancia                      float64
n_confirmados                   float64
cadeias_transmissao             float64
transmissao_importada           float64
dtype: object

In [83]:
round(covid19_pt.describe(), 2)

Unnamed: 0,confirmados,confirmados_novos,recuperados,obitos,internados,internados_uci,lab,suspeitos,vigilancia,n_confirmados,cadeias_transmissao,transmissao_importada
count,28.0,28.0,28.0,28.0,20.0,11.0,19.0,28.0,21.0,10.0,15.0,22.0
mean,405.57,84.36,2.36,3.21,100.75,26.45,466.32,3039.64,5263.62,6001.5,12.2,46.82
std,664.16,124.59,4.87,7.52,65.45,14.11,524.74,4559.67,4481.72,3380.03,7.69,46.46
min,0.0,0.0,0.0,0.0,9.0,9.0,30.0,25.0,81.0,1746.0,4.0,2.0
25%,5.5,2.0,0.0,0.0,39.5,17.5,83.0,113.0,496.0,3462.75,6.0,6.25
50%,50.0,13.5,0.0,0.0,98.0,20.0,281.0,423.0,4923.0,5325.0,11.0,36.0
75%,496.5,123.5,3.0,1.0,143.25,38.0,669.0,4289.25,8091.0,8649.0,18.5,68.75
max,2362.0,460.0,22.0,30.0,206.0,48.0,1783.0,15474.0,13155.0,11329.0,24.0,142.0


In [84]:
covid19_pt.drop("data_dados", axis = 1, inplace = True)

In [85]:
covid19_pt.drop("n_confirmados", axis = 1, inplace = True)

In [None]:
covid19_pt

In [92]:
covid19_pt["factor_propagacao"] = 1 + round(covid19_pt["confirmados"].pct_change(), 2)

In [95]:
covid19_pt["dias_duplicar"] = round(np.log(2) / np.log(covid19_pt["factor_propagacao"]), 0)

In [99]:
reorded = ['data', 'confirmados', 'factor_propagacao', 'dias_duplicar', 'confirmados_novos',
           'recuperados', 'obitos','internados', 'internados_uci', 'lab', 'suspeitos', 
           'vigilancia', 'cadeias_transmissao', 'transmissao_importada']

In [101]:
covid19_pt = covid19_pt[reorded]

In [102]:
covid19_pt

Unnamed: 0,data,confirmados,factor_propagacao,dias_duplicar,confirmados_novos,recuperados,obitos,internados,internados_uci,lab,suspeitos,vigilancia,cadeias_transmissao,transmissao_importada
0,2020-02-26,0,,,0,0,0,,,,25,,,
1,2020-02-27,0,,,0,0,0,,,,51,,,
2,2020-02-28,0,,,0,0,0,,,,59,,,
3,2020-02-29,0,,,0,0,0,,,,70,,,
4,2020-01-03,0,,,0,0,0,,,,85,,,
5,2020-02-03,2,inf,0.0,2,0,0,,,,85,,,
6,2020-03-03,4,2.0,1.0,2,0,0,,,,101,,,2.0
7,2020-04-03,6,1.5,2.0,2,0,0,,,,117,81.0,,4.0
8,2020-05-03,9,1.5,2.0,3,0,0,9.0,,,147,213.0,,5.0
9,2020-06-03,13,1.44,2.0,4,0,0,13.0,,30.0,181,354.0,5.0,5.0
