# Prevendo crescimento do número de casos de COVID-19 no Brasil usando Inteligência Artificial

## Preparando os dados

In [1]:
import pandas as pd
import numpy as np

In [3]:
casosBrasil = pd.read_csv('https://brasil.io/dataset/covid19/caso/?format=csv')
casosBrasil.head()

Unnamed: 0,date,state,city,place_type,confirmed,deaths,is_last,estimated_population_2019,city_ibge_code,confirmed_per_100k_inhabitants,death_rate
0,2020-06-03,SP,Adamantina,city,43,3,True,35068.0,3500105.0,122.61891,0.0698
1,2020-06-03,SP,Adolfo,city,3,0,True,3562.0,3500204.0,84.22235,0.0
2,2020-06-03,SP,Aguaí,city,38,3,True,36305.0,3500303.0,104.66878,0.0789
3,2020-06-03,SP,Águas da Prata,city,1,0,True,8180.0,3500402.0,12.22494,0.0
4,2020-06-03,SP,Águas de Lindóia,city,16,1,True,18705.0,3500501.0,85.53863,0.0625


In [4]:
# excluindo colunas desnecessárias
casosBR = casosBrasil.drop(['place_type', 'deaths','estimated_population_2019', 'city_ibge_code', 'death_rate', 'confirmed_per_100k_inhabitants', 'is_last'], axis=1)
casosBR.head()

Unnamed: 0,date,state,city,confirmed
0,2020-06-03,SP,Adamantina,43
1,2020-06-03,SP,Adolfo,3
2,2020-06-03,SP,Aguaí,38
3,2020-06-03,SP,Águas da Prata,1
4,2020-06-03,SP,Águas de Lindóia,16


In [7]:
# pegando apenas dados dos estados
casosEstados = casosBR[casosBR['city'].isnull()]

In [8]:
casosEstados = casosEstados.drop('city', axis=1)
casosEstados = casosEstados.sort_values(by=['date'])
casosEstados

Unnamed: 0,date,state,confirmed
151897,2020-02-25,SP,1
151895,2020-02-26,SP,1
151893,2020-02-27,SP,1
151891,2020-02-28,SP,2
151889,2020-02-29,SP,2
...,...,...,...
3471,2020-06-02,SC,9660
3547,2020-06-02,SE,7555
4094,2020-06-02,SP,118295
2194,2020-06-02,MG,10939


In [9]:
# Agrupando o número de casos de acordo com estados e as datas
casosEstados = casosEstados.groupby(['state', 'date']).pipe(lambda x: x.confirmed.sum()).unstack().fillna(0)

In [10]:
# Convertendo valores para inteiro
cols = list(casosEstados)
for col in cols:
    casosEstados[col] = pd.to_numeric(casosEstados[col], downcast='integer')

In [12]:
casosEstados.head()

date,2020-02-25,2020-02-26,2020-02-27,2020-02-28,2020-02-29,2020-03-01,2020-03-02,2020-03-03,2020-03-04,2020-03-05,...,2020-05-25,2020-05-26,2020-05-27,2020-05-28,2020-05-29,2020-05-30,2020-05-31,2020-06-01,2020-06-02,2020-06-03
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AC,0,0,0,0,0,0,0,0,0,0,...,4501,4781,5251,5600,5841,6072,6219,6326,6465,0
AL,0,0,0,0,0,0,0,0,0,0,...,6682,7058,7580,8056,8619,9223,10288,10837,11559,0
AM,0,0,0,0,0,0,0,0,0,0,...,30282,31949,33508,36146,38909,40560,41378,41774,43195,0
AP,0,0,0,0,0,0,0,0,0,0,...,6584,6967,7619,8152,8469,9313,9602,9890,10511,0
BA,0,0,0,0,0,0,0,0,0,0,...,14204,14566,15070,15963,16917,17626,18392,18898,21430,0


In [13]:
dias = list(casosEstados)
print(dias)

['2020-02-25', '2020-02-26', '2020-02-27', '2020-02-28', '2020-02-29', '2020-03-01', '2020-03-02', '2020-03-03', '2020-03-04', '2020-03-05', '2020-03-06', '2020-03-07', '2020-03-08', '2020-03-09', '2020-03-10', '2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14', '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20', '2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24', '2020-03-25', '2020-03-26', '2020-03-27', '2020-03-28', '2020-03-29', '2020-03-30', '2020-03-31', '2020-04-01', '2020-04-02', '2020-04-03', '2020-04-04', '2020-04-05', '2020-04-06', '2020-04-07', '2020-04-08', '2020-04-09', '2020-04-10', '2020-04-11', '2020-04-12', '2020-04-13', '2020-04-14', '2020-04-15', '2020-04-16', '2020-04-17', '2020-04-18', '2020-04-19', '2020-04-20', '2020-04-21', '2020-04-22', '2020-04-23', '2020-04-24', '2020-04-25', '2020-04-26', '2020-04-27', '2020-04-28', '2020-04-29', '2020-04-30', '2020-05-01', '2020-05-02', '2020-05-03', '2020-05-04', '2020-05-05', '2020

In [14]:
for estado in casosEstados.iterrows():
    for i in range(1, len(dias)):
        a = dias[i]
        b = dias[i-1]
        if casosEstados.loc[estado[0], dias[i]] < casosEstados.loc[estado[0], dias[i-1]]:
            casosEstados.loc[estado[0], dias[i]] = casosEstados.loc[estado[0], dias[i-1]]

In [15]:
casosEstados.head()

date,2020-02-25,2020-02-26,2020-02-27,2020-02-28,2020-02-29,2020-03-01,2020-03-02,2020-03-03,2020-03-04,2020-03-05,...,2020-05-25,2020-05-26,2020-05-27,2020-05-28,2020-05-29,2020-05-30,2020-05-31,2020-06-01,2020-06-02,2020-06-03
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AC,0,0,0,0,0,0,0,0,0,0,...,4501,4781,5251,5600,5841,6072,6219,6326,6465,6465
AL,0,0,0,0,0,0,0,0,0,0,...,6682,7058,7580,8056,8619,9223,10288,10837,11559,11559
AM,0,0,0,0,0,0,0,0,0,0,...,30282,31949,33508,36146,38909,40560,41378,41774,43195,43195
AP,0,0,0,0,0,0,0,0,0,0,...,6584,6967,7619,8152,8469,9313,9602,9890,10511,10511
BA,0,0,0,0,0,0,0,0,0,0,...,14204,14566,15070,15963,16917,17626,18392,18898,21430,21430


In [16]:
# primeiro dia de casos até dia 01/06
x_train = np.array(casosEstados.loc[:,dias[:len(dias)-2]])
print(x_train)
# dia 02/06
y_train = np.array(casosEstados[dias[-2]])
print(y_train)
# do segundo dia de casos até dia 02/06
x_test = casosEstados[dias[1:-1]]
# dia 03/06
y_test = np.array(casosEstados[dias[-1]])

[[     0      0      0 ...   6072   6219   6326]
 [     0      0      0 ...   9223  10288  10837]
 [     0      0      0 ...  40560  41378  41774]
 ...
 [     0      0      0 ...   6805   6999   7233]
 [     1      1      1 ... 107142 109698 111296]
 [     0      0      0 ...   3981   4176   4345]]
[  6465  11559  43195  10511  21430  54683  11256  15151   4486  38174
  10939   1646   2817  43652  13695  35508   5828   4900  56732   8233
   5477   3692   9919   9660   7555 118295   4345]


## Regressão Linear

In [17]:
# importando modelo de regressão linear e a métrica r2_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [18]:
linear_regression = LinearRegression()

In [19]:
linear_regression.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [20]:
# realizando predição do dia 03/06
y_pred = linear_regression.predict(x_test)

In [21]:
# erro percentual
rmspe = (np.sqrt(np.mean(np.square((y_test - y_pred) / y_test)))) * 100
rmspe

7.5763455793825045

In [22]:
r2 = r2_score(y_test, y_pred)
r2

0.9947922175235921

In [23]:
linear_regression.score(x_train, y_pred)

0.9938322516222711

## SVR Linear

In [24]:
# importando SVR
from sklearn.svm import SVR

In [27]:
svr = SVR(kernel='linear', C=0.1)

In [28]:
svr.fit(x_train, y_train)

SVR(C=0.1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [29]:
y_pred=svr.predict(x_test)

In [30]:
# erro percentual
rmspe = (np.sqrt(np.mean(np.square((y_test - y_pred) / y_test)))) * 100
rmspe

7.594874725748417

In [31]:
r2 = r2_score(y_test, y_pred)
r2

0.9947655679077078

## Prevendo número de casos do dia 04/06
Usando regressão linear pois teve resultados melhores


In [37]:
x = casosEstados[dias[2:]]
y_pred = linear_regression.predict(x)
y_pred = y_pred.astype('int')

array([  6354.91025945,  11421.42518865,  41223.72944179,  12056.95453123,
        20272.99588061,  57118.85708576,  11260.42279324,  14966.87284462,
         4451.94030002,  40042.79039252,  10292.59368748,   1730.7333819 ,
         2882.31082648,  44863.63651332,  13606.13376078,  40020.90677981,
         5498.13687494,   4359.63783931,  60059.58097638,   5986.24269029,
         4974.60685481,   3746.68342389,   9103.12332647,   8009.56356951,
         7345.5683082 , 115947.89886786,   3664.09821137])

In [38]:
casosEstados['2020-06-04'] = y_pred
casosEstados

date,2020-02-25,2020-02-26,2020-02-27,2020-02-28,2020-02-29,2020-03-01,2020-03-02,2020-03-03,2020-03-04,2020-03-05,...,2020-05-26,2020-05-27,2020-05-28,2020-05-29,2020-05-30,2020-05-31,2020-06-01,2020-06-02,2020-06-03,2020-06-04
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AC,0,0,0,0,0,0,0,0,0,0,...,4781,5251,5600,5841,6072,6219,6326,6465,6465,6354
AL,0,0,0,0,0,0,0,0,0,0,...,7058,7580,8056,8619,9223,10288,10837,11559,11559,11421
AM,0,0,0,0,0,0,0,0,0,0,...,31949,33508,36146,38909,40560,41378,41774,43195,43195,41223
AP,0,0,0,0,0,0,0,0,0,0,...,6967,7619,8152,8469,9313,9602,9890,10511,10511,12056
BA,0,0,0,0,0,0,0,0,0,0,...,14566,15070,15963,16917,17626,18392,18898,21430,21430,20272
CE,0,0,0,0,0,0,0,0,0,0,...,37021,37275,37821,38395,47822,48489,50504,54683,54683,57118
DF,0,0,0,0,0,0,0,0,0,0,...,7210,7761,8300,8722,9474,9780,10510,11256,11256,11260
ES,0,0,0,0,0,0,0,0,0,1,...,10889,11484,12203,12903,12903,13690,13690,15151,15151,14966
GO,0,0,0,0,0,0,0,0,0,0,...,2706,2906,3101,3521,3609,3726,4017,4486,4486,4451
MA,0,0,0,0,0,0,0,0,0,0,...,26145,27979,30482,32620,34639,35297,36625,38174,38174,40042
