In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date
import warnings
warnings.filterwarnings('ignore')

In [None]:
df = pd.read_csv('cases-brazil-states.csv')
df.head()

In [None]:
df = df[df['estado'] != 'TOTAL']
df['data'] = pd.to_datetime(df['data']).dt.date 
df.head()

In [None]:
# Keep only the SP rows and index them by date immediately. Every series later
# derived from df_sp (seasonal decompositions, the ARIMA/SARIMAX input) then
# carries a datetime axis, matching the date-indexed RJ and MG frames it is
# plotted against. Without this, the first SP decomposition is built on the
# original integer row labels and cannot share an x-axis with the other states.
df_sp = df[df['estado'] == 'SP'].copy()
df_sp.index = pd.to_datetime(df_sp['data'])
df_sp.head()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
from statsmodels.tsa.seasonal import *

In [None]:
resultados = seasonal_decompose(df_sp.novosObitos, freq= 7)

In [None]:
resultados

In [None]:
df_sp.index = pd.to_datetime(df_sp.data)

In [None]:
def plotar(y1,y2,y3,cor1,cor2,cor3,label1,label2,label3):
  """Draw three series as lines on one new 20x7 figure and add a legend.

  y1, y2, y3: the series, plotted in that order via ``plt.plot``.
  cor1, cor2, cor3: line colour for the series in the same position.
  label1, label2, label3: legend text for the series in the same position.

  Returns nothing; the figure is left as the current pyplot figure.
  """
  plt.figure(figsize=(20, 7))
  series = (y1, y2, y3)
  cores = (cor1, cor2, cor3)
  rotulos = (label1, label2, label3)
  for serie, cor, rotulo in zip(series, cores, rotulos):
    plt.plot(serie, color=cor, label=rotulo)
  plt.legend(fontsize=18)

In [None]:
# SP decomposition: observed series against its trend and seasonal components.
plotar(
    resultados.observed, resultados.trend, resultados.seasonal,
    cor1='red', cor2='blue', cor3='green',
    label1='Observado', label2='Tendencia', label3='Sazonalidade',
)

In [None]:
df_mg = df[df['estado']=='MG']
df_rj = df[df['estado']=='RJ']
df_mg.index = pd.to_datetime(df_mg.data)
df_rj.index = pd.to_datetime(df_rj.data)


In [None]:
resultados_mg = seasonal_decompose(df_mg.novosObitos, freq= 7)
resultados_rj = seasonal_decompose(df_rj.novosObitos, freq= 7)

In [None]:
plotar(resultados.trend, resultados_rj.trend, resultados_mg.trend, 'red', 'blue','green', 'SP', 'RJ', 'MG' )

In [None]:
plotar(resultados.seasonal, resultados_rj.seasonal, resultados_mg.seasonal, 'red', 'blue','green', 'SP', 'RJ', 'MG' )

In [None]:
# Re-run the decomposition on the `obitos_por_casos` column for each state.
# This rebinds the three `resultados*` names that previously held the
# `novosObitos` decompositions.
resultados = seasonal_decompose(df_sp['obitos_por_casos'], freq=7)
resultados_rj = seasonal_decompose(df_rj['obitos_por_casos'], freq=7)
resultados_mg = seasonal_decompose(df_mg['obitos_por_casos'], freq=7)

# Compare the resulting trend components across SP, RJ and MG.
plotar(
    resultados.trend, resultados_rj.trend, resultados_mg.trend,
    cor1='red', cor2='blue', cor3='green',
    label1='SP', label2='RJ', label3='MG',
)

In [None]:
# `y` is the SP `Casos` column; it is the series analysed and modelled in the
# cells that follow.
y = df_sp['Casos']
y.head()

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the series with missing values dropped;
# the full result tuple is displayed as the cell output.
adfuller(y.dropna())

In [None]:
# Element 1 of the adfuller result is printed as the p-value (the test is
# recomputed here rather than reusing the previous cell's output).
print('p-value:', adfuller(y.dropna())[1])

In [None]:
result = seasonal_decompose(df_sp.Casos, freq= 7)
result


In [None]:
plotar(result.observed, result.trend,result.seasonal, 'red', 'blue','green', 'Observado', 'Tendencia', 'Sazonalidade' )

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
def plotarACF(y1,y2,y3):
  """Stack the autocorrelation plots of three series in one 10x10 figure.

  y1, y2, y3: series handed to ``plot_acf``; they are drawn top to bottom in
  panels titled 'Original', '1ª Diferenciação' and '2ª Diferenciação'.

  Returns nothing.
  """
  fig = plt.figure(figsize=(10, 10))
  paineis = (
    (311, y1, 'Original'),
    (312, y2, '1ª Diferenciação'),
    (313, y3, '2ª Diferenciação'),
  )
  for posicao, serie, titulo in paineis:
    eixo = fig.add_subplot(posicao)
    # plot_acf draws on the supplied axes; its return value is kept in `fig`
    # and used to add the next panel, exactly as in a step-by-step version.
    fig = plot_acf(serie, ax=eixo, title=titulo)


In [None]:
plotarACF(y,y.diff().dropna(),y.diff().diff().dropna())

In [None]:
  plotar(y,y.diff(),y.diff().diff(), 'red', 'blue', 'green', 'Original', '1', '2')

In [None]:
plot_pacf(y.diff().dropna(),lags=40);

In [None]:
plot_pacf(y.diff().diff().dropna(),lags=40);

In [None]:
plot_pacf(y.diff().dropna(),lags=20);

## ARIMA (p, d=1, q=1)

In [None]:
# Autocorrelation of the first-differenced series; the trailing semicolon
# suppresses the returned figure's repr.
plot_acf(y.diff().dropna());

In [None]:
# NOTE(review): this cell repeats the previous one verbatim - looks like a
# leftover duplicate; confirm whether it can be removed.
plot_acf(y.diff().dropna());

In [None]:
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Fit an ARIMA model of order (1, 1, 1) to `y` and print its summary table.
# disp=0 is passed to fit() to silence the optimiser's progress output.
# NOTE(review): the ARIMA class comes from `statsmodels.tsa.arima_model`, the
# older implementation - confirm the installed statsmodels still provides it.
modelo = ARIMA(y, order=(1, 1, 1))
modelo_fit = modelo.fit(disp=0)
print(modelo_fit.summary())

In [None]:
import itertools
p= d =q = range(0,2)
pdq =list(itertools.product(p,d,q))


In [None]:
pdq

In [None]:
seasonal_pdq = [(x[0], x[1], x[2], 7) for x in list(itertools.product(p,d,q))]

In [None]:
print('Exemplo de  combinação de parametros pára ARIMA Sazonal')
print('SARIMAX: {} X {}'.format(pdq[0],seasonal_pdq[0]))
print('SARIMAX: {} X {}'.format(pdq[0],seasonal_pdq[1]))
print('SARIMAX: {} X {}'.format(pdq[0],seasonal_pdq[2]))

In [None]:
 y= df_sp['Casos']
 y

In [None]:
import statsmodels.api as sm
# Fit one SARIMAX model per (order, seasonal order) pairing and record its AIC
# as a formatted string. Pairs are visited in the same order as a nested loop
# over `pdq` (outer) and `seasonal_pdq` (inner).
aic_lista = []
for param, param_seasonal in itertools.product(pdq, seasonal_pdq):
    mod = sm.tsa.statespace.SARIMAX(y, order=param, seasonal_order=param_seasonal)
    results = mod.fit()
    aic_lista.append('ARIMA{} X {} - AIC{} '.format(param, param_seasonal, results.aic))

In [None]:
aic_lista

In [None]:
mod = sm.tsa.statespace.SARIMAX(y, order = (1,1,1), seasonal_order=(1,1,1,7))
results = mod.fit()

In [None]:
results.plot_diagnostics(figsize = (16,8))
plt.show()

In [None]:
pred = results.get_prediction(start= pd.to_datetime('2020-07-01'), dynamic=False)
pred_ic = pred.conf_int()
pred_ic

In [None]:
ax = y['2020':].plot(label='Observado')
pred.predicted_mean.plot(ax=ax, label = 'Previsao', color ='r', alpha = 0.7,figsize=(14,7))
ax.fill_between(pred_ic.index,pred_ic.iloc[:,0] , pred_ic.iloc[:,1], color='k', alpha = 0.1 )
ax.set_xlabel('Data')
ax.set_ylabel('Casos de COVID-19')
plt.legend()
plt.show()

In [None]:
# Forecast 150 steps past the end of the sample. get_forecast() is the
# out-of-sample entry point that takes `steps`; get_prediction() has no such
# parameter and only covers the start/end range it is given, so it does not
# produce the intended forecast.
pred_uc = results.get_forecast(steps=150)
pred_ic = pred_uc.conf_int()

# Observed series followed by the forecast mean, with a shaded band between
# the first and second confidence-interval columns.
ax = y.plot(label='Observado', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Previsto')
ax.fill_between(pred_ic.index, pred_ic.iloc[:, 0], pred_ic.iloc[:, 1], color='k', alpha=0.25)
ax.set_xlabel('Data')
ax.set_ylabel('Casos de COVID-19')
# Both lines carry labels, so show the legend that identifies them.
ax.legend()
plt.show()

In [None]:
df_att = pd.read_csv('cases-brazil-states-att.csv')
df_att = df_att[df_att['state'] == 'SP' ]
df_att = df_att.rename(columns = {'date': 'data'})
df_att.head()


In [None]:
df_att['data'] = pd.to_datetime(df_att['data']).dt.date
df_att.index = df_att['data']
df_att.head()

In [None]:
# Compare the forecast with the `totalCases` series from the second CSV.
x = df_att['totalCases']
plt.figure(figsize=(20, 7))
fig = plt.plot(x, label='Correto')
ax = y.plot(label='Observado')
pred_uc.predicted_mean.plot(ax=ax, label='Previsto')
# Shade between the first and second confidence-interval columns. The first
# bound must be selected with `iloc[:, 0]`; `iloc[:0]` is an empty row slice of
# the whole frame and cannot be drawn by fill_between.
ax.fill_between(pred_ic.index, pred_ic.iloc[:, 0], pred_ic.iloc[:, 1], color='k', alpha=0.25)
# Three labelled lines share the axes, so show the legend that tells them apart.
ax.legend()
plt.show()