In [None]:
# Verbose listing of the packages installed in the current runtime.
!pip list -v # list installed packages

In [None]:
# Exemplo retirado de https://predictivehacks.com/arima-model-in-python/
# importar bibliotecas necessarias
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Upload the passenger CSV from the local machine into the notebook runtime.
from google.colab import files
import io

# The upload result is indexed by uploaded file name and yields the file's bytes.
uploaded = files.upload()

# Prefer the expected name. If exactly one file arrived under a different name
# (e.g. a repeated upload stored as 'air_passengers (1).csv'), use that file
# instead of failing with a bare KeyError.
if 'air_passengers.csv' in uploaded:
    csv_name = 'air_passengers.csv'
elif len(uploaded) == 1:
    csv_name = next(iter(uploaded))
else:
    raise KeyError(
        "expected an upload named 'air_passengers.csv', got: %s" % sorted(uploaded)
    )

# NOTE: despite its name, `filename` is an in-memory byte buffer, not a path.
filename = io.BytesIO(uploaded[csv_name])

In [None]:
# Read the passenger CSV, using the 'TravelDate' column (parsed as dates) as index.
# `filename` may be an in-memory buffer that a previous run of this cell already
# consumed; rewind it first so the cell can be re-run without hitting an empty buffer.
if hasattr(filename, 'seek'):
    filename.seek(0)
airline = pd.read_csv(filename, index_col ='TravelDate', parse_dates = True)

In [None]:
# Explore the dataframe: first rows, column summary and a quick line plot.
# head() is not the last expression of the cell, so it has to be displayed
# explicitly (display is provided by the IPython/Colab kernel); otherwise its
# output is silently discarded.
display(airline.head())
airline.info()
airline.plot();  # trailing ';' hides the Axes repr from the cell output

In [None]:
# ETS decomposition (Error, Trend, Seasonality) of the passenger series,
# using the multiplicative model.
result = seasonal_decompose(
    airline['Passengers'],
    model='multiplicative',
)

In [None]:
# Plot the components of the ETS decomposition.
# plot() returns a Figure; the trailing ';' keeps the notebook from rendering
# that same figure a second time as the cell's output value.
result.plot();

In [None]:
#%% is the series stationary or non-stationary?
# Augmented Dickey-Fuller test -> look for a p-value below 0.05

from statsmodels.tsa.stattools import adfuller

result = adfuller(airline['Passengers'])

In [None]:
# Give a name to each element of the adfuller output.
# 'critical_values' is written as one literal on purpose: adjacent literals such
# as 'critical' 'values' are concatenated by Python into the key 'criticalvalues'.
dict(zip(['adf', 'pvalue', 'usedlag', 'nobs', 'critical_values', 'icbest'], result))

In [None]:
#%% turn the non-stationary series into a stationary one by differencing (parameters d and D)

# First difference: each observation minus the previous one (first row becomes NaN).
airline['1difference'] = airline['Passengers'].diff()

airline['1difference'].plot()

In [None]:
#%% run the Augmented Dickey-Fuller test again -> look for a p-value below 0.05
# note that the NaN value introduced by differencing is dropped before testing
result=adfuller(airline['1difference'].dropna())

# 'critical_values' is written as one literal on purpose: adjacent literals such
# as 'critical' 'values' are concatenated by Python into the key 'criticalvalues'.
dict(zip(['adf', 'pvalue', 'usedlag', 'nobs', 'critical_values', 'icbest'], result))

In [None]:
#%% repeat the differencing to obtain the second differences
airline['2difference'] = airline['1difference'].diff()

airline['2difference'].plot()

In [None]:
#%% run the Augmented Dickey-Fuller test again -> p-value below 0.05 -> d=2
result=adfuller((airline['2difference']).dropna())

# 'critical_values' is written as one literal on purpose: adjacent literals such
# as 'critical' 'values' are concatenated by Python into the key 'criticalvalues'.
dict(zip(['adf', 'pvalue', 'usedlag', 'nobs', 'critical_values', 'icbest'], result))

In [None]:
#%% the series is seasonal, so the D parameter has to be determined
# Seasonal difference: each observation minus the one 12 rows earlier.
airline['Seasonal_Difference'] = airline['Passengers'].diff(12)

airline['Seasonal_Difference'].plot()

In [None]:
#%% run the Augmented Dickey-Fuller test again -> p-value below 0.05 -> D=0
# NOTE(review): the test is applied to the seasonally differenced series, which
# would usually correspond to D=1 -- confirm that D=0 is the intended conclusion.
result=adfuller((airline['Seasonal_Difference']).dropna())

# 'critical_values' is written as one literal on purpose: adjacent literals such
# as 'critical' 'values' are concatenated by Python into the key 'criticalvalues'.
dict(zip(['adf', 'pvalue', 'usedlag', 'nobs', 'critical_values', 'icbest'], result))

In [None]:
#%% Autocorrelation and Partial Autocorrelation Plots (p,q and P,Q parameters)
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Each call returns a Figure. The trailing ';' keeps the returned object from
# becoming the cell's output value, which would draw the last figure twice.

# Second-differenced series: used to read off the non-seasonal p and q.
plot_acf(airline['2difference'].dropna(), lags=12);
plot_pacf(airline['2difference'].dropna(), lags =12);

# Seasonally differenced series: used to read off the seasonal P and Q.
plot_acf(airline['Seasonal_Difference'].dropna(), lags=24);
plot_pacf(airline['Seasonal_Difference'].dropna(), lags = 12);

In [None]:
#%% build the ARIMA model: AR-Autoregression I-Integrated MA-Moving Average
from statsmodels.tsa.statespace.sarimax import SARIMAX

# order is (p, d, q); seasonal_order is (P, D, Q, s) with a 12-row season.
model=SARIMAX(airline['Passengers'],
              order=(1,2,1),
              seasonal_order=(1, 0, 0, 12))

result=model.fit()
# summary() is not the last expression of the cell, so it has to be displayed
# explicitly (display is provided by the IPython/Colab kernel); otherwise the
# summary table is silently discarded.
display(result.summary()) # https://analyzingalpha.com/interpret-arima-results

# Kernel density estimate of the model residuals; ';' hides the Axes repr.
result.resid.plot(kind='kde');

In [None]:
#%% build an empty dataframe indexed by the future dates to be predicted
from pandas.tseries.offsets import DateOffset

# Month offsets 1..47 counted from the last date present in the data.
new_dates = [
    airline.index[-1] + DateOffset(months=step)
    for step in range(1, 48)
]

# Same columns as the observed data, with no values filled in.
df_pred = pd.DataFrame(columns=airline.columns, index=new_dates)

df_pred.head()

In [None]:
#%% generate the new predictions for the final dataframe
# Observed rows followed by the empty future rows.
df2=pd.concat([airline,df_pred])

# Predict from the last observed row through the last future row. The positions
# are 0-based and derived from the frame lengths, so they stay correct if the
# input series or the forecast horizon changes; a step past len(df2) - 1 would
# have no matching row in df2 and be dropped on assignment.
df2['predictions']=result.predict(start=len(airline) - 1, end=len(df2) - 1)
df2[['Passengers','predictions']].plot();  # ';' hides the Axes repr