# Previsão de Séries Temporais

<div style="text-align:justify">
Uma série temporal é uma sequência de valores numéricos coletados em intervalos regulares ao longo de um período de tempo. Em modelos de regressão linear, a ordem das observações é irrelevante para a análise; já em séries temporais, a ordem dos dados é fundamental. Uma característica muito importante desse tipo de dado é que observações vizinhas são dependentes entre si, e o interesse está justamente em analisar e modelar essa dependência.
</div>

### Carregando Pacotes

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

from pandas import Series
from pandas import concat
from pandas import DataFrame

from sklearn.metrics import mean_squared_error

from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.seasonal import seasonal_decompose

from pandas.plotting import autocorrelation_plot, lag_plot

%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

### Dataset

In [None]:
"""
data = pd.read_csv('AirPassengers.csv', header=0, parse_dates=[0], names=['Month', 'Passengers'], index_col=0)

ts = data['Passengers']
ts.head(5)

"""
# Load the CSV with its first column parsed as dates and used as the index.
# header=0 combined with names=[...] discards the file's own header row and
# uses the column names given here instead.
data = pd.read_csv('daily-minimum-temperatures.csv', header=0, parse_dates=[0], names=['Date', 'Value'], index_col=0)

# Keep the single 'Value' column as a Series; later cells operate on `ts`.
ts = data['Value']
ts.head(5)


### Visualização da Série

In [None]:
# Line plot of the raw series against its index.
plt.plot(ts)
plt.show()

### Análise Gráfica de Auto-correlação

In [None]:
# Scatter plot of the series against a lagged copy of itself (pandas' default lag).
lag_plot(ts)

In [None]:
# Autocorrelation of the series across lags, drawn by pandas.
autocorrelation_plot(ts)

In [None]:
# Partial autocorrelation values computed with nlags=12, drawn as a line.
pacf_result = pacf(ts, nlags=12)
plt.plot(pacf_result)
plt.show()

In [None]:
# Autocorrelation values computed with nlags=12, drawn as a line.
acf_result = acf(ts, nlags=12)
plt.plot(acf_result)
plt.show()

### Análise de Auto-correlação

In [None]:
# Correlation between each observation and its immediate predecessor: pair the
# values with a copy shifted down by one row and correlate the two columns.
values = DataFrame(ts.values)
df = concat([values.shift(1), values], axis=1)
df.columns = ['t-1', 't']
# The call form of print runs on both Python 2 and Python 3; the bare
# `print expr` statement is a syntax error on Python 3.
print(df.corr())

### Teste de Estacionariedade

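- O teste de Dickey-Fuller aumentado (ADF, função `adfuller`) tem como hipótese nula a presença de raiz unitária, isto é, que a série não é estacionária. Um p-valor pequeno (por exemplo, menor que 0,05), ou uma estatística de teste menor que os valores críticos, permite rejeitar essa hipótese e tratar a série como estacionária.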

In [None]:
def test_stationarity(timeseries, window=12, adf=False):
    """Plot rolling statistics of a series and optionally run the ADF test.

    Parameters
    ----------
    timeseries : pandas.Series
        Series to inspect; it must provide ``.rolling``.
    window : int, optional
        Number of observations in the rolling window. Defaults to 12, the
        value that used to be hard-coded.
    adf : bool, optional
        When True, also run the augmented Dickey-Fuller test and print a
        summary of its results. The default (False) keeps the plot-only
        behaviour existing callers rely on.

    Returns
    -------
    None
    """
    # Rolling statistics over a trailing (non-centred) window.
    rolmean = timeseries.rolling(window=window, center=False).mean()
    rolstd = timeseries.rolling(window=window, center=False).std()

    # Overlay the original series with its rolling mean and standard deviation.
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    if not adf:
        return

    # Dickey-Fuller test: the first four entries of the result are the test
    # statistic, p-value, lags used and number of observations; entry 4 maps
    # each significance level to its critical value.
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', 'Lags Used', 'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value

    print(dfoutput)

In [None]:
# Rolling mean / standard deviation plot for the raw series.
test_stationarity(ts)

### Estacionariedade por Decomposição

In [None]:
# Decompose the raw values into trend, seasonal and residual components.
# NOTE(review): freq=12 is kept as written; newer statsmodels releases name
# this argument `period` -- confirm against the installed version.
decomposition = seasonal_decompose(ts.values, freq=12)

# Expected: statsmodels.tsa.seasonal.DecomposeResult
print(type(decomposition))

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# The input was a plain array (ts.values); print the component type to see
# what the decomposition handed back.
print(type(residual))

# original series: plot the same values that were decomposed. `ts_log` is only
# created in a later cell, so referencing it here fails on a top-to-bottom run.
plt.subplot(411)
plt.plot(ts.values, label='Original')
plt.legend(loc='best')

# trend component
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')

# seasonal component
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')

# residuals
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')

plt.tight_layout()

In [None]:
# Wrap the residual component in a Series, drop missing values in place, and
# inspect its rolling statistics.
s_residual = Series(residual)
s_residual.dropna(inplace=True)
test_stationarity(s_residual)

### Estacionariedade por Diferenciação

In [None]:
# Natural log of the series.
# NOTE(review): np.log gives -inf for zeros and NaN for negative values;
# presumably a temperature series can contain such readings -- verify before
# relying on ts_log downstream.
ts_log = np.log(ts)
plt.plot(ts_log)

In [None]:
# First-order difference of the log series: each value minus the previous one.
ts_log_diff = ts_log - ts_log.shift()
# The first row has no predecessor, so it is NaN after the shift; drop it.
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)

In [None]:
# Trailing (non-centred) rolling mean of the log series over 12 observations.
moving_avg = ts_log.rolling(window=12, center=False).mean()

# Subtract the rolling mean from the log series; rows without a full window
# are NaN and are dropped before the stationarity check.
ts_log_moving_avg_diff = ts_log - moving_avg
ts_log_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_log_moving_avg_diff)

### Data Split

In [None]:
# Build a two-column frame pairing each differenced value ('t') with the one
# immediately before it ('t-1').
values = DataFrame(ts_log_diff.values)
df = concat([values.shift(1), values], axis=1)
df.columns = ['t-1', 't']

# split train / test: the last 12 rows are held out for testing; row 0 is
# skipped because shift(1) leaves its 't-1' entry empty.
X = df.values
train, test = X[1:len(X)-12], X[len(X)-12:]
# Column 0 is the lagged input, column 1 the value to predict.
train_X, train_y = train[:,0], train[:,1]
test_X, test_y = test[:,0], test[:,1]

### Baseline

In [None]:
# walk-forward validation
predictions = []
for x in test_X:
    yhat = x
    predictions.append(yhat)
    
test_score = mean_squared_error(test_y, predictions)
print('Test MSE: %.3f' % test_score)

# plot predictions vs expected
plt.plot(test_y)
plt.plot(predictions, color='red')
plt.show()

### Modelo Auto-Regressivo

In [None]:
# split dataset
X = ts_log_diff.values
train, test = X[1:len(X)-12], X[len(X)-12:]

# train autoregression
model = AR(train)
model_fit = model.fit()
print('Lag: %s' % model_fit.k_ar)
print('Coefficients: %s' % model_fit.params)

# make predictions
predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
for i in range(len(predictions)):
    print('predicted=%f, expected=%f' % (predictions[i], test[i]))

error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)

# plot results
plt.plot(test)
plt.plot(predictions, color='red')
plt.show()

### Modelo On-line

### Escala Original