In [None]:
import numpy as np

import pandas as pd
from pandas import read_csv
from pandas import datetime
from pandas import DataFrame
from pandas.plotting import autocorrelation_plot

import matplotlib.pyplot as plt

from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt

import seaborn as sns

from sklearn.metrics import mean_squared_error

In [None]:
def tsplot(y, lags=None, title='', figsize=(14, 8)):
    '''Draw a 2x2 diagnostic panel for a time series.

    Top row: the series itself (titled with ``title``) and a 25-bin histogram
    of its values. Bottom row: the ACF and PACF plots, both computed with the
    given ``lags``.

    Original source: https://tomaugspurger.github.io/modern-7-timeseries.html

    Parameters
    ----------
    y : object exposing a pandas-style ``.plot(ax=..., kind=...)`` method
        The series to inspect.
    lags : optional
        Forwarded unchanged to ``plot_acf`` and ``plot_pacf``.
    title : str
        Title of the time-series panel.
    figsize : tuple
        Size handed to ``plt.figure``.

    Returns
    -------
    tuple
        ``(time-series axes, ACF axes, PACF axes)``; the histogram axes are
        not returned.
    '''
    plt.figure(figsize=figsize)
    grid_shape = (2, 2)
    cells = ((0, 0), (0, 1), (1, 0), (1, 1))
    series_axis, histogram_axis, acf_axis, pacf_axis = (
        plt.subplot2grid(grid_shape, cell) for cell in cells
    )

    # Top row: raw series and the distribution of its values.
    y.plot(ax=series_axis)
    series_axis.set_title(title)
    y.plot(ax=histogram_axis, kind='hist', bins=25)
    histogram_axis.set_title('Histogram')

    # Bottom row: autocorrelation and partial autocorrelation.
    smt.graphics.plot_acf(y, lags=lags, ax=acf_axis)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_axis)
    for correlation_axis in (acf_axis, pacf_axis):
        # Start the x-axis at lag 0.
        correlation_axis.set_xlim(0)

    sns.despine()
    plt.tight_layout()
    return series_axis, acf_axis, pacf_axis

In [None]:
def ARIMA_plot_ki(series,
                  order,
                  train_size_percent,
                  scale=1):
    '''Walk-forward, one-step-ahead ARIMA forecast over the tail of a series.

    The series is de-duplicated (keeping the last row per index value),
    converted to daily frequency with forward fill, and split chronologically.
    For every row of the test portion a fresh ``ARIMA(order)`` model is fitted
    on all earlier observations and asked for a single-step forecast. Each
    forecast is compared with the actual value of the SAME day.

    Credit to Fong King Ian for providing the original code.

    Parameters
    ----------
    series : pandas.Series
        Observations to model. It goes through ``asfreq('d', 'ffill')``, so a
        datetime-like index is expected.
    order : tuple
        Order passed as the second positional argument to ``ARIMA``.
    train_size_percent : float
        Fraction of the daily-frequency rows used as the initial training
        window (e.g. ``0.9``). It must leave at least one training row and
        one test row.
    scale : number, default 1
        Divisor applied to both curves before plotting only; the printed MSE
        is computed on the unscaled values.

    Returns
    -------
    None
        The function prints the test MSE and shows a matplotlib figure.

    Raises
    ------
    ValueError
        If ``train_size_percent`` yields an empty training or test set.
    '''
    # Convert Series to DataFrame, drop duplicated index entries (keeping the
    # most recent), convert to daily frequency and fill the gaps with the
    # previous observation.
    X = series.rename("actual").to_frame()
    X = X.loc[~X.index.duplicated(keep='last')].asfreq('d', 'ffill')

    # Determine where the training set ends and the test set starts.
    size = int(len(X) * train_size_percent)
    if not 0 < size < len(X):
        raise ValueError(
            'train_size_percent=%r leaves no data for training or testing '
            '(%d rows, split at %d)' % (train_size_percent, len(X), size)
        )

    # NOTE(review): `fit(disp=0)` and the tuple returned by `forecast()` belong
    # to the legacy `statsmodels.tsa.arima_model.ARIMA` imported by this file.
    # That class was removed in statsmodels 0.13; migrate to
    # `statsmodels.tsa.arima.model.ARIMA` when upgrading.
    actual = X['actual'].values
    forecasts = []
    for end in range(size, len(X)):
        # Fit on the 'actual' values strictly before position `end`; the
        # daily index is sorted, so this is every observation earlier than
        # the day being forecast.
        model_fit = ARIMA(actual[:end], order).fit(disp=0)

        # forecast() returns the out-of-sample forecast, its standard error
        # and the confidence interval; only the forecast value is used.
        output = model_fit.forecast()
        forecasts.append(float(np.ravel(output[0])[0]))

    # Each forecast already belongs to the day it was made for, so it is
    # attached to that same row. Shifting it back one day would score it
    # against the last value the model was trained on (look-ahead leakage).
    results = X.iloc[size:].assign(predictions=forecasts).dropna()

    # MSE on the unscaled values
    error = mean_squared_error(results['actual'].values,
                               results['predictions'].values)
    print('Test MSE: %.3f' % error)

    # Scale to avoid exceeding maximum margin of plots
    test_scaled = results['actual'].values / scale
    predictions_scaled = results['predictions'].values / scale

    # plot
    plt.plot(test_scaled, color='blue', label='true values')
    plt.plot(predictions_scaled, color='red', label=f'estimated ARIMA{order}')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
def parser(x):
    '''Parse a ``YYYY-MM-DD`` date string into a ``datetime.datetime``.

    Passed to ``read_csv`` as its ``date_parser`` callback.

    Parameters
    ----------
    x : str
        Date text such as ``'2020-01-31'``.

    Returns
    -------
    datetime.datetime
        The parsed date at midnight.

    Raises
    ------
    ValueError
        If ``x`` does not match the ``%Y-%m-%d`` format.
    '''
    # Imported locally from the standard library: the `pandas.datetime` alias
    # was deprecated in pandas 1.0 and removed in 2.0, so this helper must not
    # depend on it. It was the same class, so the return type is unchanged.
    from datetime import datetime
    return datetime.strptime(x, '%Y-%m-%d')

======================================================================================================================

# Examples of Financial Time Series

In [None]:
# Historical prices for Bitcoin, Amazon and Apple, downloaded beforehand from
# Yahoo Finance as csv files.
#
# All three files are read the same way: the first row is the header and the
# first column is parsed as dates and used as the index. `squeeze=True` is
# omitted: the frames are later indexed by column (`['High']`), so there is
# nothing to squeeze, and the option was removed from `read_csv` in pandas 2.0.
CSV_READ_OPTIONS = dict(header=0,
                        parse_dates=[0],
                        index_col=0,
                        date_parser=parser)

BitCoin = read_csv('BTC-USD.csv', **CSV_READ_OPTIONS)
Amazon = read_csv('amzn.csv', **CSV_READ_OPTIONS)
AAPL = read_csv('AAPL.csv', **CSV_READ_OPTIONS)

In [None]:
# Preview the first rows of the Bitcoin price data.
BitCoin.head()

In [None]:
# Structural summary of the Bitcoin data (index, dtypes, non-null counts).
BitCoin.info()

In [None]:
# Descriptive statistics of the Bitcoin data.
BitCoin.describe()

In [None]:
# Preview the first rows of the Amazon price data.
Amazon.head()

In [None]:
# Structural summary of the Amazon data (index, dtypes, non-null counts).
Amazon.info()

In [None]:
# Preview the first rows of the Apple price data.
AAPL.head()

In [None]:
# Structural summary of the Apple data (index, dtypes, non-null counts).
AAPL.info()

In [None]:
# Diagnostic panel (series, histogram, ACF, PACF) for Bitcoin's 'High' column.
# The trailing semicolon keeps the returned axes tuple out of the cell output.
tsplot(BitCoin['High'], title='Bitcoin');

In [None]:
# Diagnostic panel (series, histogram, ACF, PACF) for Amazon's 'High' column.
# The trailing semicolon keeps the returned axes tuple out of the cell output.
tsplot(Amazon['High'], title='Amazon');

In [None]:
# Diagnostic panel (series, histogram, ACF, PACF) for Apple's 'High' column.
# The trailing semicolon keeps the returned axes tuple out of the cell output.
tsplot(AAPL['High'], title='Apple');

======================================================================================================================

# Forecasting

In [None]:
# ARIMA(5,1,0) forecast of Bitcoin's 'High' column: the first 90% of the rows
# form the initial training window, and values are divided by 1000 for the plot.
ARIMA_plot_ki(
    series=BitCoin['High'],
    order=(5, 1, 0),
    train_size_percent=0.9,
    scale=1000,
)

In [None]:
# ARIMA(2,2,2) forecast of Amazon's 'High' column: the first 90% of the rows
# form the initial training window, and values are divided by 1000 for the plot.
# ARIMA_plot_ki has no return statement, so its result is not bound to a name.
ARIMA_plot_ki(series=Amazon['High'],
              order=(2, 2, 2),
              train_size_percent=0.9,
              scale=1000)

In [None]:
# ARIMA(5,2,0) forecast of Apple's 'High' column: the first 80% of the rows
# form the initial training window; the plot uses the default scale of 1.
ARIMA_plot_ki(
    series=AAPL['High'],
    order=(5, 2, 0),
    train_size_percent=0.8,
)