In [None]:
# Import statements (standard)
import math
import time
import numpy as np
import pandas as pd
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Import statements (custom)
import helper_functions as hf

# Import statements (stats)
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
#from pandas.plotting import autocorrelation_plot as acf_pl
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Read the cleaned feature table. The first two CSV lines form a two-level
# column header and the first column becomes the row index; file line 2
# (0-based) is skipped -- presumably the row carrying the index label
# (TODO confirm against the CSV).
csv_path = '~/Desktop/Springboard/Cryptocurrency/code/cleaned_crypto_all_features.csv'
df = pd.read_csv(csv_path, index_col=0, header=[0, 1], skiprows=[2])

# Convert the index to timestamps and label it 'time'.
df.index = pd.to_datetime(df.index).rename('time')

# Drop the final row (its closing prices are missing) and force float dtype.
df = df.iloc[:-1].astype(float)

In [None]:
# Split data into "training"/"test" (test will be where we forecast) sets.
train_end = pd.to_datetime('2017/11/30 12:00:00')
test_start = pd.to_datetime('2017/11/30 13:00:00')
crypto_str = 'ETH_USD'

# Keep only the columns belonging to the selected pair; xs() drops the
# 'fsym_tsym' column level, leaving one column per feature.
sub_df = df.xs(crypto_str, level='fsym_tsym', axis=1)

# Label-based slicing with .loc is inclusive on both ends. The original used
# .ix, which was deprecated in pandas 0.20 and removed in pandas 1.0.
# NOTE(review): any row stamped strictly between train_end and test_start
# lands in neither set -- harmless if the data is hourly; confirm.
train_df = sub_df.loc[:train_end]
train_prices = train_df.close
test_df = sub_df.loc[test_start:]
test_prices = test_df.close
all_prices = sub_df.close

In [None]:
# Closing-price history for the selected pair; labels and title are applied
# through the Axes object that Series.plot() returns.
sub_df.close.plot().set(xlabel='Date', ylabel='Closing Price', title='Ethereum')
plt.show()

### Checking for Stationarity

In [None]:
def adf_stationarity_test(df, al=None, nl=1):
    """
    Performs augmented Dickey-Fuller test for stationarity. The null
    hypothesis is that the time series possesses a unit root (non-
    stationary), while the alternative hypothesis is there is no unit
    root (stationary). This function is written to be called one cryptocurrency
    at a time.

    Inputs:
        df: cryptocurrency prices (rows: times, column: prices)
        al: autolag (parameter for adfuller -> options: 'AIC', 'BIC', 't-stat').
            When given, the lag length is selected automatically and nl is
            ignored.
        nl: number of lags (input manually). Only used when al is None, in
            which case exactly nl lags are included in the test regression.

    Returns:
        formatted_results: pandas Series holding the first four values
            reported by adfuller, labelled 'Test Statistic', 'p-value',
            'Number of Lags' and 'Number of Data Points'.
    """
    if al is None:
        # adfuller defaults to autolag='AIC', which would treat nl as a mere
        # upper bound and pick the lag count itself. Disable the automatic
        # selection so the manually supplied lag count is actually used.
        results = adfuller(df, maxlag=nl, autolag=None)
    else:
        results = adfuller(df, autolag=al)

    # Only the leading four entries are reported; critical values and the
    # information criterion that follow them are not part of the summary.
    test_stat, p_value, used_lags, n_obs = results[:4]
    formatted_results = pd.Series({'Test Statistic': test_stat,
                                   'p-value': p_value,
                                   'Number of Lags': used_lags,
                                   'Number of Data Points': n_obs})

    return formatted_results

In [None]:
def difference_prices(df, order=1):
    """
    Difference cryptocurrency closing prices.

    Applies a first difference `order` times, i.e. true d-th order
    differencing as used for the "d" term of an ARIMA model. (Passing the
    order as `periods` to DataFrame.diff would instead compute a single
    lag-`order` difference, x[t] - x[t - order], which only coincides with
    this for order=1.)

    Inputs:
        df: cryptocurrency prices (rows: times, columns: cryptocurrencies);
            a pandas Series or DataFrame
        order: order of differencing, a non-negative integer (default: 1).
            order=0 leaves the values undifferenced.

    Returns:
        diff_df: differenced cryptocurrency prices (same as above), with the
            rows containing NaN (including those created by differencing)
            dropped

    Raises:
        ValueError: if order is negative
    """
    if order < 0:
        raise ValueError('order must be a non-negative integer')

    diff_df = df
    for _ in range(order):
        diff_df = diff_df.diff()

    # Each pass leaves one more leading NaN row; remove them all at the end.
    return diff_df.dropna()

#### Closing Prices (without differencing)

In [None]:
# ADF test on the undifferenced training closing prices, passing 'AIC' as
# adfuller's autolag option; the returned summary Series is the cell output.
print('Train:')
adf_stationarity_test(train_prices, al='AIC')

In [None]:
# ADF test on the undifferenced test-period closing prices, passing 'AIC' as
# adfuller's autolag option; the returned summary Series is the cell output.
print('Test:')
adf_stationarity_test(test_prices, al='AIC')

In [None]:
# ADF test on the full undifferenced closing-price series, passing 'AIC' as
# adfuller's autolag option; the returned summary Series is the cell output.
print('All:')
adf_stationarity_test(all_prices, al='AIC')

#### Closing Prices (first differenced)

In [None]:
# First-difference each price series independently (train, test, full), using
# difference_prices with its default order.
train_diff_prices, test_diff_prices, all_diff_prices = (
    difference_prices(prices)
    for prices in (train_prices, test_prices, all_prices)
)

In [None]:
# ADF test on the first-differenced training prices, passing 'AIC' as
# adfuller's autolag option; the returned summary Series is the cell output.
print('Train:')
adf_stationarity_test(train_diff_prices, al='AIC')

In [None]:
# ADF test on the first-differenced test-period prices, passing 'AIC' as
# adfuller's autolag option; the returned summary Series is the cell output.
print('Test:')
adf_stationarity_test(test_diff_prices, al='AIC')

In [None]:
# ADF test on the first-differenced full price series, passing 'AIC' as
# adfuller's autolag option; the returned summary Series is the cell output.
print('All:')
adf_stationarity_test(all_diff_prices, al='AIC')

### Determine *p*, *q* parameters from ACF/PACF

In [None]:
# ACF of the differenced prices with plot_acf's default lag range.
# plot_acf returns the Figure it draws on; left as the cell's last expression
# the notebook would render the same plot twice, so bind it and show once.
_ = plot_acf(all_diff_prices)
plt.show()

In [None]:
# ACF of the differenced prices out to 365 lags.
# plot_acf returns the Figure it draws on; left as the cell's last expression
# the notebook would render the same plot twice, so bind it and show once.
_ = plot_acf(all_diff_prices, lags=365)
plt.show()

In [None]:
# ACF of the differenced prices, zoomed in to the first 50 lags.
# plot_acf returns the Figure it draws on; left as the cell's last expression
# the notebook would render the same plot twice, so bind it and show once.
_ = plot_acf(all_diff_prices, lags=50)
plt.show()

In [None]:
# Partial autocorrelations of the differenced prices.
# plot_pacf stalls out on this series, so compute the values with pacf() and
# draw the stem plot by hand instead.
pacf_values = pacf(all_diff_prices)

plt.close('all')  # discard any figures still open from earlier cells
plt.gca().stem(pacf_values, linefmt='b-', markerfmt='bo', basefmt='r-')
plt.show()

The ACF/PACF do not look as nice as the standard textbook examples, but let's try an ARIMA(0,1,2) model. 

### ARIMA Forecast/Check Residuals

In [None]:
# ARIMA order (p, d, q): no AR terms, one round of differencing, two MA terms.
ar_p, d, ma_q = 0, 1, 2

# As a first pass, fit on the entire closing-price series (no train/test
# split yet); disp=False is passed to fit() as in the original call.
model = ARIMA(all_prices, order=(ar_p, d, ma_q))
fitted_model = model.fit(disp=False)

In [None]:
# Look for leftover structure in the fitted model's residuals by plotting
# their kernel density estimate.
residuals = fitted_model.resid
residuals.plot.kde()
plt.show()