In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot

from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

## Getting the Data Set 

In [2]:
# Load the market data and isolate the Bitcoin opening-price series.

# Each intermediate frame is kept under its own name so every step can be
# inspected separately from later cells.

md = pd.read_csv('Tech_Eval/market_data.csv')
# Narrow the frame to the asset identifier and the opening price.
inter = md.loc[:, ['base_asset_id', 'price_open']]
# Keep only the rows whose asset identifier is the Bitcoin one.
mdbtc = inter.loc[inter['base_asset_id'].eq('Bitcoin_BTC_BTC')]
# Plain NumPy array of opening prices, in the row order of the CSV.
mdbtc_arr = mdbtc.loc[:, 'price_open'].to_numpy()

## Univariate Predictive Models

### Autoregression

In [3]:
# Fit an autoregressive model to the Bitcoin opening-price array.
# No lag order is passed, so the choice is left to the library default.
# NOTE(review): `AR` is deprecated in later statsmodels releases (AutoReg is
# the replacement) — confirm the pinned statsmodels version.
model = AR(mdbtc_arr)
model_fit = model.fit()
# start == end == len(mdbtc_arr): request exactly one value, at the first
# index past the observed data (a one-step-ahead forecast).
yhat = model_fit.predict(len(mdbtc_arr), len(mdbtc_arr))
print(yhat[0])

13179.25688155909


### Moving-averages

In [4]:
# Moving-average model: ARMA with order=(0, 1), i.e. no AR terms and one MA term.
# NOTE(review): `statsmodels.tsa.arima_model.ARMA` is deprecated in later
# statsmodels releases — confirm the pinned version.
model = ARMA(mdbtc_arr, order=(0, 1))
# disp=False is presumably there to silence the optimiser's convergence output.
model_fit = model.fit(disp=False)

# start == end == len(mdbtc_arr): a single prediction at the first index past
# the observed data.
yhat = model_fit.predict(len(mdbtc_arr), len(mdbtc_arr))
print(yhat[0])

10466.667170191871


### Autoregressive Moving-averages

In [5]:
# ARMA model with order=(2, 1): two autoregressive terms and one MA term.
# NOTE(review): `statsmodels.tsa.arima_model.ARMA` is deprecated in later
# statsmodels releases — confirm the pinned version.
model = ARMA(mdbtc_arr, order=(2, 1))
model_fit = model.fit(disp=False)
# make prediction: start == end == len(mdbtc_arr), so exactly one value is
# requested, at the first index past the observed data.
yhat = model_fit.predict(len(mdbtc_arr), len(mdbtc_arr))
print(yhat[0])

13543.738537098749


### Autoregressive Integrated Moving-averages

In [6]:
# ARIMA model with order=(1, 1, 1): one AR term, first-order differencing,
# one MA term.
# NOTE(review): `statsmodels.tsa.arima_model.ARIMA` is deprecated in later
# statsmodels releases — confirm the pinned version.
model = ARIMA(mdbtc_arr, order=(1, 1, 1))
model_fit = model.fit(disp=False)
# make prediction: one value at the first index past the observed data.
# typ='levels' asks for the prediction on the original price scale rather than
# on the differenced series.
yhat = model_fit.predict(len(mdbtc_arr), len(mdbtc_arr), typ='levels')
print(yhat[0])

13632.46129707051


### Seasonal Autoregressive Integrated Moving-averages

In [7]:
# Seasonal ARIMA: non-seasonal order (1, 1, 1) plus seasonal order (1, 1, 1)
# with a seasonal period of 1 (the last element of seasonal_order).
# NOTE(review): a seasonal period of 1 makes the seasonal terms act at the same
# lag as the non-seasonal ones, and newer statsmodels releases may reject it —
# confirm the intended period for this data (e.g. 7 for a weekly cycle on
# daily observations).
model = SARIMAX(mdbtc_arr, order=(1, 1, 1), seasonal_order=(1, 1, 1, 1))
model_fit = model.fit(disp=False)
# make prediction: start == end == len(mdbtc_arr), so exactly one value is
# requested, at the first index past the observed data.
yhat = model_fit.predict(len(mdbtc_arr), len(mdbtc_arr))
print(yhat[0])

13909.786725343305


## Supervised Learning Predictive Model

In [9]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Each output row pairs the ``n_in`` observations preceding time ``t``
    (the inputs) with the ``n_out`` observations starting at time ``t``
    (the targets).

    Arguments:
        data: Sequence of observations as a list or NumPy array. One-dimensional
            input is treated as a single variable; two-dimensional input is
            treated as (observations, variables).
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning, with
        columns named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    df = pd.DataFrame(data)
    # The number of variables is the number of columns of the frame. Reading it
    # from the frame (not from data.shape[0], which is the row count) keeps the
    # generated names in step with the concatenated columns for lists,
    # 1-D arrays and 2-D arrays alike.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1): oldest lag first
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Shifting leaves NaN at the edges of the series; drop those incomplete rows.
    if dropnan:
        agg = agg.dropna()
    return agg

# Reframe the Bitcoin opening prices as (previous value, current value) pairs
# using the defaults n_in=1, n_out=1. The array is converted to a plain list
# before the call.
# NOTE(review): the result is a DataFrame of lagged values, not a forecast,
# although it is stored under the same `yhat` name as the model predictions.
yhat = series_to_supervised(list(mdbtc_arr))
print(yhat)

        var1(t-1)       var1(t)
1     3932.714647   3661.759485
2     3661.759485   3680.596153
3     3680.596153   3628.773472
4     3628.773472   3645.881857
5     3645.881857   3666.783059
6     3666.783059   3680.201283
7     3680.201283   3688.762632
8     3688.762632   3730.506764
9     3730.506764   3721.066534
10    3721.066534   3696.619600
11    3696.619600   3406.372093
12    3406.372093   3408.132504
13    3408.132504   3475.103375
14    3475.103375   3486.832555
15    3486.832555   3484.022922
16    3484.022922   3557.682910
17    3557.682910   3526.715157
18    3526.715157   3490.425066
19    3490.425066   3518.870428
20    3518.870428   3475.828556
21    3475.828556   3479.212412
22    3479.212412   3596.644951
23    3596.644951   3622.287288
24    3622.287288   3603.608649
25    3603.608649   3596.941779
26    3596.941779   3617.554154
27    3617.554154   3627.132906
28    3627.132906   3574.527789
29    3574.527789   3590.969566
30    3590.969566   3740.809269
..      

### References

[Univariate predictive models](https://machinelearningmastery.com/time-series-forecasting-methods-in-python-cheat-sheet/)

### Future Directions

[LSTM predictions](https://lilianweng.github.io/lil-log/2017/07/08/predict-stock-prices-using-RNN-part-1.html)   
[Code for LSTM predictions](https://github.com/lilianweng/stock-rnn)  

[Doing the same with TF and GCP](https://medium.com/google-cloud/how-to-do-time-series-prediction-using-rnns-and-tensorflow-and-cloud-ml-engine-2ad2eeb189e8)  

[Echo State Networks](https://towardsdatascience.com/predicting-stock-prices-with-echo-state-networks-f910809d23d4)  