# Simple forecast of a crypto currency

This notebook tries to use simple forecast methods and calculate their error.

Each forecast predicts the time series only one step into the future.
This is repeated for every point of the series, each time using only the values that precede it, and the error over all these predictions is calculated.

Therefore, no test data is needed. For any given time in the series, the prediction is a function of some or all past values in the series:

$$t_n = f(t_{n-1}, t_{n-2}, ..., t_0)$$

In [None]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd

import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

from sklearn.metrics import mean_squared_error
import math

from sklearn.linear_model import LinearRegression

In [None]:
# suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Import code from *data preprocessing* notebook

In [None]:
import ipynb.fs  # Boilerplate required
# only import definitions
from .defs.data_preprocessing import loadCoinMarketCap

In [None]:
# default (width, height) for every matplotlib figure created below
matplotlib.rcParams["figure.figsize"] = (15, 6)

## Load historical data into a dataframe

In [None]:
# Column names used to address the data frame throughout the notebook.
COL_NAME   = "name"
COL_OPEN   = "open"
COL_CLOSE  = "close"
COL_HIGH   = "high"
COL_LOW    = "low"
COL_VOLUME = "volume"
COL_MARKET = "marketcap"

# Load the historical data, filtered by history length, market cap and volume.
df = loadCoinMarketCap(
    minSamples=180,        # at least half a year of samples
    minMarketCap=10**9,    # 1 billion
    minVolume=10**9,       # 1 billion
)

# quick sanity check: first rows and the currencies that passed the filter
print(df.head())
print("Currencies: {}".format(", ".join(df[COL_NAME].unique())))

## Select a currency to work on

In [None]:
# --- forecast configuration ---
# name of the currency whose closing price is forecast
currency = "BTC"
# number of latest samples used for the error comparison with machine learning
n_test = 30
# number of past steps a window based method looks back (if needed)
lookBack = 30
# number of steps each forecast reaches into the future
lookAhead = 1

In [None]:
# Keep only the rows of the selected currency and fail early when the name
# is unknown, instead of failing later with an obscure plotting/metric error.
dfCoin = df.loc[df[COL_NAME] == currency]
if dfCoin.empty:
    raise ValueError("No data found for currency '{}'".format(currency))

# Only the close column is forecast. rename() returns a new, labelled Series,
# so the column that still belongs to dfCoin is not modified in place.
sCoin = dfCoin[COL_CLOSE].rename(currency)
sCoin.plot()
print(sCoin.describe())

## Forecast time series

We predict the whole series. But for evaluation and comparison with the machine learning part, we focus only on the last **n_test** samples/predictions to calculate the error.

In [None]:
def plotFc(title, series, pred):
    """Plot the tail of a series together with its forecast and report the RMSE.

    The last ``3 * n_test`` observed samples are drawn for context; the error
    is computed over the last ``n_test`` samples only.

    Parameters
    ----------
    title : str
        Name of the forecast method, shown in the plot title.
    series : pandas.Series
        Observed values.
    pred : pandas.Series
        Forecast values; its last ``n_test`` entries must line up with the
        last ``n_test`` entries of ``series``.
    """
    # Evaluate against the series that was passed in, not a notebook global,
    # so the function works for any series/forecast pair.
    actualTail = series[-n_test:]
    predTail = pred[-n_test:]
    rmse = math.sqrt(mean_squared_error(actualTail, predTail))

    plt.plot(series[-3*n_test:], label="series", color="blue")
    plt.plot(predTail, label="forecast", color="red")
    plt.title("{} (RMSE: {:0.2f})".format(title, rmse))
    # the lines carry labels, so show them
    plt.legend(loc="best")
    plt.show()

### Using latest sample

Predict the next sample based on the latest sample.

$$ t_{n+1} = t_{n} $$

In [None]:
# Naive forecast: shift the time series lookAhead steps into the future, so
# every sample is predicted by the value observed lookAhead steps earlier.
yHatLS = sCoin.shift(lookAhead)
# The first lookAhead predictions are NaN (nothing was shifted in), skip them.
plotFc("Latest sample", sCoin[lookAhead:], yHatLS[lookAhead:])

### Using average up to latest sample

Use the mean/average of all known samples up to *n*.

$$ t_{n+1} = \frac{1}{n} \sum_{k=1}^{n} t_k$$

In [None]:
# Expanding mean over all samples known at forecast time. Shifting the result
# by lookAhead aligns each mean with the sample it predicts, so no value newer
# than lookAhead steps before the target is ever used.
yHatAv = (
    sCoin.expanding().mean()
    .shift(lookAhead)
    .iloc[lookAhead:]   # the first lookAhead targets have no history
)
plotFc("Latest average", sCoin[lookAhead:], yHatAv)

### Simple moving average over latest samples

Use the mean/average over the latest *m + 1* samples (the current sample and the *m* samples before it) for any *n*.

$$ t_{n+1} = \frac{1}{m+1} \sum_{k=n-m}^{n} t_k,
\quad m = lookBack $$

In [None]:
# Rolling mean over the current sample plus the lookBack samples before it.
# Shifting by lookAhead aligns each mean with the sample it predicts, so the
# window never contains a value newer than lookAhead steps before the target.
# min_periods=1 keeps the NaN handling of a plain .mean() over the window.
yHatRAv = (
    sCoin.rolling(window=lookBack + 1, min_periods=1).mean()
    .shift(lookAhead)
    .iloc[lookBack + lookAhead:]   # keep only forecasts with a full window
)
plotFc("Moving average", sCoin[lookBack+lookAhead:], yHatRAv)

### Exponential weighted moving average


In [None]:
# smoothing factor handed to the exponentially weighted mean
alpha = 0.5

# Smooth everything except the last lookAhead samples (they have no target
# inside the series), then re-label the values with the index lookAhead
# steps later, i.e. with the samples they predict.
ewmKnown = sCoin[:-lookAhead].ewm(alpha=alpha).mean()
yHatEWM = pd.Series(ewmKnown.values, index=sCoin.index[lookAhead:])

plotFc("EWM average", sCoin[lookAhead:], yHatEWM)

### Auto Regressive model

Use an autoregressive model to predict the next sample.

In [None]:
# Walk-forward autoregressive forecast: for every position a fresh model is
# fitted on a sliding window of the most recent samples, and its first
# out-of-sample prediction is stored as the forecast.
# NOTE(review): `AR` is not available in every statsmodels release - confirm
# against the installed version.
from statsmodels.tsa.ar_model import AR
yHat = []
maxLag = lookBack
for k in range(maxLag, len(sCoin) - lookAhead):
    # window of maxLag + lookAhead samples ending just before position k+lookAhead
    subSeries = sCoin[k-maxLag:k+lookAhead]
    lenSubSeries = len(subSeries)
    model = AR(subSeries)
    # NOTE(review): statsmodels appears to spell this keyword `maxlag`
    # (lower case); verify that `maxLag` is honoured and not silently ignored,
    # and that the window is long enough for the intended lag order.
    modelFit = model.fit(maxLag=maxLag)
    # request the two points following the window; only the first one is used
    y = modelFit.predict(start=lenSubSeries,
                         end=lenSubSeries+1,
                         dynamic=False)
    yHat.append(y.values[0])
# label each forecast with the index entry that follows its window
yHat = pd.Series(yHat, index=sCoin.index[maxLag+lookAhead:])

# skip the first maxLag+lookAhead samples, for which no forecast exists
plotFc("Auto Regressive", sCoin[maxLag+lookAhead:], yHat)

In [None]:
# View all forecasts over all data: overlay each forecast on the complete
# series. The auto regressive forecast is not part of this overview.
fig, ax = plt.subplots()
ax.plot(sCoin, label="Original Data")
ax.plot(yHatLS, label="Latest sample")
ax.plot(yHatAv, label="Latest Average")
ax.plot(yHatRAv, label="Rolling Average")
ax.plot(yHatEWM, label="Exp. Weighted Moving Average")
ax.legend(loc="best")
ax.set_title("Visualize Forecasts")
plt.show()

In [None]:
def plotYHat(yHat, label):
    """Add the last n_test values of a forecast to the current plot.

    The RMSE against the last ``n_test`` samples of ``sCoin`` is appended to
    the legend label.

    Parameters
    ----------
    yHat : pandas.Series
        Forecast whose last ``n_test`` entries line up with the last
        ``n_test`` entries of ``sCoin``.
    label : str
        Name of the forecast method shown in the legend.
    """
    rmse = math.sqrt(mean_squared_error(sCoin[-n_test:], yHat[-n_test:]))
    plt.plot(yHat[-n_test:], label="{} ({:0.2f})".format(label, rmse))

# prepare the best model to compete against ml forecasts
plt.plot(sCoin[-3*n_test:], label="series", color="blue")
plotYHat(yHatLS, "Latest Sample")
plotYHat(yHatAv, "Average")
plotYHat(yHatRAv, "Rolling Average")
plotYHat(yHatEWM, "Exp. Weighted. Moving Average")
plt.legend(loc="best")
# the evaluated window is n_test samples long, so do not hard-code its size
plt.title("Visualize latest {} days".format(n_test))
plt.show()