# Bitcoin Prediction

In this notebook we will try to predict the price of bitcoin.

To do this, we will create a recurrent neural network with LSTM layers.
We will compare the results with those obtained using classical forecasting methods such as fbProphet.

In [None]:
import pandas as pd
import pandas_datareader as pdr
import datetime
import numpy as np

#keras
import keras
from keras.layers import Dense, LSTM, Dropout
from keras.models import Sequential
from keras.preprocessing.sequence import TimeseriesGenerator

#sklearn
from sklearn.preprocessing import MinMaxScaler

#display
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
pd.options.plotting.backend = "plotly"
import matplotlib.pyplot as plt
sns.set_theme()

Using TensorFlow backend.


We retrieve daily Bitcoin (BTC-USD) prices from mid-September 2014 to early April 2021 from Yahoo Finance, using `pandas_datareader`.

In [None]:
# NOTE(review): these two imports repeat the ones in the first code cell.
import pandas_datareader as pdr
import datetime
# Download BTC-USD quotes from Yahoo for 2014-09-16 through 2021-04-02.
btc_data = pdr.get_data_yahoo(['BTC-USD'], 
                          start=datetime.datetime(2014, 9, 16), 
                          end=datetime.datetime(2021, 4, 2))
# Drop the second level of the column index so columns can be addressed by a
# single name such as 'Close' (presumably that level is the ticker symbol,
# since a list of tickers was requested — TODO confirm).
btc_data.columns = btc_data.columns.droplevel(1)
btc_data.head()

In [None]:
# Plot the closing price (the pandas plotting backend was set to plotly in the import cell).
btc_data['Close'].plot()

## Extract data

We focus for now on the closing price of BTC-USD.

In [None]:
# Split configuration
PREDICTIONS_DAYS = 700  # number of most recent rows held out for validation
N_FEATURES = 1          # univariate series: closing price only
WINDOW_SIZE = 365       # number of past rows fed to the model per sample
BATCH_SIZE = 8

# Work on the closing price only. Selecting the column once guarantees that
# every split below is a 1-D series, so the (n, N_FEATURES) reshapes are valid.
close_prices = btc_data['Close']
split_idx = len(close_prices) - PREDICTIONS_DAYS

# split data frames into train and validation
df_train = close_prices.iloc[:split_idx]
df_val = close_prices.iloc[split_idx:]
# Validation period preceded by WINDOW_SIZE rows of context, so that a
# generator built on it yields exactly one sample per row of df_val.
df_test = close_prices.iloc[split_idx - WINDOW_SIZE:]

# values
train_values = df_train.values
val_values = df_val.values
test_values = df_test.values
all_values = close_prices.values

series_train = np.reshape(train_values, (len(train_values), N_FEATURES))
series_val = np.reshape(val_values, (len(val_values), N_FEATURES))
series_test = np.reshape(test_values, (len(test_values), N_FEATURES))
series = np.reshape(all_values, (len(all_values), N_FEATURES))
print(series_train.shape, series_val.shape, series_test.shape)

# Fit the scaler on the training split only, then reuse it for the other
# splits so no information from the held-out period leaks into the scaling.
sc = MinMaxScaler()
series_train = sc.fit_transform(series_train)
series_val = sc.transform(series_val)
series_test = sc.transform(series_test)

# define generators: each sample is a WINDOW_SIZE-long window, the target is
# the value that immediately follows it
train_generator = TimeseriesGenerator(series_train, series_train, length=WINDOW_SIZE, batch_size=BATCH_SIZE)
val_generator = TimeseriesGenerator(series_val, series_val, length=WINDOW_SIZE, batch_size=BATCH_SIZE)
test_generator = TimeseriesGenerator(series_test, series_test, length=WINDOW_SIZE, batch_size=BATCH_SIZE)

## Training

In [None]:
def build_model():
    """Build and compile the stacked-LSTM regressor used in this notebook.

    Layers, in order: LSTM(200) returning the full sequence, Dropout(0.25),
    LSTM(100), Dropout(0.25), and a single linear Dense output unit.
    Inputs are windows of shape (WINDOW_SIZE, 1).

    The model is compiled with the Adam optimizer, MSE loss and an MAE
    metric, and its summary is printed before it is returned.

    Returns:
        The compiled, untrained Sequential model.
    """
    layers = [
        # First recurrent layer; keeps the whole sequence for the next LSTM.
        LSTM(units=200, input_shape=(WINDOW_SIZE, 1), return_sequences=True),
        # Dropout to limit overfitting.
        Dropout(0.25),
        # Second recurrent layer; emits only its final state.
        LSTM(units=100),
        Dropout(0.25),
        # Single-value regression head.
        Dense(units=1, activation="linear"),
    ]
    network = Sequential(layers)

    network.compile(optimizer='adam', loss='MSE', metrics=["mae"])

    # Print the architecture so the cell output documents the model.
    network.summary()

    return network

In [None]:
# Instantiate a fresh, untrained model (also prints its summary).
model = build_model()

In [None]:
# Train for 10 epochs on the training windows, evaluating on the validation windows after each epoch.
history = model.fit(train_generator, validation_data = val_generator, epochs = 10)

In [None]:
def plot_training_analysis(history, metric='loss'):
    """Plot a training metric per epoch, with its validation counterpart if present.

    Args:
        history: object with a ``history`` dict mapping metric names to
            per-epoch value lists (as returned by ``model.fit``).
        metric: key of the training metric to plot. The validation curve is
            looked up under ``'val_' + metric``.

    The validation curve is drawn only when the history contains it, so the
    function also works for fits that were run without validation data.
    The y axis is logarithmic.
    """
    train_curve = history.history[metric]
    # Absent when the model was fitted without validation data.
    val_curve = history.history.get('val_' + metric)

    epochs = range(len(train_curve))

    plt.plot(epochs, train_curve, 'b', linestyle="--", label='Training ' + metric)
    if val_curve is not None:
        plt.plot(epochs, val_curve, 'g', label='Validation ' + metric)
        plt.title('Training and validation ' + metric)
    else:
        plt.title('Training ' + metric)
    plt.xlabel('epoch')
    plt.ylabel(metric)
    plt.legend()
    plt.yscale("log")
    plt.show()

In [None]:
# Compare training and validation loss across epochs.
plot_training_analysis(history)

In [None]:
def join_df(train_datetime, train_values, test_datetime, test_values):
    """Stack observed and predicted values into one labelled, date-indexed frame.

    Args:
        train_datetime: datetimes of the observed values.
        train_values: 1-D sequence of observed values, one per train datetime.
        test_datetime: datetimes of the predicted values.
        test_values: predicted values, one per test datetime; any shape with
            ``len(test_values)`` elements in total (e.g. ``(n,)`` or ``(n, 1)``).

    Returns:
        DataFrame indexed by a DatetimeIndex named ``"datetime"`` with columns
        ``"label"`` (``'train'`` or ``'test'``) and ``"value"`` (float32),
        train rows first.
    """
    # Predictions usually arrive as a column vector; flatten to 1-D.
    test_values = np.reshape(test_values, (len(test_values),))
    values = np.append(train_values, test_values, axis=0)
    labels = ['train'] * len(train_values) + ['test'] * len(test_values)

    # Build each column with its own dtype instead of stacking datetimes,
    # strings and floats into one array and parsing them back afterwards.
    index = pd.DatetimeIndex(train_datetime).append(pd.DatetimeIndex(test_datetime))
    df_pred = pd.DataFrame(
        {"label": labels, "value": values.astype("float32")},
        index=index.rename("datetime"),
    )
    return df_pred

In [None]:
# Predict on the test generator and map the scaled outputs back through the scaler.
y_test_pred = sc.inverse_transform(model.predict(test_generator))
print(y_test_pred.shape)
# Attach the predictions to the validation dates.
# NOTE(review): this assumes one prediction per row of df_val — check against the shape printed above.
df_pred = join_df(df_train.index, df_train.values, df_val.index, y_test_pred)
df_pred.head()

In [None]:
# Show the last `num_val` rows of the joined frame next to the true prices.
num_val = 200
fig = px.line(df_pred[-num_val:], x = df_pred.index[-num_val:], y = "value", color="label")
# df_val is the 1-D closing-price series for the held-out period, i.e. the
# ground truth for the 'test' rows of df_pred.
fig.add_trace(go.Scatter(x = df_val.index[-num_val:], y = df_val.values[-num_val:], name = "true values"))
fig.show()

## Forecasting with a held-out test set

In [None]:
# Refit the scaler and a fresh model on the training values only.
# NOTE: this rebinds the notebook-level names `sc`, `model` and `history`.
sc = MinMaxScaler()
series_forecasting = sc.fit_transform(train_values.reshape(-1, 1))
generator_forecasting = TimeseriesGenerator(series_forecasting, series_forecasting,
                                            length=WINDOW_SIZE, batch_size=BATCH_SIZE)
# fit model
model = build_model()
history = model.fit(generator_forecasting, epochs=5, verbose=1)
# No validation data here, so only the training loss is available.
plt.plot(history.history["loss"])
plt.yscale('log')
plt.show()

In [None]:
def forecast_lstm(series_forecasting, nb_val):
    """Forecast ``nb_val`` steps ahead by feeding predictions back as inputs.

    Uses the notebook-level ``model`` and ``WINDOW_SIZE``. At every step the
    last ``WINDOW_SIZE`` rows of the running series are passed to
    ``model.predict`` as a single-sample batch, and the prediction is appended
    to the running series. The input array is not modified.

    Args:
        series_forecasting: 2-D array of past values, one row per time step.
        nb_val: number of steps to forecast.

    Returns:
        List of ``nb_val`` prediction rows, in the same scale as the input.
    """
    rolling = series_forecasting.copy()
    forecasts = []
    for _ in range(nb_val):
        # Add a leading batch axis in front of the most recent window.
        window = rolling[-WINDOW_SIZE:][np.newaxis, ...]
        next_step = model.predict(window)[-1]
        forecasts.append(next_step)
        # Extend the running series so the next window includes this forecast.
        rolling = np.concatenate((rolling, np.array([next_step])), axis=0)

    return forecasts

In [None]:
# Forecast dates: daily range from the day after the last training date to 2021-05-08.
datetimes = df_train.index
dt_forecast = pd.date_range(start = datetimes[-1]+ datetime.timedelta(days=1), end=datetime.datetime(2021, 5, 8))

# Number of values to forecast (one per forecast date)
NB_VAL = len(dt_forecast)

# compute predictions by rolling the model forward from the end of series_forecasting
predictions_lstm = forecast_lstm(series_forecasting, NB_VAL)

# inverse transformation of MinMaxScaler
predictions_lstm = sc.inverse_transform(predictions_lstm)

# join the training values and the forecast into one labelled frame
df_forecast_lstm = join_df(df_train.index, df_train.values, dt_forecast, predictions_lstm)
df_forecast_lstm.tail(5)

In [None]:
# Plot observed ('train') and forecast ('test') values as separately coloured lines.
fig = px.line(df_forecast_lstm, x=df_forecast_lstm.index, y="value", color="label")
fig.show()

In [None]:
# NOTE(review): `date` is not referenced by the plotting code in this cell.
date=df_forecast_lstm.index

# Forecast frame, one line per label
fig = px.line(df_forecast_lstm,x=df_forecast_lstm.index, y=df_forecast_lstm.value.values, color='label')
# Overlay the values from df_val for comparison
fig.add_trace(go.Scatter(x=df_val.index, y=df_val.values,
                    mode='lines+markers', name= "true values"))

fig.show()

## Forecasting without a held-out test set

In [None]:
# Refit the scaler and a fresh model on the complete closing-price history.
# The Close column is selected explicitly: reshaping all of btc_data.values
# to (-1, 1) would interleave every column into one meaningless series.
# NOTE: this rebinds the notebook-level names `sc`, `model` and `history`.
sc = MinMaxScaler()
series_forecasting = sc.fit_transform(btc_data['Close'].values.reshape(-1, 1))
generator_forecasting = TimeseriesGenerator(series_forecasting, series_forecasting,
                                            length=WINDOW_SIZE, batch_size=BATCH_SIZE)
# fit model
model = build_model()
history = model.fit(generator_forecasting, epochs=5, verbose=1)
# No validation data here, so only the training loss is available.
plt.plot(history.history["loss"])
plt.yscale('log')
plt.show()

In [None]:
# The model in this section is trained on the full history, so the forecast
# starts the day after the last available observation (not after the end of
# the training split) and runs to 2021-05-08.
datetimes = btc_data.index
dt_forecast = pd.date_range(start=datetimes[-1] + datetime.timedelta(days=1), end=datetime.datetime(2021, 5, 8))

# Number of values to forecast (one per forecast date)
NB_VAL = len(dt_forecast)

# compute predictions by rolling the model forward from the end of series_forecasting
predictions_lstm = forecast_lstm(series_forecasting, NB_VAL)

# inverse transformation of MinMaxScaler
predictions_lstm = sc.inverse_transform(predictions_lstm)

# join the observed closing prices (1-D) and the forecast into one labelled frame
df_forecast_lstm = join_df(btc_data.index, btc_data['Close'].values, dt_forecast, predictions_lstm)
df_forecast_lstm.tail(5)

In [None]:
# Plot observed ('train') and forecast ('test') values as separately coloured lines.
fig = px.line(df_forecast_lstm, x=df_forecast_lstm.index, y="value", color="label")
fig.show()

In [None]:
# NOTE(review): `date` is not referenced by the plotting code in this cell.
date=df_forecast_lstm.index

# Forecast frame, one line per label
fig = px.line(df_forecast_lstm,x=df_forecast_lstm.index, y=df_forecast_lstm.value.values, color='label')

fig.show()

## Prophet

In [None]:
### Prophet
# NOTE(review): this import would normally live in the top import cell.
from fbprophet import Prophet

# Prophet expects a frame with a date column "ds" and a value column "y".
# Only the closing price is used; btc_data.values holds every column and
# cannot be assigned to a single "y" column.
columns = ["ds", "y"]
df_prophet = pd.DataFrame(
    {"ds": btc_data.index, "y": btc_data["Close"].values},
    columns=columns,
)

# Scale y to [0, 1]. NOTE: this rebinds the notebook-level scaler `sc`.
sc = MinMaxScaler()
# fit_transform returns a column vector; flatten it before storing it as a column.
df_prophet["y"] = sc.fit_transform(np.reshape(df_prophet.y.values, (-1, 1))).ravel()
df_prophet.head()

In [None]:
# Fit a Prophet model with default settings on the prepared (ds, y) frame.
m = Prophet()
m.fit(df_prophet)

In [None]:
# Extend the history by NB_VAL future dates. The data is daily, so the
# frequency must be 'D'; 's' would add NB_VAL *seconds* after the last date.
future = m.make_future_dataframe(freq='D', periods=NB_VAL, include_history=True)
future.tail()

In [None]:
# Predict over the historical and future dates in `future`.
forecast_prophet = m.predict(future)
forecast_prophet.tail()

In [None]:
# Plot the Prophet forecast using the model's built-in plotting helper.
fig1 = m.plot(forecast_prophet)