# References

https://www.kaggle.com/humamfauzi/btc-price-prediction-using-lstm

https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/

https://www.analyticsvidhya.com/blog/2021/05/bitcoin-price-prediction-using-recurrent-neural-networks-and-lstm/

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the local helper modules take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import tensorflow as tf
# List the logical GPU devices visible to TensorFlow; as the last expression
# in the cell, the returned list is shown as the cell output.
tf.config.list_logical_devices('GPU')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

from tensorflow.keras import datasets, layers, models
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation

In [None]:
# User-editable settings: change these to analyse a different market.
# Trading pair passed to get_data
pair = 'BTCUSDT'

# interval used to compute indicators and backtest analysis
# valid intervals - 1min, 3min, 5min, 15min, 30min, 1H, 2H, 4H, 6H, 8H, 12H, 1D, 3D, 1W, 1M
interval = '1H'

In [None]:
from binance_utils import init
from trade_utils import get_data

# Create the client and fetch the series for the configured pair and interval.
# NOTE(review): `init` and `get_data` are project-local helpers not shown here;
# presumably `init` returns a Binance API client and `get_data` returns a
# pandas DataFrame that includes a 'ClosePrice' column (the notebook indexes
# it that way) - confirm against binance_utils / trade_utils.
client = init()
data = get_data(client, pair, interval)

In [None]:
# Columns fed to the model.
# Last column must be the target
COLUMNS = ['ClosePrice']
# Window length: passed to split_sequence and used as the LSTM's timestep count
N_STEPS = 30
# Number of features per timestep, one per selected column
N_FEATURES = len(COLUMNS)

# Keep only the selected columns; .copy() detaches the result from the frame
# returned by get_data so later changes cannot affect it.
data = data[COLUMNS].copy()

In [None]:
from machine_learning_utils import split_sequence

# Scale every selected column with MinMaxScaler (default feature range 0..1).
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split performed later in the notebook, so the min/max of the test
# period leak into the training inputs. Consider fitting on the training
# portion only and applying transform() to the rest.
sc = MinMaxScaler()
data_scaled = sc.fit_transform(data)

In [None]:
#close_price = np.array(data['ClosePrice']).reshape(-1,1)
#close_price = np.array(data.iloc[:,-1]).reshape(-1,1)
#close_price

In [None]:
# Prepare both series up front: the raw closing price as an (n, 1) column
# vector, and the scaled target taken from the last column of data_scaled.
close_price = np.array(data['ClosePrice'])[:, np.newaxis]
close_price_scaled = data_scaled[:, -1]

# Figure 1: unscaled closing price
plt.figure(figsize=(14, 6))
plt.plot(close_price)
plt.grid()
plt.title("Bitcoin Closing Price")

# Figure 2: the same series after scaling
plt.figure(figsize=(14, 6))
plt.title("Scaled")
plt.grid()
plt.plot(close_price_scaled)

In [None]:
from machine_learning_utils import split_sequence

# Turn the scaled series into supervised samples using windows of N_STEPS.
# NOTE(review): split_sequence is a project-local helper not shown here. The
# LSTM below is built with input_shape=(N_STEPS, N_FEATURES), so X is assumed
# to be (samples, N_STEPS, N_FEATURES) and y the value following each window -
# confirm against machine_learning_utils.
X, y = split_sequence(data_scaled, N_STEPS)
# Print the shapes as a quick sanity check
print(X.shape, y.shape)

In [None]:
# Split features and targets into train (first 80%) and test (remaining 20%).
# shuffle=False keeps the samples in their original order: shuffling a time
# series would mix later observations into the training set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.80, shuffle=False)

In [None]:
# Draw the training targets followed by the test targets on one sample axis,
# so the point where the split happens is visible.
train_positions = range(len(y_train))
test_positions = range(len(y_train), len(y))

plt.figure(figsize=(14, 4))
plt.plot(train_positions, y_train)
plt.plot(test_positions, y_test)
plt.legend(["Training", "Test"])
plt.grid()

## Prepare X for LSTM

In [None]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
#X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], N_FEATURES))
#X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], N_FEATURES))

In [None]:
%%time
# define model
model = Sequential()

# LSTM layer 1
model.add(LSTM(units = 50, return_sequences=True, input_shape=(N_STEPS, N_FEATURES)))
model.add(Dropout(0.2))

# Intermediate LSTM layer
model.add(LSTM(units = 50, return_sequences=True))
model.add(Dropout(0.2))

# LSTM layer
model.add(LSTM(units = 50))
model.add(Dropout(0.2))

# Fully connected layer
model.add(Dense(units = 1))

print(model.summary())

# Compiling the RNN
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

# Fitting to the training set
history = model.fit(X_train, y_train, epochs=50, batch_size=168, validation_data=(X_test, y_test))

# plot history
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()

In [None]:
# Predict on the test windows and score the predictions against the test targets.
# NOTE(review): y_test comes from split_sequence(data_scaled, ...), so this MSE
# is presumably in scaled units rather than price units - confirm.
y_pred = model.predict(X_test)
MSE = mean_squared_error(y_test, y_pred)

In [None]:
# Plot actual vs. predicted target values in price units.
#
# MinMaxScaler transforms with X_scaled = X * scale_ + min_, so values are
# mapped back with (X_scaled - min_) / scale_. Multiplying by scale_ alone
# would not recover prices.
scale_target = sc.scale_[-1]  # scaling factor of the last (target) column
min_target = sc.min_[-1]      # offset of the last (target) column

y_test_price = (y_test - min_target) / scale_target
y_pred_price = (y_pred - min_target) / scale_target

plt.figure(figsize=(14,6))
plt.plot(y_test_price)
plt.plot(y_pred_price)
# MSE was computed from y_test / y_pred before this inverse scaling, so the
# number in the title is not expressed in price units.
plt.title("Comparison with MSE {0:0.10f}".format(MSE))
plt.legend(["Y", "Prediction"])
plt.xlabel("Timeframe")
plt.ylabel("Price")