In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model, Sequential
from sklearn.metrics import mean_squared_error

### Bitcoin

In [None]:
# Load the Bitcoin table from a CSV path relative to the working directory.
# Later cells read its 'High' and 'Date' columns.
data = pd.read_csv('data/BTC.csv')

In [None]:
# Summary statistics of the numeric columns (displayed as the cell output).
data.describe()

In [None]:
# Line plot of the 'High' column against the row position, with a date label
# every `tick_step` rows.
# The length comes from the frame itself (no hard-coded row count), and the
# x values are 0-based row positions so the tick at position k is labelled
# with the date of row k.
n_rows = len(data)
tick_step = 200
tick_positions = np.arange(0, n_rows, tick_step)

g = sns.lineplot(x=np.arange(n_rows), y=data['High'].to_numpy())
g.set(xticks=tick_positions)
g.set_xticklabels(rotation=30, labels=data['Date'].iloc[tick_positions])

In [None]:
# 'High' prices as a 1-D NumPy array with the missing (NaN) entries removed.
data_high = data["High"].dropna().to_numpy()
data_high.shape

In [None]:
# Rescale the High prices into the [0, 1] range.
# NOTE(review): the scaler is fit on the whole series, before the train/test
# split done further down, so the test range influences the scaling. Consider
# fitting on the training portion only.
scaler = MinMaxScaler(feature_range = (0, 1))

# The scaler works on a 2-D (n_samples, n_features) array, so the series is
# passed as a single column and then flattened back to 1-D. `reshape(-1)`
# infers the length instead of relying on a hard-coded sample count.
data_normalized = scaler.fit_transform(data_high.reshape((-1, 1)))
data_normalized = data_normalized.reshape(-1)

In [None]:
def create_dataset(dataset, time_steps=1):
    """Turn a sequence into sliding-window inputs and next-step targets.

    Window ``k`` is ``dataset[k : k + time_steps]`` and its target is
    ``dataset[k + time_steps]``.

    Parameters
    ----------
    dataset : sequence or np.ndarray
        Ordered observations, indexable and sliceable.
    time_steps : int, default 1
        Number of consecutive observations in each input window.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The stacked windows and the matching targets. Both arrays are empty
        when the sequence is too short to produce a window.

    Notes
    -----
    ``len(dataset) - time_steps - 1`` windows are produced, i.e. the last
    window that would still have a target inside the sequence is left out.
    """
    n_windows = len(dataset) - time_steps - 1
    starts = range(n_windows)  # empty when n_windows <= 0
    windows = [dataset[s:s + time_steps] for s in starts]
    targets = [dataset[s + time_steps] for s in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Window length: how many past observations are used to predict the next one.
time_steps = 1

# X holds the input windows, y the value that follows each window.
X, y = create_dataset(dataset=data_normalized, time_steps=time_steps)

In [None]:
# Chronological split: the first 80% of the windows are used for training,
# the remainder for testing (no shuffling).
train_size = int(0.8 * len(X))
test_size = len(X) - train_size

X_train, X_test = X[:train_size, :], X[train_size:, :]
y_train, y_test = y[:train_size], y[train_size:]

In [None]:
# Insert a middle axis of length 1 so the inputs go from (samples, time_steps)
# to (samples, 1, time_steps), matching the LSTM's input_shape=(1, time_steps).
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

X_train.shape

In [None]:
# Network: an LSTM layer with 2 units followed by a single-output Dense layer.
model = Sequential([
    layers.LSTM(2, input_shape=(1, time_steps)),
    layers.Dense(1),
])

In [None]:
# Training configuration: MSE loss, Adam optimizer, MSE also tracked as a metric.
loss = 'mean_squared_error'

opt = 'adam'

# `compile` documents `metrics` as a list of metrics, so the single metric is
# wrapped in a list rather than passed as a bare string.
metrics = ['mean_squared_error']

model.compile(loss = loss,
              optimizer = opt,
              metrics = metrics)

model.summary()

In [None]:
# Training hyper-parameters.
batchsize = 5
epochs = 40

# Train the model. The last 20% of the training samples are held out for
# validation, and shuffle=False keeps the samples in their original order.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batchsize,
    epochs=epochs,
    validation_split=0.2,
    shuffle=False,
)

In [None]:
# Training vs. validation loss per epoch.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])

plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
# Scaled copy of the full series; used below only as a shape template for the
# plot overlays. `transform` reuses the scaling fitted earlier instead of
# re-fitting the scaler.
dataset = scaler.transform(data_high.reshape((-1, 1)))

test_predict = model.predict(X_test)
train_predict = model.predict(X_train)

# Map the predictions from the [0, 1] scale back to price units.
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

# Targets in price units, as single-row (1, n_samples) arrays.
# `y_test` is rebuilt from the untouched `y[train_size:]` rather than from
# `y_test` itself, so re-running this cell does not inverse-transform values
# that were already rescaled (which would corrupt the RMSE).
trainY = scaler.inverse_transform(y_train.reshape(1, -1))
y_test = scaler.inverse_transform(y[train_size:].reshape(1, -1))

test_score = np.sqrt(mean_squared_error(y_test[0], test_predict[:,0]))
print('Test Score: %.2f RMSE' % (test_score))

# NaN-filled overlays the size of the full series, so each set of predictions
# is drawn at the position of the value it predicts. The first target sits
# `time_steps` rows after the start of the series.
train_plot = np.full_like(dataset, np.nan)
train_plot[time_steps:len(train_predict)+time_steps, :] = train_predict

# Test predictions start right after the training ones. The end of the slice
# is derived from the number of predictions rather than from the series length.
test_start = len(train_predict) + time_steps
test_plot = np.full_like(dataset, np.nan)
test_plot[test_start:test_start + len(test_predict), :] = test_predict

plt.plot(scaler.inverse_transform(data_normalized.reshape(-1, 1)))
plt.plot(train_plot)
plt.plot(test_plot)
plt.legend(['real', 'train', 'prediction'])
plt.show()