In [17]:
###### Imports Section ######

# Linear algebra using numpy
import numpy as np 

# Data processing using pandas
import pandas as pd 

# Keras for deep learning
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, RNN, Activation, Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import RNN
from tensorflow.keras.layers import Layer
from tensorflow.python.keras.layers import CuDNNLSTM

# MinMaxScaler for transforming features by scaling each feature to a given range
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()

# For reading Google Drive files
import requests

# In-memory text buffer (lets pandas read the downloaded CSV text like a file)
from io import StringIO

# Data visualization libraries
import seaborn as sb
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import style

# sklearn models
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB

# Cross validation library
from sklearn.model_selection import cross_val_score

# Use the following for hyperparameter tuning
from sklearn.model_selection import GridSearchCV

# Use the following two libraries for confusion matrices
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

# The following library is used for classifiers precision
from sklearn.metrics import precision_score, recall_score

# The following library is used for precision recall curve
from sklearn.metrics import precision_recall_curve

# F-score library
from sklearn.metrics import f1_score

# ROC curve library
from sklearn.metrics import roc_curve

# ROC AUC score library
from sklearn.metrics import roc_auc_score

# Use defaultdict for running totals + frequencies
from collections import defaultdict

# Fix for Jupyter not displaying all of the results
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Import the following to support regular expressions
import re

###### End Imports Section #######

In [4]:
###### Data Section ######

# Get Bitcoin price data

# Google Drive share link of the Bitcoin price CSV
file_BitcoinPrice ='https://drive.google.com/file/d/1ZO20Jko427imyxVkrtM6MMa8coLkwUSR/view?usp=sharing'

# The Drive file id is the second-to-last path segment of the share link
bitcoinPrice_file_id = file_BitcoinPrice.split('/')[-2]
downloadURL='https://drive.google.com/uc?export=download&id=' + bitcoinPrice_file_id

# Fail fast on a stalled or unsuccessful download instead of handing an
# HTTP error page to read_csv.
response = requests.get(downloadURL, timeout=30)
response.raise_for_status()

# Despite its name, formattedURL holds the downloaded CSV text
formattedURL = response.text
csv = StringIO(formattedURL)

# Parse 'Date' as datetime and order the rows by it (the original row index is kept)
bitcoinPrice_df = pd.read_csv(csv, parse_dates=['Date']).sort_values('Date')

# End get Bitcoin price data 

###### End Data Section ######

In [5]:
###### Functions Section ######

# Unique values function (we may need this later but probably not)
def unique(listInput):
    """Print each distinct value of ``listInput`` on its own line.

    Values are printed in order of first appearance, so the output is the
    same on every run (iterating a ``set`` gives no ordering guarantee).

    Parameters
    ----------
    listInput : iterable of hashable values
        The values to de-duplicate.

    Returns
    -------
    None
        The values are only printed.
    """
    # dict keys are unique and keep insertion order, unlike a set.
    for x in dict.fromkeys(listInput):
        # No trailing comma: in Python 3 it only builds a throwaway tuple.
        print(x)

# Precision and recall plot
def precisionAndRecallPlot(precision, recall, threshold):
    """Draw precision and recall as functions of the decision threshold.

    The last element of ``precision`` and of ``recall`` is dropped before
    plotting so that each curve has one point per entry of ``threshold``
    (presumably the inputs come from ``precision_recall_curve``, whose
    score arrays are one element longer than its thresholds - confirm).

    Parameters
    ----------
    precision, recall : sequence
        Score values; must support ``[:-1]`` slicing.
    threshold : sequence
        x values shared by both curves.

    Draws on the current matplotlib axes and returns ``None``.
    """
    # (scores, matplotlib format string, legend label) for each curve
    curves = (
        (precision, "r-", "precision"),
        (recall, "b", "recall"),
    )
    for scores, line_format, curve_name in curves:
        plt.plot(threshold, scores[:-1], line_format, label=curve_name, linewidth=5)

    plt.xlabel("threshold", fontsize=18)
    plt.legend(loc="upper right", fontsize=18)
    # Both scores are shown on a fixed 0..1 y axis
    plt.ylim([0, 1])

# Precision vs recall plot
def precisionVsRecallPlot(precision, recall):
    """Draw the precision-versus-recall curve on the current matplotlib axes.

    Recall is plotted on the x axis and precision on the y axis, and the
    axis labels say so.

    Parameters
    ----------
    precision : sequence
        y values of the curve.
    recall : sequence
        x values of the curve.

    Returns ``None``.
    """
    plt.plot(recall, precision, "g--", linewidth=2.5)
    # Labels must follow the plot() argument order above: x = recall, y = precision.
    plt.xlabel("recall", fontsize=18)
    plt.ylabel("precision", fontsize=18)
    # Axis limits are deliberately left as in the original (0..1.5 on both axes).
    plt.axis([0, 1.5, 0, 1.5])

# ROC AUC curve
def rocCurvePlot(false_positive_rate, true_positive_rate, label=None):
    """Draw a ROC curve plus a diagonal reference line on the current axes.

    Parameters
    ----------
    false_positive_rate : sequence
        x values of the curve.
    true_positive_rate : sequence
        y values of the curve.
    label : str, optional
        Legend label attached to the curve. No legend is drawn here, so the
        caller has to call ``plt.legend()`` for the label to show.

    Returns ``None``.
    """
    unit_interval = [0, 1]
    axis_font = {'fontsize': 18}

    plt.plot(false_positive_rate, true_positive_rate, linewidth=2, label=label)
    # Red diagonal from (0, 0) to (1, 1)
    plt.plot(unit_interval, unit_interval, 'r', linewidth=5)
    # Both axes are limited to 0..1
    plt.axis(unit_interval + unit_interval)
    plt.xlabel('False Positive Rate (FPR)', **axis_font)
    plt.ylabel('True Positive Rate (TPR)', **axis_font)
    
# LSTM sequence
def to_sequence(data, seq_len):
    """Stack overlapping windows of ``seq_len`` consecutive items of ``data``.

    Window ``i`` is ``data[i:i + seq_len]`` for every ``i`` in
    ``range(len(data) - seq_len)``. The window that starts at index
    ``len(data) - seq_len`` is therefore not produced.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        Source series; must support ``len()`` and slicing.
    seq_len : int
        Number of consecutive items per window.

    Returns
    -------
    numpy.ndarray
        The windows stacked along a new first axis. When no window fits,
        the result is an empty array of shape ``(0,)``.
    """
    n_windows = len(data) - seq_len
    windows = [data[first:first + seq_len] for first in range(n_windows)]
    return np.array(windows)

# LSTM Preprocess
def preprocess(data_raw, seq_len, train_split):
    """Window a series and split the windows into train and test sets.

    ``data_raw`` is cut into windows with ``to_sequence``. In every window
    all steps but the last form the model input and the last step is the
    target. The first ``int(train_split * n_windows)`` windows go to the
    training set, the remaining ones to the test set.

    Parameters
    ----------
    data_raw : numpy.ndarray
        2-D array (rows are time steps), so that the windows are 3-D as the
        slicing below requires.
    seq_len : int
        Window length, target step included.
    train_split : float
        Fraction of the windows assigned to the training set.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``; each ``X`` holds
        ``seq_len - 1`` steps per window, each ``y`` the last step.
    """
    windows = to_sequence(data_raw, seq_len)
    num_train = int(train_split * windows.shape[0])

    def split_inputs_and_target(part):
        # inputs: every step except the last; target: the last step
        return part[:, :-1, :], part[:, -1, :]

    X_train, y_train = split_inputs_and_target(windows[:num_train])
    X_test, y_test = split_inputs_and_target(windows[num_train:])

    return X_train, y_train, X_test, y_test

# Moving window predictions for price forecasting
def moving_time_window_preds(start, n_future_preds, epochs, response_index):
    """Forecast several steps ahead by feeding each prediction back in.

    A seed window is cut from the scaled inputs. For every requested step
    the model predicts the next value, the oldest row of the window is
    dropped and the prediction is appended as the newest row. After the
    last step all predictions are mapped back through
    ``sc.inverse_transform``.

    Relies on module-level state: ``X_test`` (inputs), ``sc`` (fitted
    scaler) and ``lstm_model`` (trained model).

    Parameters
    ----------
    start : int
        Row of the scaled inputs at which the seed window starts.
    n_future_preds : int
        Number of successive predictions to make.
    epochs : int
        Number of rows in the seed window (a window length, not a number
        of training epochs).
    response_index : int
        Size of the last axis of the window and of every prediction.

    Returns
    -------
    The result of ``sc.inverse_transform`` applied to the stacked
    predictions, one row per prediction; an empty list when
    ``n_future_preds`` is 0 or negative.
    """
    pred = []

    # NOTE(review): X_test must be a pandas object here (reset_index/.values),
    # whereas preprocess() returns NumPy arrays - confirm what the caller is
    # expected to bind to X_test before using this function.
    inputs = X_test.reset_index(drop = True).values
    inputs = sc.transform(inputs)

    # Seed window with a leading batch axis: (1, window_len, response_index).
    # The loop below slices and concatenates along axis 1, so it must be 3-D.
    seed_rows = inputs[start:(start + epochs), :]
    moving_input_window = np.reshape(
        seed_rows, (1, seed_rows.shape[0], response_index)
    )

    for _ in range(n_future_preds):
        # Predict the next value (y_hat) from the current window
        y_hat = lstm_model.predict(moving_input_window)
        pred.append(y_hat[0, :])

        # Slide the window: drop the oldest row, append the prediction
        y_hat = y_hat.reshape(1, 1, response_index)
        moving_input_window = np.concatenate(
            (moving_input_window[:, 1:, :], y_hat), axis = 1
        )

    if not pred:
        # Nothing was predicted; keep returning the empty list
        return pred

    # Undo the scaling once, after the loop: doing it inside the loop would
    # turn `pred` into an array and break the next append.
    return sc.inverse_transform(pd.DataFrame(pred))
    
###### End Functions Section ######

In [6]:
# Preliminary data exploration of Bitcoin price data:
# DataFrame.info() prints the index range, the per-column non-null counts and dtypes, and the memory usage
print('Preliminary data exploration of Bitcoin price dataframe')
bitcoinPrice_df.info()

Preliminary data exploration of Bitcoin price dataframe
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1760 entries, 1759 to 0
Data columns (total 7 columns):
Date          1760 non-null datetime64[ns]
Open          1760 non-null float64
High          1760 non-null float64
Low           1760 non-null float64
Close         1760 non-null float64
Volume        1760 non-null object
Market Cap    1760 non-null object
dtypes: datetime64[ns](1), float64(4), object(2)
memory usage: 110.0+ KB


In [8]:
# Keep only the date and the response by dropping the other price/volume
# columns; of the seven loaded columns this leaves 'Date' and 'High'
bitcoin_timeseries_df = bitcoinPrice_df.drop(
    columns=['Open', 'Low', 'Close', 'Volume', 'Market Cap']
)

In [9]:
# Scale the 'High' price as a single-feature column with the MinMaxScaler `sc`.
# NOTE(review): the scaler is fitted on the whole series, before the
# train/test split, so test-period prices influence the scaling - confirm
# this is acceptable.
high_price = bitcoinPrice_df['High'].values.reshape(-1, 1)
scaled_high_price = sc.fit_transform(high_price)

# Drop the rows that are NaN after scaling, keeping the (n_samples, 1) column shape
scaled_high_price = scaled_high_price[~np.isnan(scaled_high_price).ravel()].reshape(-1, 1)

In [None]:
# Window the scaled series and split the windows into train and test sets.
# Each window spans SEQ_LEN steps: the first SEQ_LEN - 1 are the model input,
# the last one is the target; the first 70% of the windows are used for training.
SEQ_LEN = 100
X_train, y_train, X_test, y_test = preprocess(
    data_raw=scaled_high_price,
    seq_len=SEQ_LEN,
    train_split=0.7,
)

In [21]:
# Build and train the stacked bidirectional LSTM regressor

# Hyperparameters
DROPOUT = 0.2
# Time steps per input sample: a window of SEQ_LEN steps minus the target step.
# The same number is reused below as the LSTM unit count.
WINDOW_SIZE = SEQ_LEN - 1
BATCH_SIZE = 64

# Three bidirectional LSTM layers (dropout after the first two) followed by a
# single linear output unit for the next scaled price.
lstm_model = keras.Sequential([
    Bidirectional(
        LSTM(WINDOW_SIZE, return_sequences=True),
        input_shape=(WINDOW_SIZE, X_train.shape[-1]),
    ),
    Dropout(rate=DROPOUT),
    Bidirectional(LSTM(WINDOW_SIZE * 2, return_sequences=True)),
    Dropout(rate=DROPOUT),
    # Last recurrent layer returns only its final output, not the full sequence
    Bidirectional(LSTM(WINDOW_SIZE, return_sequences=False)),
    Dense(units=1),
    Activation('linear'),
])

lstm_model.compile(loss='mean_squared_error', optimizer='adam')

# shuffle=False keeps the samples in their original order during training;
# validation_split holds out the last 10% of the training samples for val_loss.
# `history` is only bound once fit() has run to completion, and later cells read it.
history = lstm_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=BATCH_SIZE,
    shuffle=False,
    validation_split=0.1,
)

Train on 1162 samples
Epoch 1/100
1162/1162 - 308s - loss: 3.1299e-05
Epoch 2/100
1162/1162 - 296s - loss: 1.2162e-05
Epoch 3/100
1162/1162 - 400s - loss: 9.3682e-06
Epoch 4/100


KeyboardInterrupt: 

In [None]:
# Loss on the held-out test windows. The model is compiled with mean squared
# error and no extra metrics, and the targets are MinMax-scaled, so the value
# is the MSE in scaled units rather than in price units.
lstm_model.evaluate(X_test, y_test)

In [None]:
# Plot LSTM model loss per epoch.
# 'val_loss' is measured on the validation_split slice of the training data
# passed to fit(), not on X_test/y_test, so that curve is labelled 'Validation'.
plt.plot(history.history['loss'], label = 'Train')
plt.plot(history.history['val_loss'], label = 'Validation')
plt.legend()
plt.title("LSTM Model Loss")
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.show()

In [None]:
# LSTM predictions on the held-out test windows
y_hat = lstm_model.predict(X_test)
# Undo the MinMax scaling so that targets and predictions are back in price units
y_test_inverse = sc.inverse_transform(y_test)
y_hat_inverse = sc.inverse_transform(y_hat)

# Plot actual vs. predicted price (the x axis is the sample position within the test set)
plt.plot(y_test_inverse, label = 'Actual Price')
plt.plot(y_hat_inverse, label = 'Predicted Price')
plt.legend()
plt.title("Bitcoin Price Prediction")
plt.xlabel('Time (in days)')
plt.ylabel('Price')
plt.show()