In [22]:
#Load packages
import sys
import os
import time
import datetime

import numpy as np
import sklearn as sk
import pandas as pd
import tensorflow as tf

import tensorflow.keras as keras
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, SimpleRNN, GRU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

%load_ext tensorboard
%matplotlib inline
from matplotlib import pyplot as plt
plt.rcParams.update({'font.size': 18})

# Package versions
print('### Python version: ' + sys.version)
print('### Numpy version: ' + np.__version__)
print('### Scikit-learn version: ' + sk.__version__)
print('### Tensorflow version: ' + tf.__version__)
print('------------')

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
### Python version: 3.7.11 (default, Jul 27 2021, 09:42:29) [MSC v.1916 64 bit (AMD64)]
### Numpy version: 1.19.2
### Scikit-learn version: 0.23.2
### Tensorflow version: 2.6.0
------------


In [23]:
# Check GPU Usage
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [24]:
seed = 52
np.random.seed(seed)
tf.random.set_seed(seed)


In [132]:
#read and view data
bitcoin_path = 'Data/BTC-Hourly.csv'
bitcoin_df = pd.read_csv(bitcoin_path,
                          encoding = "ISO-8859-1")
bitcoin_df = bitcoin_df[bitcoin_df.columns[~bitcoin_df.columns.isin(['unix','date','symbol'])]]
bitcoin_df

Unnamed: 0,open,high,low,close,quantity,value,fear and greed index,google trends,btc dominance,total marketcap,...,28 day google trends percent change,7 day btc dominance average,7 day btc dominance change,28 day btc dominance average,28 day btc dominance change,7 day volume quantity average,7 day volume quantity percent change,28 day volume quantity average,28 day volume quantity percent change,target
0,8733.86,8796.68,8707.28,8740.99,4.906603e+06,559.93000,40.0,9.0,37.3641,3.740000e+11,...,0.000000,37.364100,0.000000,37.364100,0.000000,4.906603e+06,0.000000,4.906603e+06,0.000000,8740.99
1,8740.99,8766.00,8721.11,8739.00,2.390399e+06,273.58000,40.0,9.0,37.3641,3.740000e+11,...,0.000000,37.364100,0.000000,37.364100,0.000000,3.648501e+06,-0.512820,3.648501e+06,-0.512820,8739.00
2,8739.00,8750.27,8660.53,8728.49,7.986063e+06,917.79000,40.0,9.0,37.3641,3.740000e+11,...,0.000000,37.364100,0.000000,37.364100,0.000000,5.094355e+06,0.627615,5.094355e+06,0.627615,8728.49
3,8728.49,8754.40,8701.35,8708.32,1.593992e+06,182.62000,40.0,9.0,37.3641,3.740000e+11,...,0.000000,37.364100,0.000000,37.364100,0.000000,4.219264e+06,-0.675133,4.219264e+06,-0.675133,8708.32
4,8708.32,8865.00,8695.11,8795.90,1.110127e+07,1260.69000,40.0,9.0,37.3641,3.740000e+11,...,0.000000,37.364100,0.000000,37.364100,0.000000,5.595666e+06,1.262517,5.595666e+06,1.262517,8795.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37574,20018.00,20043.00,19950.00,20015.00,1.426210e+01,285456.00020,28.0,14.0,39.7860,9.740000e+11,...,-0.176471,39.872493,-0.009471,40.330005,-0.040994,6.941874e+01,-0.889159,6.463593e+01,-0.871257,20024.00
37575,20024.00,20031.00,19884.00,19884.00,1.733832e+01,344755.07420,28.0,14.0,39.7860,9.740000e+11,...,-0.176471,39.870229,-0.009471,40.327474,-0.040994,6.875604e+01,-0.769239,6.449688e+01,-0.896131,19880.00
37576,19880.00,20100.00,19870.00,20028.00,3.095370e+01,619940.76070,28.0,14.0,39.7860,9.740000e+11,...,-0.176471,39.867964,-0.009471,40.324943,-0.040994,6.849305e+01,0.525962,6.429454e+01,0.307403,20022.00
37577,20022.00,20089.00,19999.00,20045.00,1.590873e+01,318890.41390,28.0,14.0,39.7860,9.740000e+11,...,-0.125000,39.865700,-0.009061,40.322412,-0.035479,6.846700e+01,-0.793317,6.428298e+01,-0.157130,20044.00


In [133]:
bitcoin_data_column_names = ['open', 'high', 'low', 'close', 'quantity', 'value', 'fear and greed index',
                            'google trends', 'btc dominance', 'total marketcap', 'altcoin marketcap',
                            '7 day $ avg', '7 day $ % change', '28 day $ avg', '28 day $ % change',
                            '7 day greed avg', '7 day greed % change', '28 day greed avg', '28 day greed % change',
                            '7 day google trends avg', '7 day google trends % change', '28 day google trends avg', '28 day google trends % change',
                            '7 day btc dominance average', '7 day btc dominance % change', '28 day btc dominance avg', '28 day btc dominance % change',
                            '7 day volume avg', '7 day volume % change', '28 day volume avg', '28 day volume % change', 'target']

In [134]:
# Convert from a df to a numpy array
bitcoin_data = np.asarray(bitcoin_df, dtype='float64')
assert bitcoin_data.shape[1] == 32

all_x = bitcoin_data[:, range(0, bitcoin_data.shape[1] - 1)]
all_y = bitcoin_data[:, bitcoin_data.shape[1] - 1]

In [135]:
def min_max_normalize(data):
    scaler = MinMaxScaler()
    scaler.fit(data)
    return scaler.transform(data)

In [136]:
# normalize data
import Processing
all_x = min_max_normalize(all_x)

In [137]:
# train test split data
from sklearn.model_selection import train_test_split
train_x, test_x, train_y, test_y = train_test_split(all_x, all_y, test_size = .2661, random_state = seed)

# verify shapes
train_x.shape, test_x.shape, train_y.shape, test_y.shape

((27579, 31), (10000, 31), (27579,), (10000,))

In [140]:
# test val split data
test_x, val_x, test_y, val_y = train_test_split(test_x, test_y, test_size = .5, random_state = seed)

# verify shapes
test_x.shape, val_x.shape, test_y.shape, val_y.shape

((2500, 31), (2500, 31), (2500,), (2500,))