In [1]:
#################################################################
# Compare Lookback history 1,5,10,25,50,100 minutes
#
# MLND Capstone > Crypto Ethereum Future Price RNN Classifier 
# Input - Ethereum minute price data, exchange Gemini, year 2018
# Output - A Prediction better than random 0.5
#################################################################

### 1. Check installed python and packages

In [1]:
# check python version
import sys
sys.version

'3.5.2 (default, Nov 12 2018, 13:43:14) \n[GCC 5.4.0 20160609]'

In [2]:
# check tensorflow, keras version and GPU
import tensorflow as tf

# Check TensorFlow Version
print (tf.VERSION)

# Check Keras Version
print(tf.keras.__version__)

# Check for a GPU
# print (tf.test.gpu_device_name())
# notice - this code line uses GPU VRam
# if executed and GPU VRam is low, then restart notebook kernel to free GPU VRam

1.12.0
2.1.6-tf


In [3]:
# ignore warnings
import warnings
warnings.filterwarnings('ignore')

### 2. Data Exploration

#### 2.2 Load Dataset from the two csv files

In [4]:
################################################
# load Dataset from the two csv files
#   gemini_BTCUSD_2018_1min.csv
#   gemini_ETHUSD_2018_1min.csv
################################################

# import packages

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import datetime as time
from datetime import datetime
import warnings
import os
from tqdm import tnrange, tqdm_notebook
import time

data_folder = './data/minute/'
data_files = ['gemini_BTCUSD_2018_1min.csv', 'gemini_ETHUSD_2018_1min.csv']

def date_format(date_str):
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(date_str))
    
def build_dataset():
    
    df_main = pd.DataFrame() 
    
    for file in tqdm_notebook(data_files):
        if file.endswith('.csv'):
            
            # get info from filename
            exchange, pair, year, _ = file.replace('.csv', '').split('_')
        
            # load data .csv file
            file_path = os.path.join(data_folder, file)
            df = pd.read_csv(file_path, parse_dates=True, skiprows=2, names=['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Volume'])
            
            # date already in correct format yyyy-mm-dd hh:mm:ss
            # no need to reformat
            # df['Date'] = df['Date'].apply(lambda x: date_format(x))
            
            # set Date as index column
            df.set_index('Date', inplace=True)

            # rename columns
            rename_cols_from = ['Close', 'Volume']
            rename_cols_to = ['{}_{}_Close'.format(exchange, pair), '{}_{}_Volume'.format(exchange, pair)]
            df.rename(columns={rename_cols_from[0]: rename_cols_to[0], rename_cols_from[1]: rename_cols_to[1]}, inplace=True)

            # reduce to only columns you want to keep
            df = df[[rename_cols_to[0], rename_cols_to[1]]]
                        
            # join all csv data into one dataframe
            if len(df_main) == 0:  
                df_main = df
            else:
                df_main = df_main.join(df)
        
            
    # fill any missing data
    df_main.fillna(method="ffill", inplace=True)

    # delete any na
    df_main.dropna(inplace=True)
    
    return df_main
            
df_main = build_dataset()          
        
# order time sequence ASC
# 2018-01-01 to 2018-12-31
df_main.sort_index(inplace=True)

# remove any bad data
# the following index is isolated and not useful to predict sequence future data
# index=0, index value=2017-09-22 19:00:00
df_main.drop(df_main.index[0], inplace=True)


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




#### 2.3 Loaded data verification

In [5]:
# Data Exploration - preview first 5 records
df_main.head()

Unnamed: 0_level_0,gemini_BTCUSD_Close,gemini_BTCUSD_Volume,gemini_ETHUSD_Close,gemini_ETHUSD_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-01 00:01:00,13800.0,0.933856,737.98,2.410785
2018-01-01 00:02:00,13775.0,1.747634,736.03,1.613
2018-01-01 00:03:00,13772.85,1.040767,738.29,1.135121
2018-01-01 00:04:00,13750.0,6.171053,738.29,0.0
2018-01-01 00:05:00,13700.0,2.305962,735.0,66.676885


In [6]:
# Data Exploration - preview last 5 records
df_main.tail()

Unnamed: 0_level_0,gemini_BTCUSD_Close,gemini_BTCUSD_Volume,gemini_ETHUSD_Close,gemini_ETHUSD_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-12-31 23:55:00,3685.7,3.279082,130.8,0.0
2018-12-31 23:56:00,3686.38,8.244595,130.0,79.267075
2018-12-31 23:57:00,3686.38,0.089801,130.0,44.386537
2018-12-31 23:58:00,3686.38,0.117334,130.0,7.966173
2018-12-31 23:59:00,3692.35,4.107691,130.8,8.688215


The data looks correct and well formatted. 

#### The list of columns in joined dataset:

In [7]:
for c in df_main.columns:
    print(c)

gemini_BTCUSD_Close
gemini_BTCUSD_Volume
gemini_ETHUSD_Close
gemini_ETHUSD_Volume


### 3. Technical Analysis    

#### Skip as not needed

### 4. Implemenation
#### 4.1 Define Varibles for Lookback history sequence and Target column     

In [8]:
# RNN model learn by lookback history
# the longer the lookback period the better, but more data will take longer to train
LOOKBACK_HISTORY_SEQ_LEN = 5 # look back last 1/5/10/25/50/100 minutes 

# How far we wish to predict the future?
PREDICT_FUTURE_SEQ_LEN = 5 # predict 5 minute in the future

# We are predicting the future price of Ethereum in exchange Gemini
PREDICT_COLUMN = 'gemini_ETHUSD_Close'

# Could switch to bitcoin, or any other altcoin for future development
# PREDICT_COLUMN = 'gemini_BTCUSD_Close' 

# Create a new column to hold the future price of Ethereum
# this allow the calculation of price raised or falled.
PREDICT_COLUMN_FUTUTE = '{}_Future'.format(PREDICT_COLUMN)

# The target label column 
# holding binary values indicate the future price is either: 
# If future price Raised, Action=Buy, Stored Value=1
# If future price Falled, Action=Sell, Stored Value=0
TARGET_LABEL = 'Target_Action'

In [9]:
# Data Exploration - preview column data, and check no NaN
df_main[PREDICT_COLUMN]

Date
2018-01-01 00:01:00    737.98
2018-01-01 00:02:00    736.03
2018-01-01 00:03:00    738.29
2018-01-01 00:04:00    738.29
2018-01-01 00:05:00    735.00
2018-01-01 00:06:00    736.00
2018-01-01 00:07:00    736.20
2018-01-01 00:08:00    736.20
2018-01-01 00:09:00    736.20
2018-01-01 00:10:00    736.31
2018-01-01 00:11:00    736.21
2018-01-01 00:12:00    736.21
2018-01-01 00:13:00    736.20
2018-01-01 00:14:00    736.20
2018-01-01 00:15:00    735.00
2018-01-01 00:16:00    733.01
2018-01-01 00:17:00    733.01
2018-01-01 00:18:00    733.01
2018-01-01 00:19:00    732.99
2018-01-01 00:20:00    732.99
2018-01-01 00:21:00    732.99
2018-01-01 00:22:00    732.99
2018-01-01 00:23:00    732.99
2018-01-01 00:24:00    732.99
2018-01-01 00:25:00    732.99
2018-01-01 00:26:00    731.54
2018-01-01 00:27:00    730.61
2018-01-01 00:28:00    731.01
2018-01-01 00:29:00    733.00
2018-01-01 00:30:00    733.00
                        ...  
2018-12-31 23:30:00    130.91
2018-12-31 23:31:00    130.91
2018-

#### 4.2 Binary Classification
the binary values are Buy=1 or Sell=0<br/>
The function binary_classify() is map to the dataset to generate values in target column<br/>
if future price raised, then buy representation value 1 is assigned<br/>
if future price falled, then sell representation value 0 is assigned<br/>

In [10]:
# Numeric Representation of Buy and Sell
SELL = 0
BUY = 1

# function to calculate values to assign target value in each data point.
def binary_classify(current_price, future_price):
    if(float(future_price) < float(current_price)):
        return SELL
    else:
        return BUY
    
def create_target_label_column(df):
    # create a new column for future price (the future price is already in dataset) 
    # shift target column data upwards to create the future data
    # there is now a current price and a future price for each data row
    # the current price and future price is then pass to binary_classify() to calculate target label buy/sell
    df[PREDICT_COLUMN_FUTUTE] = df[PREDICT_COLUMN].shift(-PREDICT_FUTURE_SEQ_LEN)

    # create a new column for target action buy or sell
    df[TARGET_LABEL] = list(map(binary_classify, df[PREDICT_COLUMN], df[PREDICT_COLUMN_FUTUTE])) 
    
    return df

df_main = create_target_label_column(df_main)

In [11]:
# check data column values shifted by -5, and target label is correctly classified
df_main[[PREDICT_COLUMN, PREDICT_COLUMN_FUTUTE, TARGET_LABEL]].head(100)

Unnamed: 0_level_0,gemini_ETHUSD_Close,gemini_ETHUSD_Close_Future,Target_Action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01 00:01:00,737.98,736.00,0
2018-01-01 00:02:00,736.03,736.20,1
2018-01-01 00:03:00,738.29,736.20,0
2018-01-01 00:04:00,738.29,736.20,0
2018-01-01 00:05:00,735.00,736.31,1
2018-01-01 00:06:00,736.00,736.21,1
2018-01-01 00:07:00,736.20,736.21,1
2018-01-01 00:08:00,736.20,736.20,1
2018-01-01 00:09:00,736.20,736.20,1
2018-01-01 00:10:00,736.31,735.00,0


In [12]:
number_of_buys = len(df_main[df_main[TARGET_LABEL] == BUY])
number_of_sells = len(df_main[df_main[TARGET_LABEL] == SELL])

print ('Number of Buys:', number_of_buys)
print ('Number of Sells:', number_of_sells)

if (number_of_buys > number_of_sells):
    print ('There are more Buys than Sells.')
elif (number_of_buys < number_of_sells):
    print ('There are more Sells than Buys.')
else:
    print ('There are equal number of Buys and Sells.')

Number of Buys: 276145
Number of Sells: 213961
There are more Buys than Sells.


#### 4.3 Data split for Training and Validation
training set 95% <br/>
validation set 05% <br/>
(while data still in correctly ordered in time sequence)<br/>

In [13]:
def data_split(df):
    time_list = sorted(df.index.values)

    val_set_percentage = 0.05
    chop_at_time_index = -int(val_set_percentage * len(time_list))
    chop_at_time = sorted(df_main.index.values)[chop_at_time_index]

    df_train = df_main[(df_main.index < chop_at_time)]
    df_validation = df_main[(df_main.index >= chop_at_time)]

    return df_train, df_validation, chop_at_time
    
df_train, df_validation, chop_at_time = data_split(df_main)

print ('The dataset split at index time:', chop_at_time)
print ('The number of data points in training set:', len(df_train))
print ('The number of data points in validation set:', len(df_validation))


The dataset split at index time: 2018-12-14 23:35:00
The number of data points in training set: 465601
The number of data points in validation set: 24505


#### 4.4 Pre-processing data
step 1. drop future column - PREDICT_COLUMN_FUTUTE <br/>
step 2. normalize data by pct change <br/>
step 3. scale data close to 0 -> 1.0 <br/>
step 4. build sequences, each data point get 100 minutes lookback history <br/>
step 5. balance data to 50% buys 50% sells <br/>
step 6. package features as X, target as y, return <br/>

In [14]:
from sklearn import preprocessing
from collections import deque
import random

def handle_features_zero(df):
    for col in df.columns:
        
        # avoid target as target is 1 or 0
        if col != TARGET_LABEL:
            
            # handling volume values 0.0, 
            # these 0.0 value will cause pct_change to error by divide by 0
            # replace 0.0 with NAN
            df[col].replace([0.0], [float('nan')], inplace=True) 

            # drop any data points have volume value 0.0, as it might be misleading
            df.dropna(inplace=True) 
            
    return df
    
# 2. normalize data - change values to pct change
def normalize_to_rate_of_change(df):
    for col in df.columns:
        if col != TARGET_LABEL:
            df[col] = df[col].pct_change()
            
    return df
    
# 3. scale data - change values to 0 -> 1.0
def scale(df):
    for col in df.columns:
        if col != TARGET_LABEL:
            df[col] = preprocessing.scale(df[col].values)
        
    # drop any nan as a result of normalize or scale
    df = handle_features_nan(df)
            
    return df
    
def handle_features_nan(df):
    df.dropna(inplace=True)
    return df
    
# 4. build sequences of history rows, last 100 minutes
def build_history_sequences(df):
    
    all_sequences = []
    next_sequence = deque(maxlen=LOOKBACK_HISTORY_SEQ_LEN)
    
    for data_row in df.values:
        # extract feature columns, ignore target column
        features_only_no_target = [n for n in data_row[:-1]]
        next_sequence.append(features_only_no_target)
    
        # once the sequence have enough length, 24 hours, then append ot main list
        if len(next_sequence) == LOOKBACK_HISTORY_SEQ_LEN:
            all_sequences.append([np.array(next_sequence), data_row[-1]])
            
    # shuffle to random spread data
    random.shuffle(all_sequences)
    
    return all_sequences
    
# 5. balance data to 50% buys 50% sells
def balance_data(all_sequences):
    buy_list = []
    sell_list = []
    
    # first seperate out the buys and sells 
    for seq, target in all_sequences:
        if target == SELL:
            sell_list.append([seq, target])
        elif target == BUY:
            buy_list.append([seq, target])

    random.shuffle(buy_list)
    random.shuffle(sell_list)
    
    # work out which havethe smaller len, the crop on that len
    lower_len = min(len(buy_list), len(sell_list))

    buy_list = buy_list[:lower_len]
    sell_list = sell_list[:lower_len]
    
    # join the list back together and shuffle to spread the buys and sells evenly
    balanced_sequences = buy_list + sell_list
    random.shuffle(balanced_sequences)
    
    return balanced_sequences
        
def package_data(balanced_sequences):
    X = []
    y = []
    
    for seq, target in balanced_sequences:
        X.append(seq)
        y.append(target)
        
    return np.array(X), y
        
def data_preprocessing(df):
    
    # 1. drop future column
    df = df.drop(PREDICT_COLUMN_FUTUTE, 1)
    
    # handle any nan values, by fill forward
    df.fillna(method='ffill')
        
    # remove any volumn with value 0.0
    df = handle_features_zero(df)
    
    # 2. normalize data - change values to pct change
    df = normalize_to_rate_of_change(df)
                        
    # 3. scale data - change values to 0 -> 1.0
    df = scale(df)
    
    # 4. build sequences of history rows, last 100 minutes
    all_sequences = build_history_sequences(df)

    # 5. balance data to 50% buys 50% sells
    balanced_sequences = balance_data(all_sequences)
    
    # 6. return features as X, target as y
    X, y = package_data(balanced_sequences)
    
    return X, y
    

The above code in data_preprocessing creates 4 sets:
- X Training
- y Training
- X Validation
- y Validation

In [15]:
X_train, y_train = data_preprocessing(df_train)
X_val, y_val = data_preprocessing(df_validation)

#### 4.5 Save pre-processed data
Save the train sets and validation sets for future use.

In [16]:
import pickle

save_to_folder = 'data/preprocessed_lookback_minutes/'

#create folder data if not exist
if not os.path.exists(save_to_folder):
    os.makedirs(save_to_folder)

save_file = save_to_folder + 'X_train.pickle'
pickle.dump(X_train, open(save_file, "wb"))
print ('Saved pre-processed data to pickle - %s' % save_file)

save_file = save_to_folder + 'y_train.pickle'
pickle.dump(y_train, open(save_file, "wb"))
print ('Saved pre-processed data to pickle - %s' % save_file)

save_file = save_to_folder + 'X_val.pickle'
pickle.dump(X_val, open(save_file, "wb"))
print ('Saved pre-processed data to pickle - %s' % save_file)

save_file = save_to_folder + 'y_val.pickle'
pickle.dump(y_val, open(save_file, "wb"))
print ('Saved pre-processed data to pickle - %s' % save_file)       


Saved pre-processed data to pickle - data/preprocessed_lookback_minutes/X_train.pickle
Saved pre-processed data to pickle - data/preprocessed_lookback_minutes/y_train.pickle
Saved pre-processed data to pickle - data/preprocessed_lookback_minutes/X_val.pickle
Saved pre-processed data to pickle - data/preprocessed_lookback_minutes/y_val.pickle


#### 4.6 Load pre-proocessed data
To Save time, next time in this notebook you dont have to run the previous code cells again, just run code from next cell to use the pre-processed data for RNN model training.

In [17]:
# re-import used packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import datetime as time
from datetime import datetime
import warnings
import os
from tqdm import tnrange, tqdm_notebook
import time
import pickle


In [18]:
# re-define variables
LOOKBACK_HISTORY_SEQ_LEN = 5 # 1, 5, 10, 25, 50, 100 
PREDICT_FUTURE_SEQ_LEN = 5 
PREDICT_COLUMN = 'gemini_ETHUSD_Close'
PREDICT_COLUMN_FUTUTE = '{}_Future'.format(PREDICT_COLUMN)
TARGET_LABEL = 'Target_Action'


Load the training set and validation set back from previously saved pickle files:


In [19]:
load_from_folder = 'data/preprocessed_lookback_minutes/'

load_file = load_from_folder + 'X_train.pickle'
X_train = pickle.load(open(load_file, "rb"))
print ('Loaded pre-processed data - %s' % load_file)

load_file = load_from_folder + 'y_train.pickle'
y_train = pickle.load(open(load_file, "rb"))
print ('Loaded pre-processed data - %s' % load_file)

load_file = load_from_folder + 'X_val.pickle'
X_val = pickle.load(open(load_file, "rb"))
print ('Loaded pre-processed data - %s' % load_file)

load_file = load_from_folder + 'y_val.pickle'
y_val = pickle.load(open(load_file, "rb"))
print ('Loaded pre-processed data - %s' % load_file)


Loaded pre-processed data - data/preprocessed_lookback_minutes/X_train.pickle
Loaded pre-processed data - data/preprocessed_lookback_minutes/y_train.pickle
Loaded pre-processed data - data/preprocessed_lookback_minutes/X_val.pickle
Loaded pre-processed data - data/preprocessed_lookback_minutes/y_val.pickle


In [20]:
# check data len
print ('Size of X train data:', len(X_train))
print ('Size of y train data:', len(y_train))
print ('Size of X validation data:', len(X_val))
print ('Size of y validation data:', len(y_val))

Size of X train data: 240516
Size of y train data: 240516
Size of X validation data: 11516
Size of y validation data: 11516


In [21]:
# check target buy=1, sell=0
print ('y train buys:', y_train.count(1))
print ('y train sells:', y_train.count(0))

print ('y validation buys:', y_val.count(1))
print ('y validation sells:', y_val.count(0))


y train buys: 120258
y train sells: 120258
y validation buys: 5758
y validation sells: 5758


### 5. RNN Model for Time Sequence 

#### 5.1 import deep learning and related packages

In [22]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
import time
import os
from glob import glob


#### 5.2 Tuning Hyper Parameters
fine tune model by executing combination of multiple hyper parameters

In [23]:
###
# use best model hyper parameters, to speed up training time 
#
# *Model 35/128*
# exch-gemini-predict-gemini_ETHUSD_Close-lookback-100-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.0001-decay-1e-06
# Train on 240418 samples, validate on 11446 samples
# val_acc: 0.5744
# Test Loss:0.67945953966666
# Test Accuracy:0.5744364843925938
###    

EPOCHS = 10

# model achitecture layers
lstm_layers = [2]
layer_sizes = [128] 
dense_layers = [1]
batch_sizes = [64]
dropouts = [0.2]
optimizers = [Adam]
learning_rates = [1e-3]
decays = [1e-6]
loss_functions = ['sparse_categorical_crossentropy']

number_of_models = len(lstm_layers) \
                    *len(layer_sizes) \
                    *len(dense_layers) \
                    *len(batch_sizes) \
                    *len(dropouts) \
                    *len(optimizers) \
                    *len(learning_rates) \
                    *len(decays) \
                    *len(loss_functions)

print ('Number of models:', number_of_models)

# folders for model training and tensorboard
logs_to_folder = 'logs_lookback_minutes/'
if not os.path.exists(logs_to_folder):
    os.makedirs(logs_to_folder)
    
models_to_folder = 'models_lookback_minutes/'
if not os.path.exists(models_to_folder):
    os.makedirs(models_to_folder)

Number of models: 1


#### 5.3 Execute RNN model training on each combination of hyper-parameters.

In [24]:

model_count = 0

for lstm_layer in lstm_layers:
    for layer_size in layer_sizes:
        for dense_layer in dense_layers:
            for batch_size in batch_sizes:
                for dropout in dropouts:
                    for optimizer in optimizers:
                        for lr in learning_rates:
                            for decay in decays:
                                for loss_function in loss_functions:
                                    model_name = 'Lookback_minutes'
                                    model_name += '-exch-gemini'
                                    model_name += '-predict-'+PREDICT_COLUMN \
                                                + '-lookback-'+str(LOOKBACK_HISTORY_SEQ_LEN) \
                                                + '-future-'+str(PREDICT_FUTURE_SEQ_LEN)
                                    model_name += "-lstm-{}-nodes-{}-dense-{}-batch-{}-dropout-{}-optimizer-{}-lr-{}-decay-{}".format( \
                                                    lstm_layer, \
                                                    layer_size, \
                                                    dense_layer, \
                                                    batch_size, \
                                                    dropout, \
                                                    optimizer, \
                                                    lr, \
                                                    decay)

                                    model_count += 1

                                    print ('*Model {}/{}*'.format(model_count, number_of_models))
                                    print (model_name)

                                    # let user know if hyper parameters already trained
                                    # this could happen if kernal crash and restarted training
                                    check_logs_file = '{}/{}'.format(logs_to_folder, model_name)
                                    if glob(check_logs_file+'*'):
                                        print ('%s already exist.' % (check_logs_file))
                                        # continue

                                    # name of model by hyper-parameter values
                                    # to compare them all in tensorboard
                                    model_name += '-time-{}'.format(int(time.time()))

                                    # LSTM layers - minimum of 1
                                    model = Sequential()
                                    model.add(CuDNNLSTM(layer_size, input_shape=(X_train.shape[1:]), return_sequences=True))

                                    # how much to forget
                                    model.add(Dropout(dropout))

                                    # normalize output, the output is the input for the next LSTM layer
                                    model.add(BatchNormalization())

                                    # LSTM layers - add more
                                    for l in range(lstm_layer-1):
                                        model.add(CuDNNLSTM(layer_size, return_sequences=True))
                                        model.add(Dropout(dropout))
                                        model.add(BatchNormalization())

                                    model.add(CuDNNLSTM(layer_size))
                                    model.add(Dropout(dropout))

                                    # this LSTM layer do not need to return sequence, as the next layer is dense layer
                                    model.add(BatchNormalization())

                                    # Dense layers
                                    for _ in range(dense_layer):
                                        model.add(Dense(32, activation='relu'))
                                        model.add(Dropout(dropout))

                                    # the final layer only have 2 nodes for binary classification: Buy=1, Sell=0
                                    model.add(Dense(2, activation='softmax'))

                                    # optimizer
                                    #   learning rate
                                    #   learning decay
                                    # opt = tf.keras.optimizers.Adam(lr=1e-3, decay=1e-6)
                                    opt = optimizer(lr=lr, decay=decay)

                                    # compile 
                                    model.compile(optimizer=opt, loss=loss_function, metrics=['accuracy'])

                                    # save logs for tensorboard graphs
                                    tensorboard = TensorBoard(log_dir="{}/{}".format(logs_to_folder, model_name))

                                    # saves only the best ones
                                    filepath = "RNN_-{epoch:02d}-{val_acc:.3f}"
                                    checkpoint = ModelCheckpoint('{}/{}.model'.format(models_to_folder, filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) 

                                    # training
                                    model.fit(X_train, y_train, epochs=EPOCHS, batch_size=batch_size, validation_data=(X_val, y_val), callbacks=[tensorboard, checkpoint],)

                                    # score
                                    score = model.evaluate(X_val, y_val, verbose=0)

                                    # loss and accuracy
                                    loss = score[0]
                                    accuracy = score[1]
                                    print('Test Loss:{}'.format(loss))
                                    print('Test Accuracy:{}'.format(accuracy))

                                    # save model
                                    model.save('{}/{}'.format(models_to_folder, model_name))


*Model 1/1*
Lookback_minutes-exch-gemini-predict-gemini_ETHUSD_Close-lookback-5-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.001-decay-1e-06
Train on 240516 samples, validate on 11516 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss:0.6857328338314983
Test Accuracy:0.5431573462800956


#### 5.4 Results

The model validation accuracy stayed the same for 1,5,10,25,50 minutes, and improved on 100 mins.

### Results

#### lookback 01 min training result

*Model 1/1*
Lookback_minutes-exch-gemini-predict-gemini_ETHUSD_Close-lookback-1-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.001-decay-1e-06
Train on 240518 samples, validate on 11522 samples
Epoch 1/10
240518/240518 [==============================] - 21s 88us/step - loss: 0.6949 - acc: 0.5327 - val_loss: 0.6875 - val_acc: 0.5473
Epoch 2/10
240518/240518 [==============================] - 20s 82us/step - loss: 0.6893 - acc: 0.5392 - val_loss: 0.6875 - val_acc: 0.5459
Epoch 3/10
240518/240518 [==============================] - 20s 81us/step - loss: 0.6892 - acc: 0.5393 - val_loss: 0.6869 - val_acc: 0.5522
Epoch 4/10
240518/240518 [==============================] - 20s 82us/step - loss: 0.6892 - acc: 0.5390 - val_loss: 0.6877 - val_acc: 0.5457
Epoch 5/10
240518/240518 [==============================] - 20s 81us/step - loss: 0.6890 - acc: 0.5397 - val_loss: 0.6869 - val_acc: 0.5523
Epoch 6/10
240518/240518 [==============================] - 19s 81us/step - loss: 0.6889 - acc: 0.5398 - val_loss: 0.6875 - val_acc: 0.5528
Epoch 7/10
240518/240518 [==============================] - 19s 81us/step - loss: 0.6886 - acc: 0.5410 - val_loss: 0.6859 - val_acc: 0.5500
Epoch 8/10
240518/240518 [==============================] - 20s 81us/step - loss: 0.6886 - acc: 0.5414 - val_loss: 0.6865 - val_acc: 0.5512
Epoch 9/10
240518/240518 [==============================] - 20s 83us/step - loss: 0.6885 - acc: 0.5420 - val_loss: 0.6868 - val_acc: 0.5519
Epoch 10/10
240518/240518 [==============================] - 20s 81us/step - loss: 0.6884 - acc: 0.5421 - val_loss: 0.6858 - val_acc: 0.5524
Test Loss:0.6858131916981111
Test Accuracy:0.552421454608575

#### lookback 05 min training result

*Model 1/1*
Lookback_minutes-exch-gemini-predict-gemini_ETHUSD_Close-lookback-5-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.001-decay-1e-06
Train on 240516 samples, validate on 11516 samples
Epoch 1/10
240516/240516 [==============================] - 23s 96us/step - loss: 0.6958 - acc: 0.5342 - val_loss: 0.6861 - val_acc: 0.5427
Epoch 2/10
240516/240516 [==============================] - 22s 90us/step - loss: 0.6868 - acc: 0.5466 - val_loss: 0.6875 - val_acc: 0.5473
Epoch 3/10
240516/240516 [==============================] - 21s 88us/step - loss: 0.6866 - acc: 0.5483 - val_loss: 0.6852 - val_acc: 0.5532
Epoch 4/10
240516/240516 [==============================] - 21s 89us/step - loss: 0.6856 - acc: 0.5515 - val_loss: 0.6856 - val_acc: 0.5548
Epoch 5/10
240516/240516 [==============================] - 21s 88us/step - loss: 0.6850 - acc: 0.5534 - val_loss: 0.6847 - val_acc: 0.5533
Epoch 6/10
240516/240516 [==============================] - 21s 88us/step - loss: 0.6846 - acc: 0.5547 - val_loss: 0.6833 - val_acc: 0.5582
Epoch 7/10
240516/240516 [==============================] - 21s 87us/step - loss: 0.6844 - acc: 0.5537 - val_loss: 0.6833 - val_acc: 0.5524
Epoch 8/10
240516/240516 [==============================] - 22s 90us/step - loss: 0.6842 - acc: 0.5555 - val_loss: 0.6825 - val_acc: 0.5572
Epoch 9/10
240516/240516 [==============================] - 21s 89us/step - loss: 0.6836 - acc: 0.5567 - val_loss: 0.6856 - val_acc: 0.5454
Epoch 10/10
240516/240516 [==============================] - 21s 87us/step - loss: 0.6834 - acc: 0.5567 - val_loss: 0.6857 - val_acc: 0.5432
Test Loss:0.6857328338314983
Test Accuracy:0.5431573462800956

#### lookback 10 min training result

*Model 1/1*
Lookback_minute-sexch-gemini-predict-gemini_ETHUSD_Close-lookback-10-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.001-decay-1e-06
Train on 240512 samples, validate on 11514 samples
Epoch 1/10
240512/240512 [==============================] - 24s 99us/step - loss: 0.6942 - acc: 0.5353 - val_loss: 0.6896 - val_acc: 0.5398
Epoch 2/10
240512/240512 [==============================] - 23s 94us/step - loss: 0.6856 - acc: 0.5511 - val_loss: 0.6835 - val_acc: 0.5591
Epoch 3/10
240512/240512 [==============================] - 22s 93us/step - loss: 0.6846 - acc: 0.5537 - val_loss: 0.6831 - val_acc: 0.5557
Epoch 4/10
240512/240512 [==============================] - 22s 93us/step - loss: 0.6836 - acc: 0.5556 - val_loss: 0.6839 - val_acc: 0.5544
Epoch 5/10
240512/240512 [==============================] - 22s 92us/step - loss: 0.6832 - acc: 0.5584 - val_loss: 0.6842 - val_acc: 0.5511
Epoch 6/10
240512/240512 [==============================] - 22s 92us/step - loss: 0.6825 - acc: 0.5593 - val_loss: 0.6842 - val_acc: 0.5541
Epoch 7/10
240512/240512 [==============================] - 22s 92us/step - loss: 0.6816 - acc: 0.5609 - val_loss: 0.6855 - val_acc: 0.5545
Epoch 8/10
240512/240512 [==============================] - 22s 92us/step - loss: 0.6808 - acc: 0.5638 - val_loss: 0.6869 - val_acc: 0.5448
Epoch 9/10
240512/240512 [==============================] - 22s 93us/step - loss: 0.6799 - acc: 0.5654 - val_loss: 0.6834 - val_acc: 0.5517
Epoch 10/10
240512/240512 [==============================] - 22s 93us/step - loss: 0.6786 - acc: 0.5679 - val_loss: 0.6841 - val_acc: 0.5507
Test Loss:0.6841059787749251
Test Accuracy:0.5507208615598402

#### lookback 25 min training result

*Model 1/1*
Lookback_minute-exch-gemini-predict-gemini_ETHUSD_Close-lookback-25-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.001-decay-1e-06
Train on 240492 samples, validate on 11500 samples
Epoch 1/10
240492/240492 [==============================] - 29s 120us/step - loss: 0.6944 - acc: 0.5334 - val_loss: 0.6857 - val_acc: 0.5476
Epoch 2/10
240492/240492 [==============================] - 27s 111us/step - loss: 0.6862 - acc: 0.5495 - val_loss: 0.6817 - val_acc: 0.5577
Epoch 3/10
240492/240492 [==============================] - 27s 112us/step - loss: 0.6841 - acc: 0.5550 - val_loss: 0.6850 - val_acc: 0.5501
Epoch 4/10
240492/240492 [==============================] - 27s 113us/step - loss: 0.6824 - acc: 0.5610 - val_loss: 0.6862 - val_acc: 0.5449
Epoch 5/10
240492/240492 [==============================] - 27s 111us/step - loss: 0.6805 - acc: 0.5645 - val_loss: 0.6814 - val_acc: 0.5617
Epoch 6/10
240492/240492 [==============================] - 27s 111us/step - loss: 0.6797 - acc: 0.5673 - val_loss: 0.6847 - val_acc: 0.5457
Epoch 7/10
240492/240492 [==============================] - 27s 113us/step - loss: 0.6774 - acc: 0.5728 - val_loss: 0.6826 - val_acc: 0.5579
Epoch 8/10
240492/240492 [==============================] - 27s 110us/step - loss: 0.6753 - acc: 0.5765 - val_loss: 0.6869 - val_acc: 0.5450
Epoch 9/10
240492/240492 [==============================] - 27s 111us/step - loss: 0.6725 - acc: 0.5822 - val_loss: 0.6843 - val_acc: 0.5597
Epoch 10/10
240492/240492 [==============================] - 27s 111us/step - loss: 0.6688 - acc: 0.5898 - val_loss: 0.6924 - val_acc: 0.5500
Test Loss:0.6924360336428104
Test Accuracy:0.55

#### lookback 50 min training result

*Model 1/1*
Lookback_minute-exch-gemini-predict-gemini_ETHUSD_Close-lookback-50-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.001-decay-1e-06
Train on 240474 samples, validate on 11482 samples
Epoch 1/10
240474/240474 [==============================] - 40s 166us/step - loss: 0.6944 - acc: 0.5353 - val_loss: 0.6860 - val_acc: 0.5588
Epoch 2/10
240474/240474 [==============================] - 38s 160us/step - loss: 0.6850 - acc: 0.5513 - val_loss: 0.6833 - val_acc: 0.5569
Epoch 3/10
240474/240474 [==============================] - 38s 158us/step - loss: 0.6835 - acc: 0.5575 - val_loss: 0.6861 - val_acc: 0.5457
Epoch 4/10
240474/240474 [==============================] - 38s 158us/step - loss: 0.6819 - acc: 0.5616 - val_loss: 0.6837 - val_acc: 0.5499
Epoch 5/10
240474/240474 [==============================] - 38s 158us/step - loss: 0.6805 - acc: 0.5656 - val_loss: 0.6825 - val_acc: 0.5647
Epoch 6/10
240474/240474 [==============================] - 38s 159us/step - loss: 0.6786 - acc: 0.5689 - val_loss: 0.6815 - val_acc: 0.5584
Epoch 7/10
240474/240474 [==============================] - 38s 159us/step - loss: 0.6764 - acc: 0.5741 - val_loss: 0.6820 - val_acc: 0.5592
Epoch 8/10
240474/240474 [==============================] - 38s 159us/step - loss: 0.6742 - acc: 0.5780 - val_loss: 0.6855 - val_acc: 0.5624
Epoch 9/10
240474/240474 [==============================] - 38s 158us/step - loss: 0.6714 - acc: 0.5845 - val_loss: 0.6898 - val_acc: 0.5525
Epoch 10/10
240474/240474 [==============================] - 38s 159us/step - loss: 0.6684 - acc: 0.5890 - val_loss: 0.6874 - val_acc: 0.5543
Test Loss:0.6874361594386517
Test Accuracy:0.5542588399233583

#### lookback 100 min training result

*Model 1/1*
Lookback_minutesexch-gemini-predict-gemini_ETHUSD_Close-lookback-100-future-5-lstm-2-nodes-128-dense-1-batch-64-dropout-0.2-optimizer-<class 'tensorflow.python.keras.optimizers.Adam'>-lr-0.001-decay-1e-06
Train on 240418 samples, validate on 11446 samples
Epoch 1/10
240418/240418 [==============================] - 65s 271us/step - loss: 0.6934 - acc: 0.5372 - val_loss: 0.6851 - val_acc: 0.5616
Epoch 2/10
240418/240418 [==============================] - 64s 267us/step - loss: 0.6845 - acc: 0.5533 - val_loss: 0.6823 - val_acc: 0.5529
Epoch 3/10
240418/240418 [==============================] - 64s 265us/step - loss: 0.6834 - acc: 0.5575 - val_loss: 0.6812 - val_acc: 0.5639
Epoch 4/10
240418/240418 [==============================] - 64s 265us/step - loss: 0.6817 - acc: 0.5619 - val_loss: 0.6815 - val_acc: 0.5622
Epoch 5/10
240418/240418 [==============================] - 64s 267us/step - loss: 0.6797 - acc: 0.5685 - val_loss: 0.6804 - val_acc: 0.5598
Epoch 6/10
240418/240418 [==============================] - 64s 266us/step - loss: 0.6777 - acc: 0.5721 - val_loss: 0.6786 - val_acc: 0.5660
Epoch 7/10
240418/240418 [==============================] - 64s 267us/step - loss: 0.6757 - acc: 0.5762 - val_loss: 0.6950 - val_acc: 0.5584
Epoch 8/10
240418/240418 [==============================] - 64s 267us/step - loss: 0.6732 - acc: 0.5822 - val_loss: 0.6807 - val_acc: 0.5637
Epoch 9/10
240418/240418 [==============================] - 64s 266us/step - loss: 0.6698 - acc: 0.5892 - val_loss: 0.6820 - val_acc: 0.5619
Epoch 10/10
240418/240418 [==============================] - 64s 267us/step - loss: 0.6660 - acc: 0.5937 - val_loss: 0.6831 - val_acc: 0.5654
Test Loss:0.6830835346156602
Test Accuracy:0.5654377074439938
