In [18]:
import random
import time

import pandas as pd
import numpy as np

from sklearn import preprocessing
from collections import deque

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [26]:
SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDICT = "ETH-USD"
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [20]:
def classify(current, future):
    # This if statement essentially says that if the future price is more than the current one, its a good thing.
    if float(future) > float(current):
        return 1
    else:
        return 0


def preprocess_df(df):
    df = df.drop(columns="future")
    
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change() # get the percentage change.
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values) # Normalises the data, between 0 and 1. 
            
    df.dropna(inplace=True)
    
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN) # once it has the SEQ_LEN number of values it will return a list
    
    for i in df.values:
        prev_days.append([n for n in i[:-1]])    
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
            
    random.shuffle(sequential_data)
    
    buys = []
    sells = []
    
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
            
    random.shuffle(buys)
    random.shuffle(sells)
    
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]
    
    sequential_data = buys+sells
    random.shuffle(sequential_data)
    
    X = []
    y = []
    
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    return np.array(X), y

In [27]:
main_df = pd.DataFrame()

ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]

for ratio in ratios:
    dataset = f"crypto_data/{ratio}.csv"
    
    df = pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
    
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
    
    df.set_index("time", inplace=True)
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]
    

    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)
    
main_df["future"] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))

print(main_df[[f"{RATIO_TO_PREDICT}_close", "future", "target"]].head(5))


times =sorted(main_df.index.values)
last_5pct = times[-int(0.05*len(times))]


            ETH-USD_close     future  target
time                                        
1528968660            NaN  485.75000       0
1528968720      486.01001  486.00000       0
1528968780      486.00000  486.00000       0
1528968840      485.75000  485.98999       1
1528968900      486.00000  485.98999       0


In [28]:
# Getting validation data. The last 5% of data in the dataframe.
times =sorted(main_df.index.values)
last_5pct = times[-int(0.05*len(times))]

validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]

train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 74196 validation: 3260
Dont buys: 37098, buys: 37098
VALIDATION Dont buys: 1630, buys: 1630


In [29]:
model = Sequential()

model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.1)) # if anything weird, try putting this back to 0.2
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy']) # consider binary crossentropy for loss

tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor="val_accuracy", verbose=1, save_best_only=True, mode="max"))

train_x = np.asarray(train_x)
train_y = np.asarray(train_y)
validation_x = np.asarray(validation_x)
validation_y = np.asarray(validation_y)

history = model.fit(train_x, train_y, 
                    batch_size=BATCH_SIZE, 
                    epochs=EPOCHS, 
                    validation_data=(validation_x, validation_y), 
                    callbacks=[tensorboard, checkpoint])

Epoch 1/10



INFO:tensorflow:Assets written to: models\RNN_Final-01-0.539.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-01-0.539.model\assets


Epoch 2/10



INFO:tensorflow:Assets written to: models\RNN_Final-02-0.530.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-02-0.530.model\assets


Epoch 3/10



INFO:tensorflow:Assets written to: models\RNN_Final-03-0.501.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-03-0.501.model\assets


Epoch 4/10



INFO:tensorflow:Assets written to: models\RNN_Final-04-0.536.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-04-0.536.model\assets


Epoch 5/10



INFO:tensorflow:Assets written to: models\RNN_Final-05-0.544.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-05-0.544.model\assets


Epoch 6/10



INFO:tensorflow:Assets written to: models\RNN_Final-06-0.548.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-06-0.548.model\assets


Epoch 7/10



INFO:tensorflow:Assets written to: models\RNN_Final-07-0.554.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-07-0.554.model\assets


Epoch 8/10



INFO:tensorflow:Assets written to: models\RNN_Final-08-0.557.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-08-0.557.model\assets


Epoch 9/10



INFO:tensorflow:Assets written to: models\RNN_Final-09-0.566.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-09-0.566.model\assets


Epoch 10/10



INFO:tensorflow:Assets written to: models\RNN_Final-10-0.557.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-10-0.557.model\assets


