In [1]:
import os
import sys
import random
import time
from collections import deque
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Activation, Flatten, MaxPooling2D, Conv2D
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import optimizers
from tensorflow.keras import utils

# Check GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# tf.config.list_physical_devices()

Num GPUs Available:  1


In [2]:
SEQ_LEN = 24 #hours
FUTURE_PERIOD_PREDICT = 3 #hours

# Change time format on csv read
do_time_fix = False

# Model parameters
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"e{EPOCHS}-b{BATCH_SIZE}-s{SEQ_LEN}-fpp{FUTURE_PERIOD_PREDICT}-{int(time.time())}"

In [3]:
## Import data
# csv_file = "BTCUSDT-1h-data.csv"
csv_file = "output.csv"

data = pd.read_csv(csv_file, skiprows=[0], names=["timestamp","open","high","low","close","volume","close_time","quote_av","trades","tb_base_av","tb_quote_av","ignore"])

if do_time_fix:
    for i in range(len(data["timestamp"])):
        t = data["timestamp"][i].split(".")[0]


        # 2017-08-17 04:00:00.000
        tstruct = time.strptime(t, "%Y-%m-%d %H:%M:%S")
        epoch_sec = int(time.mktime(tstruct))

        data["timestamp"][i] = epoch_sec

    data.to_csv(f"{csv_file.split('.')[0]}-formatted.csv")

data.set_index("timestamp", inplace=True)

data.head()

Unnamed: 0_level_0,open,high,low,close,volume,close_time,quote_av,trades,tb_base_av,tb_quote_av,ignore
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1502956800,4261.48,4313.62,4261.32,4308.83,47.181009,1502945999999,202366.138393,171,35.160503,150952.477943,7887.635513
1502960400,4308.83,4328.69,4291.37,4315.32,23.234916,1502949599999,100304.823567,102,21.448071,92608.279728,8039.262402
1502964000,4330.29,4345.45,4309.37,4324.35,7.229691,1502953199999,31282.31267,36,4.802861,20795.317224,8041.760498
1502967600,4316.62,4349.99,4287.41,4349.99,4.443249,1502956799999,19241.0583,25,2.602292,11291.347015,8048.127886
1502971200,4333.32,4377.85,4333.32,4360.69,0.972807,1502960399999,4239.503586,28,0.814655,3552.746817,8018.0365


In [4]:
## Formatting data
try:
    data = data.drop(["trades", "quote_av", "tb_base_av", "tb_quote_av", "ignore", "close_time"], 1)
except:
    pass

def classify(current, future):
    if float(future) > float(current):
        return 1
    return 0


data["future"] = data["close"].shift(-FUTURE_PERIOD_PREDICT)

# Cut off NaNs
# data = data[:-FUTURE_PERIOD_PREDICT]
data.dropna(inplace=True)

data["target"] = list(map(classify, data["close"], data["future"]))
# data[["close", "future", "target"]].tail()
data = data.drop("future", 1)

# Split dataset
last_5_pct = int(len(data) * .95)

train_data = data[:last_5_pct]
validation_data = data[last_5_pct:]

print(f"{len(train_data)} :: {len(validation_data)}")

data.head()

36681 :: 1931


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,open,high,low,close,volume,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1502956800,4261.48,4313.62,4261.32,4308.83,47.181009,1
1502960400,4308.83,4328.69,4291.37,4315.32,23.234916,1
1502964000,4330.29,4345.45,4309.37,4324.35,7.229691,1
1502967600,4316.62,4349.99,4287.41,4349.99,4.443249,1
1502971200,4333.32,4377.85,4333.32,4360.69,0.972807,1


In [5]:
## Helper
## Ratios of buy to sell targets

## See how balances the input data is

sell_counter = list(data["target"]).count(0)
buy_counter = list(data["target"]).count(1)

pct_sell = sell_counter / len(data)
pct_buy = buy_counter / len(data)

print(f"{pct_sell} :: {pct_buy}")

0.4820004143789495 :: 0.5179995856210504


In [6]:
## Preprocess Data

def preprocess_df(df):
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.replace([np.inf, -np.inf], np.nan, inplace=True)
            df.dropna(inplace=True)

            df[col] = preprocessing.StandardScaler().fit_transform(df[col].values.reshape(-1,1))

    df.dropna(inplace=True)

    sequential_data = []
    prev_periods = deque(maxlen=SEQ_LEN)

    for i in df.values:
        prev_periods.append([n for n in i[:-1]])
        if len(prev_periods) == SEQ_LEN:
            sequential_data.append([np.array(prev_periods), i[-1]])

    # random.shuffle(sequential_data)

    # Balance buys and sells
    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells

    random.shuffle(sequential_data)

    X = [d[0] for d in sequential_data]
    Y = [d[1] for d in sequential_data]

    return np.array(X), np.array(Y)
    

train_x, train_y = preprocess_df(train_data)
validation_x, validation_y = preprocess_df(validation_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove 

In [7]:
## Dataset metrics

print(f"Train : Validation == {len(train_x)} : {len(validation_x)}")
print(f"Train\t\tBuys : Sells == {list(train_y).count(0)} : {list(train_y).count(1)}")
print(f"Validation\tBuys : Sells == {list(validation_y).count(0)} : {list(validation_y).count(1)}")

Train : Validation == 35212 : 1842
Train		Buys : Sells == 17606 : 17606
Validation	Buys : Sells == 921 : 921


In [8]:
## Make model

model = Sequential()
optimizer = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)


model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh", return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh", return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh"))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))



model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

tb = TensorBoard(log_dir=f"logs/{NAME}")

filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}-" + str(int(time.time()))
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor="val_acc", verbose=1, save_best_only=True, mode="max"))

  super(Adam, self).__init__(name, **kwargs)


In [9]:
## Fit model

with tf.device("/device:GPU:0"):
    history = model.fit(x=train_x, y=train_y,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(validation_x, validation_y),
                        callbacks=[tb, checkpoint]
                        )

Epoch 1/10



INFO:tensorflow:Assets written to: models\RNN_Final-01-0.524-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-01-0.524-1642374208.model\assets


Epoch 2/10



INFO:tensorflow:Assets written to: models\RNN_Final-02-0.509-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-02-0.509-1642374208.model\assets


Epoch 3/10



INFO:tensorflow:Assets written to: models\RNN_Final-03-0.549-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-03-0.549-1642374208.model\assets


Epoch 4/10



INFO:tensorflow:Assets written to: models\RNN_Final-04-0.516-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-04-0.516-1642374208.model\assets


Epoch 5/10



INFO:tensorflow:Assets written to: models\RNN_Final-05-0.535-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-05-0.535-1642374208.model\assets


Epoch 6/10



INFO:tensorflow:Assets written to: models\RNN_Final-06-0.510-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-06-0.510-1642374208.model\assets


Epoch 7/10



INFO:tensorflow:Assets written to: models\RNN_Final-07-0.525-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-07-0.525-1642374208.model\assets


Epoch 8/10



INFO:tensorflow:Assets written to: models\RNN_Final-08-0.527-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-08-0.527-1642374208.model\assets


Epoch 9/10



INFO:tensorflow:Assets written to: models\RNN_Final-09-0.525-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-09-0.525-1642374208.model\assets


Epoch 10/10



INFO:tensorflow:Assets written to: models\RNN_Final-10-0.548-1642374208.model\assets


INFO:tensorflow:Assets written to: models\RNN_Final-10-0.548-1642374208.model\assets




In [1]:
## Upload to tensorboard
# !tensorboard dev upload --logdir=logs
!tensorboard --logdir=logs