In [4]:
!source /etc/profile

import os
import sys
import random
import time
from collections import deque
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Activation, Flatten, MaxPooling2D, Conv2D
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.models import load_model
from tensorflow.keras import optimizers
from tensorflow.keras import utils

# Check GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# tf.config.list_physical_devices()

Num GPUs Available:  1


In [20]:
SEQ_LEN = 48 #hours
FUTURE_PERIOD_PREDICT = 1 #hours

# Model parameters
EPOCHS = 20
BATCH_SIZE = 64
NAME = f"softmax2-e{EPOCHS}-b{BATCH_SIZE}-s{SEQ_LEN}-fpp{FUTURE_PERIOD_PREDICT}-{int(time.time())}"

In [21]:
## Import data
# DATA MUST BE FORMATTED USING CSV_FORMATTER.IPYNB

csv_file = "data/formatted/BTCUSDT-1h-data.csv"

data = pd.read_csv(csv_file, skiprows=[0], names=["timestamp", "open", "high", "low", "close", "volume", "rsi", "ema"])

data.set_index("timestamp", inplace=True)

data.head()

Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1503064800,4304.15,4371.52,4296.04,4356.31,51.563675,52.623958,4327.15156
1503068400,4356.31,4357.37,4302.72,4340.31,24.093449,51.678528,4327.804777
1503072000,4320.52,4340.31,4287.79,4331.71,15.118957,51.167386,4327.995329
1503075600,4302.97,4318.16,4221.05,4293.09,46.533767,48.919621,4326.319858
1503079200,4293.09,4293.09,4193.7,4259.4,74.368943,47.054235,4323.157459


In [22]:
## Formatting data

def classify(current, future):
    if float(future) > float(current):
        return 1
    return 0



data["future"] = data["close"].shift(-FUTURE_PERIOD_PREDICT)

# Cut off NaNs
# data = data[:-FUTURE_PERIOD_PREDICT]
data.dropna(inplace=True)

data["target"] = list(map(classify, data["close"], data["future"]))
# data[["close", "future", "target"]].tail()
data = data.drop("future", 1)

# Split dataset
last_5_pct = int(len(data) * .95)

train_data = data[:last_5_pct]
validation_data = data[last_5_pct:]

print(f"{len(train_data)} :: {len(validation_data)}")

data.head()

36695 :: 1932


  data = data.drop("future", 1)


Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1503064800,4304.15,4371.52,4296.04,4356.31,51.563675,52.623958,4327.15156,0
1503068400,4356.31,4357.37,4302.72,4340.31,24.093449,51.678528,4327.804777,0
1503072000,4320.52,4340.31,4287.79,4331.71,15.118957,51.167386,4327.995329,0
1503075600,4302.97,4318.16,4221.05,4293.09,46.533767,48.919621,4326.319858,0
1503079200,4293.09,4293.09,4193.7,4259.4,74.368943,47.054235,4323.157459,0


In [23]:
## Helper
## Ratios of buy to sell targets

## See how balances the input data is

sell_counter = list(data["target"]).count(0)
buy_counter = list(data["target"]).count(1)

pct_sell = sell_counter / len(data)
pct_buy = buy_counter / len(data)

print(f"{pct_sell} :: {pct_buy}")

0.48815595308980764 :: 0.5118440469101924


In [24]:
## Preprocess Data

def preprocess_df(df):
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.replace([np.inf, -np.inf], np.nan, inplace=True)
            df.dropna(inplace=True)

            df[col] = preprocessing.StandardScaler().fit_transform(df[col].values.reshape(-1,1))

    df.dropna(inplace=True)

    sequential_data = []
    prev_periods = deque(maxlen=SEQ_LEN)

    for i in df.values:
        prev_periods.append([n for n in i[:-1]])
        if len(prev_periods) == SEQ_LEN:
            sequential_data.append([np.array(prev_periods), i[-1]])

    # random.shuffle(sequential_data)

    # Balance buys and sells
    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells

    random.shuffle(sequential_data)

    X = [d[0] for d in sequential_data]
    Y = [d[1] for d in sequential_data]

    return np.array(X), np.array(Y)
    

train_x, train_y = preprocess_df(train_data)
validation_x, validation_y = preprocess_df(validation_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().replace(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ret

In [25]:
## Dataset metrics

print(f"Train : Validation == {len(train_x)} : {len(validation_x)}")
print(f"Train\t\tBuys : Sells == {list(train_y).count(0)} : {list(train_y).count(1)}")
print(f"Validation\tBuys : Sells == {list(validation_y).count(0)} : {list(validation_y).count(1)}")

Train : Validation == 35664 : 1836
Train		Buys : Sells == 17832 : 17832
Validation	Buys : Sells == 918 : 918


In [27]:
train_x.shape[1:]

(48, 7)

In [17]:
## Make model

model = Sequential()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)


model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh", return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh", return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh"))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))



model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# saved_model = tf.train.load_checkpoint("")
# model.load_weights(saved_model)

tb = TensorBoard(log_dir=f"logs/{NAME}")

filepath = NAME + "-e{epoch:02d}-vacc{val_accuracy:.3f}-" + str(int(time.time()))
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor="val_acc", verbose=1, save_best_only=True, mode="max"))

2022-01-30 01:03:00.382817: W tensorflow/core/util/tensor_slice_reader.cc:96] Could not open models/softmax2-e20-b64-s48-fpp1-1643498200-e08-vacc0.550-1643498220.model: FAILED_PRECONDITION: models/softmax2-e20-b64-s48-fpp1-1643498200-e08-vacc0.550-1643498220.model; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


In [9]:
## Fit model

with tf.device("/device:GPU:0"):
    history = model.fit(x=train_x, y=train_y,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(validation_x, validation_y),
                        callbacks=[tb, checkpoint]
                        )

Epoch 1/20


2022-01-29 18:37:54.904864: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8204




2022-01-29 18:38:19.920358: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e01-vacc0.528-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e01-vacc0.528-1643499465.model/assets


Epoch 2/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e02-vacc0.531-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e02-vacc0.531-1643499465.model/assets


Epoch 3/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e03-vacc0.528-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e03-vacc0.528-1643499465.model/assets


Epoch 4/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e04-vacc0.543-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e04-vacc0.543-1643499465.model/assets


Epoch 5/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e05-vacc0.539-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e05-vacc0.539-1643499465.model/assets


Epoch 6/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e06-vacc0.548-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e06-vacc0.548-1643499465.model/assets


Epoch 7/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e07-vacc0.533-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e07-vacc0.533-1643499465.model/assets


Epoch 8/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e08-vacc0.537-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e08-vacc0.537-1643499465.model/assets


Epoch 9/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e09-vacc0.535-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e09-vacc0.535-1643499465.model/assets


Epoch 10/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e10-vacc0.530-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e10-vacc0.530-1643499465.model/assets


Epoch 11/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e11-vacc0.536-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e11-vacc0.536-1643499465.model/assets


Epoch 12/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e12-vacc0.525-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e12-vacc0.525-1643499465.model/assets


Epoch 13/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e13-vacc0.528-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e13-vacc0.528-1643499465.model/assets


Epoch 14/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e14-vacc0.521-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e14-vacc0.521-1643499465.model/assets


Epoch 15/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e15-vacc0.526-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e15-vacc0.526-1643499465.model/assets


Epoch 16/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e16-vacc0.528-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e16-vacc0.528-1643499465.model/assets


Epoch 17/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e17-vacc0.528-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e17-vacc0.528-1643499465.model/assets


Epoch 18/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e18-vacc0.532-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e18-vacc0.532-1643499465.model/assets


Epoch 19/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e19-vacc0.528-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e19-vacc0.528-1643499465.model/assets


Epoch 20/20



INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e20-vacc0.535-1643499465.model/assets


INFO:tensorflow:Assets written to: models/softmax2-e20-b64-s48-fpp1-1643499439-e20-vacc0.535-1643499465.model/assets




In [15]:
from pushbullet import Pushbullet
pb = Pushbullet("o.nyntgspLep97yl0oPDbp0nAbMIDUGiO5")
push = pb.push_note(f"{time.asctime()}", "ML Training Done")

In [16]:
## Upload to tensorboard
# !tensorboard dev upload --logdir=logs
!tensorboard --logdir=logs

2022-01-29 18:32:12.597758: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-29 18:32:12.605668: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-29 18:32:12.606001: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.7.0 at http://localhost:6006/ (Press CTRL