## ZBTCNN TANH 2.1
Predicts price increase  
Uses technicals aswell from csv_formatter.ipynb

### 2.1 Changes  

- Shuffle input data after split  
- Added percentage error as a metric
- RSI scaler max set to 100 because that is RSI fxn limit

In [3]:
!source /etc/profile

import os
import sys
import random
import time
from collections import deque
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Activation, Flatten, MaxPooling2D, Conv2D
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import optimizers
from tensorflow.keras import utils
import joblib

# Check GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# tf.config.list_physical_devices()

Num GPUs Available:  1


In [5]:
SEQ_LEN = 48 #hours
FUTURE_PERIOD_PREDICT = 1 #hours

# Model parameters
EPOCHS = 20
BATCH_SIZE = 16
NAME = f"TANH2-e{EPOCHS}-b{BATCH_SIZE}-s{SEQ_LEN}-fpp{FUTURE_PERIOD_PREDICT}-{int(time.time())}"

In [6]:
## Import data
# DATA MUST BE FORMATTED USING CSV_FORMATTER.IPYNB

csv_file = "data/formatted/BTCUSDT-1h-data.csv"

data = pd.read_csv(csv_file, skiprows=[0], names=["timestamp", "open", "high", "low", "close", "volume", "rsi", "ema"])

data.set_index("timestamp", inplace=True)

data.head()

Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1503064800,4304.15,4371.52,4296.04,4356.31,51.563675,52.623958,4327.15156
1503068400,4356.31,4357.37,4302.72,4340.31,24.093449,51.678528,4327.804777
1503072000,4320.52,4340.31,4287.79,4331.71,15.118957,51.167386,4327.995329
1503075600,4302.97,4318.16,4221.05,4293.09,46.533767,48.919621,4326.319858
1503079200,4293.09,4293.09,4193.7,4259.4,74.368943,47.054235,4323.157459


In [9]:
## Add min max bounds to data

price_max = 80000.0
volume_max = 60000.0
rsi_max = 100.0

price_min = 2000.0
volume_min = 0.0
rsi_min = 0.0

max_df = pd.DataFrame()

max_df["timestamp"] = []

for col in data.columns:
    max_df[col] = []

max_df = max_df.append({"timestamp": str(int(time.time())),
                "open": price_max,
                "high": price_max,
                "low": price_max,
                "close": price_max,
                "volume": volume_max,
                "rsi": rsi_max,
                "ema": price_max,
                "target": price_max}, ignore_index=True)

max_df = max_df.append({"timestamp": str(int(time.time())),
                "open": price_min,
                "high": price_min,
                "low": price_min,
                "close": price_min,
                "volume": volume_min,
                "rsi": rsi_min,
                "ema": price_min,
                "target": price_min}, ignore_index=True)

max_df.set_index("timestamp", inplace=True)

max_df.head()


Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1645506622,80000.0,80000.0,80000.0,80000.0,60000.0,100.0,80000.0,80000.0
1645506622,2000.0,2000.0,2000.0,2000.0,0.0,0.0,2000.0,2000.0


In [10]:
## Formatting data and Scaler Initialization

# def classify(current, future):
#     return float((future - current) / current)


data["target"] = data["close"].shift(-FUTURE_PERIOD_PREDICT)

# # Cut off NaNs
# # data = data[:-FUTURE_PERIOD_PREDICT]
data.dropna(inplace=True)

# data["target"] = list(map(classify, data["close"], data["future"]))
# # data[["close", "future", "target"]].tail()
# data = data.drop("future", 1)

# Fit scalers
price_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
volume_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
rsi_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))

data = data.append(max_df)

price_scaler.fit(np.array(data["close"]).reshape(-1, 1))
volume_scaler.fit(np.array(data["volume"]).reshape(-1, 1))
rsi_scaler.fit(np.array(data["rsi"]).reshape(-1, 1))

#Dump scalers
try:
    os.mkdir(f"scalers/{NAME}")
except:
    pass

joblib.dump(price_scaler, f"scalers/{NAME}/price_scaler")
joblib.dump(volume_scaler, f"scalers/{NAME}/volume_scaler")
joblib.dump(rsi_scaler, f"scalers/{NAME}/rsi_scaler")

# Remove min max boundary values
data = data[:-2]


# Split dataset # Dont split here; do split after shuffle
# last_5_pct = int(len(data) * .95)

# train_data = data[:last_5_pct]
# validation_data = data[last_5_pct:]

# print(f"{len(train_data)} :: {len(validation_data)}")

data.head()

Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1503064800,4304.15,4371.52,4296.04,4356.31,51.563675,52.623958,4327.15156,4340.31
1503068400,4356.31,4357.37,4302.72,4340.31,24.093449,51.678528,4327.804777,4331.71
1503072000,4320.52,4340.31,4287.79,4331.71,15.118957,51.167386,4327.995329,4293.09
1503075600,4302.97,4318.16,4221.05,4293.09,46.533767,48.919621,4326.319858,4259.4
1503079200,4293.09,4293.09,4193.7,4259.4,74.368943,47.054235,4323.157459,4236.89


In [11]:
## Helper
## Ratios of buy to sell targets

## See how balanced the input data is

close_target_list = list(zip(data["close"], data["target"]))

sell_counter = len([x for x in close_target_list if x[0] > x[1]])
buy_counter = len([x for x in close_target_list if x[0] < x[1]])

pct_sell = sell_counter / len(data)
pct_buy = buy_counter / len(data)

print(f"{pct_sell} :: {pct_buy}")

0.48743107153027676 :: 0.5118440469101924


In [9]:
## Preprocess Data

def preprocess_df_p1(df_p):

    df = pd.DataFrame()
    for col in df_p.columns:
        df[col] = df_p[col]

    for col in df.columns:
        scaler = None
        if col in ["open", "high", "low", "close", "ema", "target"]:
            scaler = price_scaler
        elif col == "volume":
            scaler = volume_scaler
        elif col == "rsi":
            scaler = rsi_scaler
        else:
            raise Exception("Column not recognized and scaler cannot be determined")

        df.replace([np.inf, -np.inf], np.nan, inplace=True)
        df.dropna(inplace=True)
        df[col] = scaler.transform(np.array(df[col]).reshape(-1, 1))

        
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(inplace=True)

    sequential_data = []
    prev_periods = deque(maxlen=SEQ_LEN)

    for i in df.values:
        prev_periods.append([n for n in i[:-1]])
        if len(prev_periods) == SEQ_LEN:
            sequential_data.append([np.array(prev_periods), i[-1]])

    return sequential_data
    # random.shuffle(sequential_data)

def preprocess_df_p2(seq_data):

    # Balance buys and sells
    buys = []
    sells = []

    for seq, target in seq_data:

        if target < seq[-1][3]: #compares to close column
            sells.append([seq, target])
        elif target > seq[-1][3]:
            buys.append([seq, target])

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]

    local_seq_data = buys + sells

    random.shuffle(local_seq_data)

    X = [d[0] for d in local_seq_data]
    Y = [d[1] for d in local_seq_data]

    return np.array(X), np.array(Y)
    

# train_x, train_y = preprocess_df(train_data)
# validation_x, validation_y = preprocess_df(validation_data)

# Preprocess and split data here
seq_data_full = preprocess_df_p1(data)

last_5_pct = int(len(seq_data_full) * .95)

seq_data_train = seq_data_full[:last_5_pct]
seq_data_val = seq_data_full[last_5_pct:]

train_x, train_y = preprocess_df_p2(seq_data_train)
validation_x, validation_y = preprocess_df_p2(seq_data_val)

In [10]:
## Dataset metrics

close_target_list = list(zip([x[-1][3] for x in list(train_x)], list(train_y)))

train_sell_counter = len([x for x in close_target_list if x[0] > x[1]])
train_buy_counter = len([x for x in close_target_list if x[0] < x[1]])

close_target_list = list(zip([x[-1][3] for x in list(validation_x)], list(validation_y)))

val_sell_counter = len([x for x in close_target_list if x[0] > x[1]])
val_buy_counter = len([x for x in close_target_list if x[0] < x[1]])

print(f"Train : Validation == {len(train_x)} : {len(validation_x)}")
print(f"Train\t\tBuys : Sells == {train_buy_counter} : {train_sell_counter}")
print(f"Validation\tBuys : Sells == {val_buy_counter} : {val_sell_counter}")

Train : Validation == 35626 : 1882
Train		Buys : Sells == 17813 : 17813
Validation	Buys : Sells == 941 : 941


In [11]:
## Make model

model = Sequential()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)


model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh", return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh", return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation="tanh"))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(1, activation="tanh"))



model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=optimizer, metrics=["mse", "mean_absolute_percentage_error"])

tb = TensorBoard(log_dir=f"logs/{NAME}")

filepath = NAME + "-e{epoch:02d}-vmse{val_loss:.5f}-" + str(int(time.time()))
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor="mse", verbose=1, save_best_only=True, mode="max"))

2022-02-21 22:43:20.596028: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-21 22:43:20.597276: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-21 22:43:20.598302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-21 22:43:20.598872: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA 

In [12]:
with tf.device("/device:GPU:0"):
    history = model.fit(x=train_x, y=train_y,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(validation_x, validation_y),
                        callbacks=[tb, checkpoint]
                        )

Epoch 1/20


2022-02-21 22:43:40.057912: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8201




2022-02-21 22:44:10.033057: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e01-vacc0.02794-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e01-vacc0.02794-1645483403.model/assets


Epoch 2/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e02-vacc0.00854-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e02-vacc0.00854-1645483403.model/assets


Epoch 3/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e03-vacc0.00522-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e03-vacc0.00522-1645483403.model/assets


Epoch 4/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e04-vacc0.00503-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e04-vacc0.00503-1645483403.model/assets


Epoch 5/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e05-vacc0.00406-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e05-vacc0.00406-1645483403.model/assets


Epoch 6/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e06-vacc0.00356-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e06-vacc0.00356-1645483403.model/assets


Epoch 7/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e07-vacc0.00322-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e07-vacc0.00322-1645483403.model/assets


Epoch 8/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e08-vacc0.00346-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e08-vacc0.00346-1645483403.model/assets


Epoch 9/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e09-vacc0.00300-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e09-vacc0.00300-1645483403.model/assets


Epoch 10/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e10-vacc0.00321-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e10-vacc0.00321-1645483403.model/assets


Epoch 11/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e11-vacc0.00275-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e11-vacc0.00275-1645483403.model/assets


Epoch 12/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e12-vacc0.00279-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e12-vacc0.00279-1645483403.model/assets


Epoch 13/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e13-vacc0.00263-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e13-vacc0.00263-1645483403.model/assets


Epoch 14/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e14-vacc0.00263-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e14-vacc0.00263-1645483403.model/assets


Epoch 15/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e15-vacc0.00271-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e15-vacc0.00271-1645483403.model/assets


Epoch 16/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e16-vacc0.00256-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e16-vacc0.00256-1645483403.model/assets


Epoch 17/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e17-vacc0.00249-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e17-vacc0.00249-1645483403.model/assets


Epoch 18/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e18-vacc0.00245-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e18-vacc0.00245-1645483403.model/assets


Epoch 19/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e19-vacc0.00243-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e19-vacc0.00243-1645483403.model/assets


Epoch 20/20



INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e20-vacc0.00234-1645483403.model/assets


INFO:tensorflow:Assets written to: models/TANH2-e20-b16-s48-fpp1-1645483374-e20-vacc0.00234-1645483403.model/assets




In [None]:
from pushbullet import Pushbullet
pb = Pushbullet("o.nyntgspLep97yl0oPDbp0nAbMIDUGiO5")
push = pb.push_note(f"{time.asctime()}", "ML Training Done")