In [1]:
# NOTE(review): IPython runs a `!` command in a separate shell process, so sourcing
# the profile here presumably does not alter this kernel's environment — confirm
# whether this line is still needed.
!source /etc/profile

import os
import sys
import random
import time
from collections import deque
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Activation, Flatten, MaxPooling2D, Conv2D
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.models import load_model
from tensorflow.keras import optimizers
from tensorflow.keras import utils
import matplotlib.pyplot as plt
import json
import joblib

# Check GPU: report how many GPU devices TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# To list every physical device (not only GPUs), run:
# tf.config.list_physical_devices()

Num GPUs Available:  1


2022-02-07 05:42:06.673852: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-07 05:42:06.680615: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-07 05:42:06.681073: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [2]:
SEQ_LEN = 24  # hours; number of consecutive rows in each model input window
FUTURE_PERIOD_PREDICT = 1  # hours; only referenced by the disabled labelling code in this notebook

csv_file = "data/formatted/ETHUSDT-1h-data.csv"
model_path = "models/TANH-e20-b64-s24-fpp1-1644209479-e08-vacc0.998-1644209502.model"

# The scaler file is named after the first six dash-separated fields of the model
# file name. os.path.basename is used so the derivation does not depend on how
# many directory levels precede the file in model_path.
scaler_path = "scalers/" + "-".join(os.path.basename(model_path).split("-")[:6])

In [3]:
## Import data
# DATA MUST BE FORMATTED USING CSV_FORMATTER.IPYNB

# Skip the first line of the file, apply explicit column names, and index the
# frame by the "timestamp" column.
data = pd.read_csv(
    csv_file,
    skiprows=[0],
    names=["timestamp", "open", "high", "low", "close", "volume", "rsi", "ema"],
).set_index("timestamp")

data.head()

Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1503064800,304.73,311.06,304.73,308.88,332.5643,56.653853,305.268933
1503068400,308.88,311.79,306.27,307.78,261.1517,55.497845,305.393589
1503072000,307.78,310.29,305.73,308.47,388.17208,56.079385,305.543699
1503075600,307.17,309.66,301.3,303.22,357.19041,50.849211,305.432161
1503079200,303.22,303.22,296.32,298.52,398.64644,46.80643,305.105516


In [4]:
## Formatting data

# This notebook only runs inference, so the labelling steps are disabled.
# For reference, the disabled code did the following:
#   future = close.shift(-FUTURE_PERIOD_PREDICT)
#   target = (future - close) / close
#   dropped the trailing FUTURE_PERIOD_PREDICT rows and the "future" column
#   split the frame into the first 95% (train) and last 5% (validation)

# Discard rows that contain missing values.
data = data.dropna()

data.head()

Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1503064800,304.73,311.06,304.73,308.88,332.5643,56.653853,305.268933
1503068400,308.88,311.79,306.27,307.78,261.1517,55.497845,305.393589
1503072000,307.78,310.29,305.73,308.47,388.17208,56.079385,305.543699
1503075600,307.17,309.66,301.3,303.22,357.19041,50.849211,305.432161
1503079200,303.22,303.22,296.32,298.52,398.64644,46.80643,305.105516


In [5]:
## Preprocess Data

def preprocess_df(df_p):
    """Convert a frame of raw columns into overlapping model input windows.

    Every column except one named "target" is replaced by its percentage
    change and then standardised with ``preprocessing.scale``. Infinite values
    are turned into NaN and rows containing NaN are dropped after each column
    is converted, so the frame shrinks while the loop runs.

    Args:
        df_p: source DataFrame; it is copied, not modified.

    Returns:
        ``np.ndarray`` holding one window of ``SEQ_LEN`` consecutive
        preprocessed rows for every row at which a full window is available.
    """
    df = df_p.copy()

    feature_cols = [name for name in df.columns if name != "target"]
    for name in feature_cols:
        df[name] = df[name].pct_change()
        # Clean-up is applied to the whole frame on every pass (not only to the
        # current column); the order is kept as-is because it affects which
        # rows each column is scaled over.
        df.replace([np.inf, -np.inf], np.nan, inplace=True)
        df.dropna(inplace=True)
        df[name] = preprocessing.scale(df[name])

    df.dropna(inplace=True)

    # Slide a fixed-length window over the rows; emit a snapshot once it is full.
    window = deque(maxlen=SEQ_LEN)
    windows = []
    for row in df.values:
        window.append(list(row))
        if len(window) < SEQ_LEN:
            continue
        windows.append(np.array(window))

    # Shuffling and buy/sell balancing are training-time steps and are not
    # performed here; windows stay in chronological order.
    return np.array(windows)
    
    
# Build the model input: one window of SEQ_LEN preprocessed rows per sample.
seq_data = preprocess_df(data)

In [6]:
## Load model

# Restore the saved Keras model from model_path and print its layer summary.
model = load_model(model_path)
model.summary()


2022-02-07 05:42:16.389362: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-07 05:42:16.389848: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-07 05:42:16.390266: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-07 05:42:16.724261: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-07 05:42:16.724673: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from S

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 24, 128)           69632     
                                                                 
 dropout (Dropout)           (None, 24, 128)           0         
                                                                 
 batch_normalization (BatchN  (None, 24, 128)          512       
 ormalization)                                                   
                                                                 
 lstm_1 (LSTM)               (None, 24, 128)           131584    
                                                                 
 dropout_1 (Dropout)         (None, 24, 128)           0         
                                                                 
 batch_normalization_1 (Batc  (None, 24, 128)          512       
 hNormalization)                                        

In [7]:
# Scratch cell: starting balances and a spot check of one close price.
bal = 10000
usd_bal = 10000
coin_bal = 0

# NOTE(review): placeholder value; no visible code in this notebook reads this global.
price = 234890

# Close price at offset SEQ_LEN, i.e. the price get_price(0) returns. Indexed via
# SEQ_LEN instead of a literal 24 so it stays in step with the configured window.
list(data["close"])[0 + SEQ_LEN]

# print(f"BAL {bal}\tUSD {usd_bal}\tCOIN {coin_bal}")

287.14

In [9]:
# Run inference over every window in seq_data, requesting placement on GPU 0.
with tf.device("/device:GPU:0"):
    pred = model.predict(seq_data)

    # print(json.dumps([float(list(d)[0]) for d in list(model.predict(seq_data))], indent=4))
    

    

In [13]:
# Load the fitted scaler from scaler_path and undo the scaling on the predictions.
# NOTE: joblib.load unpickles the file, so only load scaler files from a trusted source.
scaler = joblib.load(scaler_path)
unscaled_pred = scaler.inverse_transform(pred)

# Print the first component of every unscaled prediction as a JSON array
# (one line per prediction, so the output is as long as seq_data).
print(json.dumps([float(row[0]) for row in unscaled_pred], indent=4))

[
    -0.00026289792731404305,
    -0.00026227114722132683,
    -0.00032883943640626967,
    -0.0007795246783643961,
    -0.0003816113749053329,
    -0.00033874859218485653,
    -0.0002698771422728896,
    -0.0002294909063493833,
    -0.0001965714036487043,
    -0.00013605270942207426,
    -0.00020051244064234197,
    -0.0002557127154432237,
    -0.00025334511883556843,
    -0.00025561705115251243,
    -0.0002571223594713956,
    -0.00025868392549455166,
    -0.00026816027821041644,
    -0.0003238792414776981,
    -0.00038896629121154547,
    -0.00026311088004149497,
    -0.0002467295271344483,
    -0.00021753960754722357,
    -0.00022326872567646205,
    -0.00022712985810358077,
    -0.0002517332904972136,
    -0.00026581002748571336,
    -0.0002652929397299886,
    -0.0002636118442751467,
    -0.000262381014181301,
    -0.0002676204312592745,
    -0.0004021658096462488,
    -0.00036329770227894187,
    -0.0002789665013551712,
    -0.00024703878443688154,
    -0.00022568357235286385,


In [7]:
## Trading bot (test only)

# Simulated account state; these globals are mutated by do_buy / do_sell / calc_bal.
bal = 10000
usd_bal = bal
coin_bal = 0

# x-axis for the balance curve: the main loops record one balance per sequence
# plus one more after the final sell, hence len(seq_data) + 1 points.
model_trade_x = list(range(len(seq_data) + 1))
# Balance samples appended by the main loops.
model_trade_y = []

def do_buy(price):
    """Spend the whole USD balance on coins at ``price``.

    Does nothing when the USD balance is zero. Otherwise the fee fraction from
    ``calc_fee()`` is taken out of the USD balance, the remainder is converted
    to coins, and the total balance is refreshed through ``calc_bal(price)``.
    """
    global usd_bal, coin_bal

    if usd_bal != 0:
        # Fee is charged on the USD amount before conversion.
        spendable = usd_bal - usd_bal * calc_fee()
        coin_bal += (spendable / price)
        usd_bal = 0

        calc_bal(price)

def do_sell(price):
    """Convert the whole coin balance back to USD at ``price``.

    Does nothing when no coins are held. Otherwise the sale proceeds are added
    to the USD balance, the fee fraction from ``calc_fee()`` is taken out of the
    resulting USD balance, and the total balance is refreshed through
    ``calc_bal(price)``.
    """
    global usd_bal, coin_bal

    if coin_bal != 0:
        gross = usd_bal + (coin_bal * price)
        # Fee is charged on the full USD balance after the sale.
        usd_bal = gross - gross * calc_fee()
        coin_bal = 0

        calc_bal(price)

def calc_bal(price):
    """Recompute the global ``bal`` as USD held plus coins valued at ``price``."""
    global bal
    coin_value = coin_bal * price
    bal = usd_bal + coin_value

def calc_fee():
    """Return the fee fraction applied to each trade.

    Fees are currently switched off, so the result is always 0.
    """
    # Disabled balance-tiered schedule, kept for reference:
    #   bal > 1000000 -> .0018
    #   bal > 100000  -> .0020
    #   bal > 50000   -> .0025
    #   bal > 10000   -> .0035
    #   otherwise     -> .0050
    fee_fraction = 0
    return fee_fraction

def get_price(i):
    """Return the close price used when trading on sequence ``i``.

    Args:
        i: position of the sequence in ``seq_data``.

    Returns:
        The value of ``data["close"]`` at position ``i + SEQ_LEN``, as a float.

    Raises:
        IndexError: if ``i + SEQ_LEN`` is past the end of the column.
    """
    # Positional lookup with .iloc avoids copying the whole column into a list on
    # every call, which made the back-test loop quadratic in the number of rows.
    # NOTE(review): the i + SEQ_LEN offset does not account for rows dropped by
    # preprocess_df — confirm the price lines up with the end of window i.
    return float(data["close"].iloc[i + SEQ_LEN])

def predict(seq):
    """Run the model on ``seq`` (requesting GPU 0) and return the first output row."""
    with tf.device("/device:GPU:0"):
        batch_output = model.predict(seq)
        return batch_output[0]

def full_predict(full_seq):
    """Run the model on every window in ``full_seq`` (requesting GPU 0) and return all outputs."""
    with tf.device("/device:GPU:0"):
        all_outputs = model.predict(full_seq)
        return all_outputs


def main_loop_iter():
    """Back-test the strategy, calling the model once per sequence.

    For each sequence the current balance is logged and recorded in
    ``model_trade_y``, then a buy is attempted when the first prediction entry
    exceeds the second and a sell otherwise. After the last sequence any
    remaining position is sold and the final balance is recorded and printed.
    """
    n_steps = len(seq_data)
    print(f"ITER LEN: {len(seq_data)}")

    for i in range(n_steps):
        print(f"SEQ: {i}\tBAL: {bal}")
        model_trade_y.append(bal)

        # Predict on a single-window batch.
        # Assumes the model emits at least two values per sample
        # (index 0 = buy score, index 1 = sell score) — TODO confirm.
        scores = predict(seq_data[i:i+1])
        trade = do_buy if scores[0] > scores[1] else do_sell
        trade(get_price(i))

    # Close any open position at the last sequence's price.
    do_sell(get_price(n_steps - 1))
    model_trade_y.append(bal)
    print(f"FINAL BAL: {bal}")

def main_loop():
    """Back-test the strategy using one batched prediction over all sequences.

    For each sequence the current balance is logged and recorded in
    ``model_trade_y``, then a buy is attempted when the first prediction entry
    exceeds the second and a sell otherwise. After the last sequence any
    remaining position is sold and the final balance is recorded and printed.
    """
    n_steps = len(seq_data)
    print(f"ITER LEN: {len(seq_data)}")

    prediction = full_predict(seq_data)

    for i in range(n_steps):
        print(f"SEQ: {i}\tBAL: {bal}")
        model_trade_y.append(bal)

        # Assumes each prediction row has at least two values
        # (index 0 = buy indicator, index 1 = sell indicator) — TODO confirm.
        # A stricter rule (act only when one indicator is > .8 and the other
        # < .2) exists as a disabled alternative.
        scores = prediction[i]
        trade = do_buy if scores[0] > scores[1] else do_sell
        trade(get_price(i))

    # Close any open position at the last sequence's price.
    do_sell(get_price(n_steps - 1))
    model_trade_y.append(bal)
    print(f"FINAL BAL: {bal}")


# Run the batched back-test over every sequence in seq_data.
main_loop()

ITER LEN: 38543


2022-02-03 03:06:59.733310: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8204


SEQ: 0	BAL: 10000
SEQ: 1	BAL: 10000.0
SEQ: 2	BAL: 10000.0
SEQ: 3	BAL: 10047.714121349532
SEQ: 4	BAL: 10047.714121349532
SEQ: 5	BAL: 10047.714121349532
SEQ: 6	BAL: 10076.738953985549
SEQ: 7	BAL: 10076.738953985549
SEQ: 8	BAL: 10076.738953985549
SEQ: 9	BAL: 10148.458306319453
SEQ: 10	BAL: 10148.458306319453
SEQ: 11	BAL: 10148.458306319453
SEQ: 12	BAL: 10148.458306319453
SEQ: 13	BAL: 10148.458306319453
SEQ: 14	BAL: 10148.458306319453
SEQ: 15	BAL: 10265.554589128602
SEQ: 16	BAL: 10265.554589128602
SEQ: 17	BAL: 10214.39848097542
SEQ: 18	BAL: 10214.39848097542
SEQ: 19	BAL: 10214.39848097542
SEQ: 20	BAL: 10214.39848097542
SEQ: 21	BAL: 10214.39848097542
SEQ: 22	BAL: 10849.711242923699
SEQ: 23	BAL: 10849.711242923699
SEQ: 24	BAL: 10849.711242923699
SEQ: 25	BAL: 10983.75539203732
SEQ: 26	BAL: 10983.75539203732
SEQ: 27	BAL: 10983.75539203732
SEQ: 28	BAL: 10983.75539203732
SEQ: 29	BAL: 10983.75539203732
SEQ: 30	BAL: 10983.75539203732
SEQ: 31	BAL: 10983.75539203732
SEQ: 32	BAL: 10983.75539203732
SE

In [9]:
# Plot the recorded balance curve together with the close price.
# This cell needs model_trade_x / model_trade_y, which are created by the
# trading-bot cell, so that cell must have been run in the current kernel.
plt.rcParams["figure.figsize"] = (15,15)
plt.plot(model_trade_x, model_trade_y, label="balance")
# Take exactly len(model_trade_x) trailing closes: plt.plot needs x and y of the
# same length, which the previous hard-coded row count (38568) did not guarantee.
plt.plot(model_trade_x, data["close"].tail(len(model_trade_x)), label="close")
plt.legend();

NameError: name 'model_trade_x' is not defined