In [23]:
from ue.uexp.dataprocessing.processor_binance import BinanceProcessor
from ue.uexp.dataprocessing.func import *
from ue.uexp.models.util import *
from ta import add_all_ta_features
import tensorflow as tf
from tensorflow.keras import layers

import warnings
warnings.filterwarnings('ignore')

## Data + preprocessing

In [2]:
### get data (set config params)
ticker_list = ["BTCUSDT"]
start_date = "2021-12-30"
end_date = "2021-12-31"
time_interval = "1s"

p = BinanceProcessor("binance")
df = p.download_data(ticker_list, start_date, end_date, time_interval)

Found 1 symbols
[1/1] - start download daily BTCUSDT aggTrades 

file already exists! ./cache/tick_raw/BTCUSDT-aggTrades-2021-12-29.csv

file already exists! ./cache/tick_raw/BTCUSDT-aggTrades-2021-12-30.csv

file already exists! ./cache/tick_raw/BTCUSDT-aggTrades-2021-12-31.csv


In [3]:
df.drop(["tic"], axis=1, inplace=True)
df.columns = ["Open","High","Low","Close","Volume"]

In [4]:
df

Unnamed: 0,Open,High,Low,Close,Volume
2021-12-30 00:00:00,46464.66,46467.73,46464.65,46467.73,0.39143
2021-12-30 00:00:01,46467.73,46471.83,46460.24,46464.33,4.92520
2021-12-30 00:00:02,46461.29,46461.29,46456.02,46456.03,1.05925
2021-12-30 00:00:03,46456.02,46456.03,46456.02,46456.03,0.03054
2021-12-30 00:00:04,46456.02,46456.03,46456.02,46456.02,0.50262
...,...,...,...,...,...
2021-12-31 23:59:55,46216.94,46216.94,46216.93,46216.94,0.01090
2021-12-31 23:59:56,46216.94,46216.94,46216.93,46216.93,0.15614
2021-12-31 23:59:57,46216.94,46216.94,46200.00,46200.01,4.42605
2021-12-31 23:59:58,46200.02,46200.05,46200.00,46200.05,0.14667


In [5]:
df_feats0 = gen_feats0(df)
df_feats0

Unnamed: 0,Open,High,Low,Close,Volume,spread,upper_Shadow,lower_Shadow,trade
2021-12-30 00:00:00,46464.66,46467.73,46464.65,46467.73,0.39143,3.08,0.00,0.01,3.07
2021-12-30 00:00:01,46467.73,46471.83,46460.24,46464.33,4.92520,11.59,4.10,4.09,-3.40
2021-12-30 00:00:02,46461.29,46461.29,46456.02,46456.03,1.05925,5.27,0.00,0.01,-5.26
2021-12-30 00:00:03,46456.02,46456.03,46456.02,46456.03,0.03054,0.01,0.00,0.00,0.01
2021-12-30 00:00:04,46456.02,46456.03,46456.02,46456.02,0.50262,0.01,0.01,0.00,0.00
...,...,...,...,...,...,...,...,...,...
2021-12-31 23:59:55,46216.94,46216.94,46216.93,46216.94,0.01090,0.01,0.00,0.01,0.00
2021-12-31 23:59:56,46216.94,46216.94,46216.93,46216.93,0.15614,0.01,0.00,0.00,-0.01
2021-12-31 23:59:57,46216.94,46216.94,46200.00,46200.01,4.42605,16.94,0.00,0.01,-16.93
2021-12-31 23:59:58,46200.02,46200.05,46200.00,46200.05,0.14667,0.05,0.00,0.02,0.03


In [6]:
# df_feats2 = add_all_ta_features(df, open="Open", high="High", low="Low", close="Close", volume="Volume")
# df_feats2.head()

## Model Parameters

In [7]:
HORIZON = 1
WINDOW = 30

## Make Windowed dataset (using dt_feats1)

In [9]:
# Make a copy of the Bitcoin historical data with block reward feature
df0 = df_feats0.copy()
df0.head()

Unnamed: 0,Open,High,Low,Close,Volume,spread,upper_Shadow,lower_Shadow,trade
2021-12-30 00:00:00,46464.66,46467.73,46464.65,46467.73,0.39143,3.08,0.0,0.01,3.07
2021-12-30 00:00:01,46467.73,46471.83,46460.24,46464.33,4.9252,11.59,4.1,4.09,-3.4
2021-12-30 00:00:02,46461.29,46461.29,46456.02,46456.03,1.05925,5.27,0.0,0.01,-5.26
2021-12-30 00:00:03,46456.02,46456.03,46456.02,46456.03,0.03054,0.01,0.0,0.0,0.01
2021-12-30 00:00:04,46456.02,46456.03,46456.02,46456.02,0.50262,0.01,0.01,0.0,0.0


In [14]:
# Add windowed columns
for i in range(WINDOW): # Shift values for each step in WINDOW_SIZE
  df0[f"Price+{i+1}"] = df0["Close"].shift(periods=i+1)
df0.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,spread,upper_Shadow,lower_Shadow,trade,Price+1,...,Price+21,Price+22,Price+23,Price+24,Price+25,Price+26,Price+27,Price+28,Price+29,Price+30
2021-12-30 00:00:00,46464.66,46467.73,46464.65,46467.73,0.39143,3.08,0.0,0.01,3.07,,...,,,,,,,,,,
2021-12-30 00:00:01,46467.73,46471.83,46460.24,46464.33,4.9252,11.59,4.1,4.09,-3.4,46467.73,...,,,,,,,,,,
2021-12-30 00:00:02,46461.29,46461.29,46456.02,46456.03,1.05925,5.27,0.0,0.01,-5.26,46464.33,...,,,,,,,,,,
2021-12-30 00:00:03,46456.02,46456.03,46456.02,46456.03,0.03054,0.01,0.0,0.0,0.01,46456.03,...,,,,,,,,,,
2021-12-30 00:00:04,46456.02,46456.03,46456.02,46456.02,0.50262,0.01,0.01,0.0,0.0,46456.03,...,,,,,,,,,,


In [15]:
print("using [price_0, ..., price_29, volume, spread, log_price_change] to predict -> price_30")

using [price_0, ..., price_29, volume, spread, log_price_change] to predict -> price_30


In [16]:
X = df0.dropna().drop(["Close","Open","High","Low"], axis=1).astype(np.float32) 
X.head()

Unnamed: 0,Volume,spread,upper_Shadow,lower_Shadow,trade,Price+1,Price+2,Price+3,Price+4,Price+5,...,Price+21,Price+22,Price+23,Price+24,Price+25,Price+26,Price+27,Price+28,Price+29,Price+30
2021-12-30 00:00:30,0.33023,1.99,0.0,0.01,1.98,46489.339844,46494.988281,46494.980469,46494.980469,46496.320312,...,46456.019531,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531,46456.03125,46456.03125,46464.328125,46467.730469
2021-12-30 00:00:31,0.50273,2.66,2.65,0.0,-0.01,46491.320312,46489.339844,46494.988281,46494.980469,46494.980469,...,46456.019531,46456.019531,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531,46456.03125,46456.03125,46464.328125
2021-12-30 00:00:32,0.16562,0.01,0.0,0.0,0.01,46491.308594,46491.320312,46489.339844,46494.988281,46494.980469,...,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531,46456.03125,46456.03125
2021-12-30 00:00:33,0.16494,2.64,2.55,0.0,0.09,46491.320312,46491.308594,46491.320312,46489.339844,46494.988281,...,46456.03125,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531,46456.03125
2021-12-30 00:00:34,0.29916,0.04,0.0,0.0,0.04,46491.410156,46491.320312,46491.308594,46491.320312,46489.339844,...,46450.390625,46456.03125,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531,46456.019531,46456.03125,46456.019531


In [18]:
y = df0.dropna()["Close"].astype(np.float32)
y.head()

2021-12-30 00:00:30    46491.320312
2021-12-30 00:00:31    46491.308594
2021-12-30 00:00:32    46491.320312
2021-12-30 00:00:33    46491.410156
2021-12-30 00:00:34    46492.468750
Freq: S, Name: Close, dtype: float32

### Train test split (80%)

In [20]:
split_size = int(len(X) * 0.8)
X_train, y_train = X[:split_size], y[:split_size]
X_test, y_test = X[split_size:], y[split_size:]
len(X_train), len(y_train), len(X_test), len(y_test)

(138216, 138216, 34554, 34554)

## Model building and fitting

In [26]:
# Make multivariate time series model
model_dense_multivariate = tf.keras.Sequential([
  layers.Dense(128, activation="relu"),
  layers.Dense(128, activation="relu"), # adding an extra layer here should lead to beating the naive model
  layers.Dense(HORIZON)
], name="model_dense_multivariate")

# Compile
model_dense_multivariate.compile(loss="mae",
                optimizer=tf.keras.optimizers.Adam())

# Fit
model_dense_multivariate.fit(X_train, y_train,
            epochs=100,
            batch_size=128,
            verbose=0, # only print 1 line per epoch
            validation_data=(X_test, y_test),
            callbacks=[create_model_checkpoint(model_name=model_dense_multivariate.name)])

2022-02-03 13:45:56.503639: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-02-03 13:46:00.668727: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets
INFO:tensorflow:Assets written to: model_experiments/model_dense_multivariate/assets


<keras.callbacks.History at 0x2ccf23970>

## Evaluating model
should be the %age off of predicting the horizon price

In [27]:
# Make sure best model is loaded and evaluate
model_6 = tf.keras.models.load_model("model_experiments/model_dense_multivariate")
loss = model_6.evaluate(X_test, y_test)
print("loss: ", loss)

  60/1080 [>.............................] - ETA: 2s - loss: 37.0821

2022-02-03 13:55:14.005448: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




39.17752456665039

In [35]:
# percentage
y_pred = model_6.predict(X_test)

In [None]:
# Evaluate preds
model_6_results = evaluate_preds(y_true=y_test,
                                 y_pred=y_pred)
model_6_results