In [257]:
from etr.auto_import import *

## Data
---

In [307]:
# replace yfinance data as crypt
data = pd.concat(
    [pd.read_parquet(file) for file in Path("../../data/gmo/BTC_JPY/").glob("*2020*")]
    + [pd.read_parquet(file) for file in Path("../../data/gmo/ETH_JPY/").glob("*2020*")]
#     + [pd.read_parquet(file) for file in Path("../../data/gmo/LTC_JPY/").glob("*2020*")]
)

### preprocess

In [311]:
%%time
freq = "10min"
ohlc = data.set_index("timestamp").groupby("symbol").resample(freq, label="right").price.ohlc()
v = data.set_index("timestamp").groupby(["symbol", "side"]).resample(freq, label="right")["size"].sum()

# Form OHLCV
ohlcv = ohlc.join(v.unstack(level=1))
ohlcv["volume"] = ohlcv.BUY + ohlcv.SELL
ohlcv["v_imbalance"] = ohlcv.BUY.fillna(0) - ohlcv.SELL.fillna(0)

CPU times: user 18.5 s, sys: 10.6 s, total: 29.1 s
Wall time: 49.7 s


In [346]:
# as timeseries
close = ohlcv.close.unstack(level=0).ffill()
cc = np.log(close.pct_change().add(1)).rename(lambda x: f"CC_{x[0]}", axis=1)
hl = np.log(ohlcv.high.unstack(level=0).div(ohlcv.low.unstack(level=0))).fillna(0).rename(lambda x: f"HL_{x[0]}", axis=1)
v_imb = ohlcv.v_imbalance.unstack(level=0).fillna(0).rename(lambda x: f"VI_{x[0]}", axis=1)
v_tot = ohlcv.volume.unstack(level=0).fillna(0).rename(lambda x: f"VT_{x[0]}", axis=1)

horizon = 6
label = np.log(close.shift(-horizon) / close)
label = label.sub(label.mean()).div(label.std()).clip(-4, +4)
features = pd.concat([cc, hl, v_imb, v_tot], axis=1)
features_norm = features.sub(features.rolling("7D").mean()).div(features.rolling("7D").std()).clip(-4, +4)
features_norm["W"] = features_norm.index.weekday
features_norm["H"] = features_norm.index.hour
dataset = label.join(features_norm).dropna()

In [347]:
X, y = dataset.iloc[:, label.shape[1]:], dataset.ETH_JPY

In [348]:
X.head(3)

symbol,CC_B,CC_E,HL_B,HL_E,VI_B,VI_E,VT_B,VT_E,W,H
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-12-31 22:10:00,0.960466,-0.707107,0.276149,1.211169,0.810523,-2.266048,0.54272,2.053838,1,22
2019-12-31 22:20:00,-1.102851,0.57735,-0.818994,-0.53515,-1.577404,0.337222,-0.065053,-0.492669,1,22
2019-12-31 22:30:00,1.52583,0.5,0.772385,-0.499531,-0.345608,0.31816,0.496215,-0.461112,1,22


In [349]:
y.head(3)

timestamp
2019-12-31 22:10:00    0.541024
2019-12-31 22:20:00    0.541024
2019-12-31 22:30:00    0.331412
Name: ETH_JPY, dtype: float64

## Train
---

In [351]:
from etr.research.mt_garch.lstm_transformer import *

In [360]:
model = LSTMTransformerWrapper(
    total_input_dim=X.shape[1],
    cat_index={-2: 7, -1: 24}, 
    seq_len=60, 
    activation=nn.Identity(), 
    criterion=nn.MSELoss(),
    lr=0.001,
)

In [361]:
TRAIN_ET = "2020-01-30 00:00"
model.fit_dataframe(
    X_df = X.loc[:TRAIN_ET],
    y_df = y.loc[:TRAIN_ET],
    val_split=0.2
)

Epoch [1/100] Train Loss: 0.5746 | Val Loss: 0.3609
Epoch [2/100] Train Loss: 0.5603 | Val Loss: 0.3592
Epoch [3/100] Train Loss: 0.5568 | Val Loss: 0.3590
Epoch [4/100] Train Loss: 0.5530 | Val Loss: 0.3613
Epoch [5/100] Train Loss: 0.5504 | Val Loss: 0.3632
Epoch [6/100] Train Loss: 0.5491 | Val Loss: 0.3689
Epoch [7/100] Train Loss: 0.5425 | Val Loss: 0.3697
Epoch [8/100] Train Loss: 0.5377 | Val Loss: 0.3797
Epoch [9/100] Train Loss: 0.5320 | Val Loss: 0.3704
Epoch [10/100] Train Loss: 0.5244 | Val Loss: 0.3709
Epoch [11/100] Train Loss: 0.5202 | Val Loss: 0.3721
Epoch [12/100] Train Loss: 0.5131 | Val Loss: 0.3767
Epoch [13/100] Train Loss: 0.5052 | Val Loss: 0.3964
Early stopping triggered.


In [362]:
pred = model.predict_dataframe(X.loc[:TRAIN_ET])