# Import require modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from keras.models import Sequential
from keras.layers import GRU, Dense, Dropout, RepeatVector, TimeDistributed
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import gc

# Load Data

In [2]:
rdata = pd.read_csv('./Data/btcusd_1-min_data.csv')
rdata['Datetime'] = pd.to_datetime(rdata['Timestamp'], unit='s')
rdata.set_index('Datetime', inplace=True)
# Resample to 5-min bars
data_5m = rdata.resample('5T').agg({
    'Open' : 'first', 'High' : 'max', 'Low' : 'min', 'Close' : 'last', 'Volume' : 'sum'
}).dropna()
# Add log returns (target)
data_5m['LogRet'] = np.log(data_5m['Close']).diff()
data_5m.dropna(inplace=True)

  data_5m = rdata.resample('5T').agg({


# Sliding Windows

In [3]:
def create_sliding_windows(data, win_size, horizon):
    """
    Build sliding windows for sequence-to-sequence forecasting.

    Args:
        data (np.ndarray): Shape (N, num_features)
        win_size (int): Input length (e.g., 3 days = 864 steps)
        horizon (int): Output length (e.g., 289 steps = 1 day)

    Returns:
        Tuple[np.ndarray, np.ndarray]:
            X of shape (num_samples, win_size, num_features)
            y of shape (num_samples, horizon) where target is column 0 (LogRet)
    """
    X, y = [], []
    # iterate start index of each sample
    for start_idx in range(0, len(data) - win_size - horizon + 1):
        in_window = data[start_idx:start_idx + win_size]
        out_window = data[start_idx + win_size:start_idx + win_size + horizon, 0]  # target = LogRet
        X.append(in_window)
        y.append(out_window)
    return np.array(X), np.array(y)

# Features: LogRet + Volume + (optional OHCL)
features = ['LogRet', 'Volume']
data = data_5m[features].values

win_size = 865  # 3 days history
horizon = 289   # 1 day ahead

X, y = create_sliding_windows(data, win_size, horizon)

# Train / Validation / Test Split

In [4]:
train_size = int(len(X) * 0.8)
val_size = int(len(X) * 0.1)

x_train, y_train = X[:train_size], y[:train_size]
x_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]

# Reshape for GRU [samples, timesteps, features]
print("Train shape:", x_train.shape, y_train.shape)

Train shape: (1151256, 865, 2) (1151256, 289)


# Build Seq2Seq GRU Model

In [5]:
model = Sequential([
    GRU(256, return_sequences=True, input_shape=(win_size, X.shape[2])),
    Dropout(0.2),
    GRU(128),
    Dropout(0.2),
    RepeatVector(horizon),
    GRU(128, return_sequences=True),
    Dropout(0.2),
    GRU(64, return_sequences=True),
    TimeDistributed(Dense(1))
])

model.compile(optimizer='adam', loss='mse')
model.summary()

  super().__init__(**kwargs)


# Train

In [6]:
callbacks = [
    EarlyStopping(patience=10, restore_best_weights=True),
    ReduceLROnPlateau(factor=0.5, patience=5)
]

history = model.fit(x_train, y_train.reshape(-1, horizon, 1),
                    validation_data=(x_val, y_val.reshape(-1, horizon, 1)),
                    epochs=100,
                    batch_size=256,
                    callbacks=callbacks,
                    verbose=1
                    )


Epoch 1/100
[1m 175/4498[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:22:44[0m 7s/step - loss: 0.0820

KeyboardInterrupt: 

# Evaluate