# FEDformer on 1-min BNB/USDT


In [13]:
import sys, pathlib, torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler
from types import SimpleNamespace

# add FEDformer repo to path
repo_root = pathlib.Path("/Users/mchildress/Active Code/ts_basics/Modeling/Transformer/FEDformer")
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

# import the FEDformer model
from FEDformer.models.FEDformer import Model as FEDformer


In [14]:
df = pd.read_csv(
    "/Users/mchildress/Active Code/ts_basics/data/bnbusdt_1m.csv",
    parse_dates=["timestamp"],
    index_col="timestamp"
)
series = df["close"].values.reshape(-1,1)
scaler = StandardScaler()
series_scaled = scaler.fit_transform(series).squeeze()


  df = pd.read_csv(


In [15]:
import pandas as pd
import numpy as np

# make sure the index is datetime
df.index = pd.to_datetime(df.index)

# now extract time‐features
ts = df.index  # this is now a DatetimeIndex
tf = np.stack([
    ts.month,
    ts.day,
    ts.dayofweek,
    ts.hour,
    ts.minute
], axis=1)  # shape (N, 5)

In [16]:
class TSData(Dataset):
    def __init__(self, arr, time_features, lookback, horizon):
        t  = torch.from_numpy(arr).float()           # (N,)
        tf = torch.from_numpy(time_features).long()  # (N, 5)

        win    = t.unfold(0, lookback + horizon, 1)      # (B, W)
        tf_win = tf.unfold(0, lookback + horizon, 1)     # (B, 5, W)

        # **insert this** to swap to (B, W, 5)
        tf_win = tf_win.permute(0, 2, 1)

        self.X      = win[:, :lookback].unsqueeze(-1)    # (B, L, 1)
        self.Y      = win[:, lookback:].unsqueeze(-1)    # (B, H, 1)
        self.X_mark = tf_win[:, :lookback, :].contiguous()  # now (B, lookback, 5)
        self.Y_mark = tf_win[:, lookback:, :].contiguous()  # now (B, horizon, 5)    

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.X_mark[i], self.Y[i], self.Y_mark[i]

In [17]:
# %%  Build Dataset & DataLoaders  -------------------------------------------
lookback = 720           # already defined earlier
horizon  = 96
batch_size = 32

# ── simple 80 / 20 chronological split ───────────────────────────────────────
split_idx   = int(len(series_scaled) * 0.8)
train_vals  = series_scaled[:split_idx]
train_tf    = tf[:split_idx]

val_vals    = series_scaled[split_idx - lookback - horizon + 1:]  # keep continuity
val_tf      = tf[split_idx - lookback - horizon + 1:]

train_ds = TSData(train_vals, train_tf, lookback, horizon)
val_ds   = TSData(val_vals,   val_tf,   lookback, horizon)

from torch.utils.data import DataLoader
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False)

In [18]:
device = (
    torch.device("mps") if torch.backends.mps.is_available()
    else torch.device("cuda") if torch.cuda.is_available()
    else torch.device("cpu")
)
fed_cfg = SimpleNamespace(
    version     = "Fourier",
    mode_select = "random",
    output_attention = False,
    seq_len     = lookback,
    label_len   = horizon,
    pred_len    = horizon,
    enc_in      = 1,
    dec_in      = 1,
    c_out       = 1,
    d_model     = 512,  # increased to be compatible with n_heads
    n_heads     = 8,    # changed to 8 heads
    e_layers    = 2,
    d_ff        = 2048, # increased to typical transformer ratio
    modes       = 64,
    dropout     = 0.1,
    factor      = 1,
    moving_avg  = 25,
    embed       = True,
    freq        = 't',
    activation  = 'relu',
    d_layers    = 2,
)
model     = FEDformer(fed_cfg).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()

fourier enhanced block used!
modes=64, index=[7, 13, 25, 34, 35, 36, 37, 40, 48, 50, 76, 82, 85, 87, 95, 98, 99, 100, 110, 132, 137, 140, 141, 145, 147, 148, 152, 155, 170, 177, 181, 182, 190, 193, 195, 215, 225, 226, 230, 241, 243, 252, 253, 260, 261, 264, 267, 269, 274, 278, 283, 285, 288, 289, 291, 317, 327, 329, 331, 333, 335, 347, 352, 359]
fourier enhanced block used!
modes=64, index=[0, 4, 5, 6, 16, 19, 21, 22, 24, 30, 32, 35, 41, 44, 50, 52, 57, 58, 62, 63, 66, 67, 69, 70, 77, 80, 84, 89, 91, 92, 97, 104, 109, 114, 115, 116, 119, 120, 124, 129, 137, 142, 144, 148, 153, 154, 155, 156, 160, 169, 170, 181, 182, 193, 195, 199, 201, 202, 205, 208, 219, 220, 222, 227]
 fourier enhanced cross attention used!
modes_q=64, index_q=[0, 3, 4, 6, 14, 20, 21, 29, 31, 35, 37, 38, 39, 41, 42, 44, 46, 47, 48, 56, 58, 59, 60, 62, 65, 66, 88, 90, 92, 98, 99, 103, 118, 119, 122, 125, 128, 130, 134, 138, 141, 143, 148, 150, 157, 166, 167, 176, 184, 187, 195, 196, 197, 198, 201, 203, 204, 205, 207, 

In [None]:
epochs = 10
for ep in range(epochs):
    model.train()
    total_loss = 0.0

    for X, X_mark, Y, Y_mark in train_loader:
        # Move to device
        X, X_mark, Y, Y_mark = [t.to(device) for t in (X, X_mark, Y, Y_mark)]
        B = X.size(0)

        # 1) Build decoder inputs
        dec_vals = torch.cat([
            X[:, -fed_cfg.label_len:, :],
            torch.zeros(B, fed_cfg.pred_len, 1, device=device)
        ], dim=1)      # (B, label_len+pred_len, 1)

        dec_tf = torch.cat([
            X_mark[:, -fed_cfg.label_len:, :],
            Y_mark
        ], dim=1)      # (B, label_len+pred_len, C_time)

        # 2) Forward through FEDformer
        # FEDformer expects: x_enc, x_mark_enc, x_dec, x_mark_dec
        optimizer.zero_grad()
        out = model(
            X,              # (B, seq_len, 1)
            X_mark,         # (B, seq_len, C_time)
            dec_vals,       # (B, label_len+pred_len, 1)
            dec_tf          # (B, label_len+pred_len, C_time)
        )
        preds = out[0] if isinstance(out, tuple) else out
        # preds shape: (B, pred_len, 1) or (B, label_len+pred_len, 1)
        # For FEDformer, output is already (B, pred_len, 1)
        # No need to slice as model handles this internally

        # 3) Compute loss
        target = Y    # Y is already (B, pred_len, 1)
        loss = criterion(preds, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {ep+1}/{epochs} — train MSE: {avg_loss:.5f}")

IndexError: index 97 is out of bounds for dimension 3 with size 97

In [None]:
model.eval()
val_loss = 0.0
with torch.no_grad():
    for X, Y in val_loader:
        X, Y = X.to(device), Y.to(device)
        x_dec    = torch.zeros(X.size(0), fed_cfg.label_len, X.size(2), device=device)
        x_mark_enc = torch.zeros(X.size(0), fed_cfg.seq_len, 5, device=device)
        x_mark_dec = torch.zeros(X.size(0), fed_cfg.label_len, 5, device=device)

        out = model(X, x_mark_enc, x_dec, x_mark_dec)
        preds = out[0] if isinstance(out, tuple) else out
        val_loss += criterion(preds, Y).item()

print(f"Validation MSE: {val_loss/len(val_loader):.5f}")