### Transformer for prediction 
 The goal is to predict whether the next X(x=6)-minute close of BAP will be higher than the current close, and to feel first-hand how self-attention:

**Puntos clave en Transformers:**

- **B, L, D**
- Los más importantes:
  - **L** es la longitud de secuencia
  - **D** es el tamaño del embedding o los features que tenga el caso de uso

---

- **L** para este ejemplo es el tamaño de la secuencia de intervalos diarios,  
  en el ejemplo se toman diferentes secuencias de forma aleatoria.
- **D** es el número de features: price closed, volumen, etc.

---

**En el dataset tiene la forma `29x49x5`:**

- 29 es el número de días útiles de trading
- 49 es la cantidad de intervalos diarios (bars),  
  o sea, número máximo de intervalos en un día.  
  Para el entrenamiento se toma por ejemplo una secuencia fija de 32  
  pero se van extrayendo de forma aleatoria de los 49
  (considere que para este caso cada bar equivale a 2min)
- 5 cantidad

In [42]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import math
import matplotlib.pyplot as plt
import yfinance as yf, pandas as pd, torch, numpy as np

In [43]:
class SelfAttention(nn.Module):

    def __init__(self, d: int):
        super().__init__()
        self.q = nn.Linear(d, d, bias=False)
        self.k = nn.Linear(d, d, bias=False)
        self.v = nn.Linear(d, d, bias=False)
        self.scale = math.sqrt(d)

    def forward(self, x):
        Q = self.q(x)
        K = self.q(x)
        V = self.q(x)
        score = Q @ K.transpose(-2, -1) / self.scale  # BxLxL
        attn_w = F.softmax(score, dim=-1)
        context = attn_w @ V
        return context, attn_w

class FeedForward(nn.Module):

    def __init__(self, d: int, f: int):
        super().__init__()
        self.l1 = nn.Linear(d, f)
        self.l2 = nn.Linear(f, d)

    def forward(self, x):
        return self.l2(F.gelu(self.l1(x)))

In [44]:
class EncoderLayer(nn.Module):

    def __init__(self, d: int, d_f: int):
        super().__init__()
        self.attn = SelfAttention(d)
        self.norm1 = nn.LayerNorm(d)
        self.ff = FeedForward(d, d_f)
        self.norm2 = nn.LayerNorm(d)

    def forward(self, x, return_attention=False):
        context, atten = self.attn(x)
        x = self.norm1(x + context)
        x = self.norm2(x + self.ff(x))
        
        return (x, atten if return_attention else x)

In [45]:
class FinanceTransformer(nn.Module):

    def __init__(self, in_dim=5, d=32, d_f=64):
        super().__init__()
        self.proj = nn.Linear(in_dim, d)
        self.enc = EncoderLayer(d, d_f)
        self.cls = nn.Linear(d, 1)

    def forward(self, x, return_attention=False):
        x = self.proj(x)
        x, attn = self.enc(x, return_attention=True)
        logits = self.cls(x).squeeze(-1)

        return (logits, attn) if return_attention else logits

In [46]:
TICKER   = "BAP"
PERIOD   = "60d"
INTERVAL = "2m"
df = yf.download(TICKER, period=PERIOD, interval=INTERVAL, progress=False)
df = df.between_time("09:30", "16:00")  # Regular trading hours

  df = yf.download(TICKER, period=PERIOD, interval=INTERVAL, progress=False)


In [47]:
df["label"] = (df["Close"].shift(-3) > df["Close"]).astype(int)
df.dropna(inplace=True)

In [62]:
df

Price,Close,High,Low,Open,Volume,label
Ticker,BAP,BAP,BAP,BAP,BAP,Unnamed: 6_level_1
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2025-06-06 13:30:00+00:00,217.270004,217.270004,217.270004,217.270004,3010,1
2025-06-06 13:36:00+00:00,217.979996,217.979996,217.979996,217.979996,2035,0
2025-06-06 13:38:00+00:00,217.889999,217.889999,217.889999,217.889999,135,0
2025-06-06 13:40:00+00:00,218.210007,218.210007,218.210007,218.210007,662,0
2025-06-06 13:46:00+00:00,216.089996,217.154999,216.089996,217.154999,863,1
...,...,...,...,...,...,...
2025-07-22 15:46:00+00:00,228.860001,228.880005,228.479996,228.500000,25563,1
2025-07-22 15:48:00+00:00,228.490005,228.860001,228.490005,228.860001,14219,1
2025-07-22 15:50:00+00:00,228.690002,228.690002,228.675003,228.675003,569,0
2025-07-22 15:54:00+00:00,228.889999,228.889999,228.750000,228.750000,1692,0


In [48]:
# numero real de dias de trading
steps_per_day = int(len(df) / len(df.index.normalize().unique()))
n_days = len(df) // steps_per_day

In [49]:
# Features: OHLCV
cols = ["Open", "High", "Low", "Close", "Volume"]
X1 = df[cols].values[:n_days * steps_per_day].reshape(n_days, steps_per_day, len(cols))
y = df["label"].values[:n_days * steps_per_day].reshape(n_days, steps_per_day)

In [50]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit_transform(X1.reshape(-1, len(cols))).reshape(X1.shape)

In [51]:
X[-1, -1, :]

array([ 1.78244663,  1.90998904,  1.81771779,  1.78653107, 13.55621035])

In [52]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = FinanceTransformer().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCEWithLogitsLoss()

In [53]:
# usamos un tamaño de sequencia de 32
# sin embargo el tamaño maximo puede ser el numero de pasos por dia 
seq_len = 32  

for epoch in range(1):
    epoch_loss = 0
    for i in range(n_days):
        # Pick random window of length seq_len
        k = np.random.randint(0, steps_per_day - seq_len - 3)
        xb = torch.tensor(X[i:i+1, k:k+seq_len, :], dtype=torch.float32).to(device)
        yb = torch.tensor(y[i:i+1, k:k+seq_len], dtype=torch.float32).to(device)
        logits, _ = model(xb, return_attention=True)
        loss = loss_fn(logits, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1} loss: {epoch_loss / n_days:.4f}")

Epoch 1 loss: 0.7144


In [61]:
model.eval()
x1 = X[-2:-1, :, :]
x1 = torch.tensor(x1, dtype=torch.float32)
print(x1.shape)

with torch.no_grad():
    pred = model(x1)
    print('prediction is ', pred)


torch.Size([1, 49, 5])
prediction is  tensor([[-0.3402, -0.3046, -0.3002, -0.3091, -0.3337, -0.3375, -0.2988, -0.2964,
         -0.3076, -0.3053, -0.3065, -0.3114, -0.3055, -0.3346, -0.3077, -0.2988,
         -0.2915, -0.2920, -0.2915, -0.2977, -0.2612, -0.3110, -0.3100, -0.3730,
         -0.3062, -0.3314, -0.3578, -0.3215, -0.3346, -0.3199, -0.3184, -0.3212,
         -0.3448, -0.3634, -0.3574, -0.3378, -0.3349, -0.3366, -0.3314, -0.3324,
         -0.3452, -0.3396, -0.3332, -0.3700, -0.3300, -0.3296, -0.3568, -0.3486,
         -0.3331]])
