In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# 1. Preparar os dados
df = pd.read_csv('datasets/PETR4_raw.csv', parse_dates=['Unnamed: 0'])
df.rename(columns={'Unnamed: 0': 'Date'}, inplace=True)
df.sort_values('Date', inplace=True)

# 2. Criar target
df['target'] = df['close'].shift(-252) / df['close'] - 1
features = ['open', 'high', 'low', 'close', 'volume']
n_days = 30

X, y = [], []

for i in range(n_days, len(df) - 252):
    window = df.iloc[i - n_days:i][features].values
    target = df.iloc[i]['target']
    if not np.isnan(window).any() and not np.isnan(target):
        X.append(window)
        y.append(target)

X = np.array(X)  # shape: (N, 30, 5)
y = np.array(y)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

# Converter para tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

In [3]:
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim=5, d_model=64, nhead=4, num_layers=2):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.regressor = nn.Linear(d_model, 1)

    def forward(self, x):
        x = self.input_proj(x)  # (B, 30, d_model)
        x = self.transformer(x)  # (B, 30, d_model)
        x = x.mean(dim=1)  # média ao longo da sequência
        return self.regressor(x)

# Instanciar modelo
model = TimeSeriesTransformer()
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [4]:
# 4. Treinar
for epoch in range(30):
    model.train()
    pred = model(X_train)
    loss = loss_fn(pred, y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 5 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")

Epoch 0 | Loss: 0.9414
Epoch 5 | Loss: 0.5037
Epoch 10 | Loss: 0.1924
Epoch 15 | Loss: 0.2685
Epoch 20 | Loss: 0.2077
Epoch 25 | Loss: 0.2126


In [6]:
# 5. Avaliar
model.eval()
with torch.no_grad():
    y_pred = model(X_test).squeeze().numpy()
    y_true = y_test.squeeze().numpy()

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
r2 = r2_score(y_true, y_pred)

print(f"\nRMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")


RMSE: 0.2797
R²: -0.0463
