In [4]:
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import T5ForConditionalGeneration

# Crear un DataFrame de ejemplo
data = {'FECHA': pd.date_range(start='1/1/2020', periods=100),
        'DIA': np.random.randint(1, 32, 100),
        'MES': np.random.randint(1, 13, 100),
        'ANO': np.full(100, 2020),
        'SEMANA': np.random.randint(1, 53, 100),
        'DIASEMANA': np.random.randint(1, 8, 100),
        'CANTIDAD': np.random.randint(1, 100, 100)}

df = pd.DataFrame(data)

# Normalizar las columnas numéricas utilizando MinMaxScaler
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df.drop(columns=['FECHA', 'CANTIDAD']))

# Crear las secuencias de tamaño fijo (por ejemplo, longitud 5)
sequence_length = 5
X = []
y = []

for i in range(len(df) - sequence_length):
    X.append(scaled_values[i:i+sequence_length])
    y.append(df['CANTIDAD'].iloc[i+sequence_length])

X = np.array(X)
y = np.array(y)

# Crear un conjunto de datos personalizado para PyTorch
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.float32)

dataset = TimeSeriesDataset(X, y)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)


class CustomTransformer(nn.Module):
    def __init__(self, d_model, nhead, num_layers, dim_feedforward):
        super().__init__()
        self.transformer = nn.Transformer(d_model, nhead, num_layers, dim_feedforward)
        self.linear = nn.Linear(d_model, 1)

    def forward(self, x):
        # Crear una máscara de atención triangular inferior para asegurar que las posiciones futuras no afecten las predicciones
        attn_mask = torch.triu(torch.ones(x.size(0), x.size(0)), diagonal=1).bool().to(x.device)
        
        # Proporcionar el mismo tensor para las entradas de origen y destino
        x = self.transformer(x, x,tgt_mask=attn_mask)
        x = self.linear(x[:, -1])
        return x

# Hiperparámetros del modelo
d_model = 5  # Cambiar esto para que coincida con la cantidad de características en tus datos
nhead = 5
num_layers = 3
dim_feedforward = 256

# Crear el modelo
model = CustomTransformer(d_model, nhead, num_layers, dim_feedforward)

# Define your execution device 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
print("The model will be running on", device, "device\n") 
model.to(device)    # Convert model parameters and buffers to CPU or Cuda

The model will be running on cuda:0 device



CustomTransformer(
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-2): 3 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=5, out_features=5, bias=True)
          )
          (linear1): Linear(in_features=5, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=5, bias=True)
          (norm1): LayerNorm((5,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((5,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
      (norm): LayerNorm((5,), eps=1e-05, elementwise_affine=True)
    )
    (decoder): TransformerDecoder(
      (layers): ModuleList(
        (0-255): 256 x TransformerDecoderLayer(
          (self_attn): MultiheadAttention(
    

In [7]:
# Entrenar el modelo con tus datos (necesitarás ajustar esto a tu problema específico)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 100

for epoch in range(num_epochs):
    for batch_X, batch_y in dataloader:
        # Asegurar que los datos tienen la forma correcta (S, N, E)
        # batch_X = batch_X.transpose(0, 1)
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        # Hacer predicciones con los datos de entrada
        predictions = model(batch_X).squeeze(-1)
        
        # Calcular la pérdida
        loss = criterion(predictions, batch_y)
        
        # Optimizar el modelo
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [8]:
loss

tensor(1627.7230, device='cuda:0', grad_fn=<MseLossBackward0>)

In [9]:
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)