In [1]:
import numpy as np
import pandas as pd



# Datos de prueba
data = {
    "uid": [0] * 20 + [1] * 20,  # Usuarios: 0, 1
    "d": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9] * 2,
    "t": [0, 1, 12, 13, 24, 25, 36, 37, 41, 47, 0, 1, 12, 13, 24, 25, 36, 37, 41, 47]
    * 2,
    "x": range(40),
    "y": range(40, 80),
}


# Configuración inicial
# num_users = 3
# num_pings = 40

# data = {
#     "uid": np.repeat(np.arange(num_users), num_pings),  # Usuarios: 0, 1, 2
#     "d": np.repeat(np.arange(num_pings) // 5, num_users),  # Días crecientes por cada 5 pings
#     "t": np.tile(np.arange(num_pings) % 48, num_users),  # Timeslots crecientes en ciclos de 48
#     "x": np.random.randint(0, 200, num_users * num_pings),  # Coordenadas X aleatorias
#     "y": np.random.randint(0, 200, num_users * num_pings),  # Coordenadas Y aleatorias
# }



# Crear DataFrame



df = pd.DataFrame(data)



print(df.head())

   uid  d   t  x   y
0    0  0   0  0  40
1    0  0   1  1  41
2    0  1  12  2  42
3    0  1  13  3  43
4    0  2  24  4  44


In [2]:
df["t_unificado"] = df["d"] * 48 + df["t"]
df = df.sort_values(by=["uid", "t_unificado"]).reset_index(drop=True)

display(df)

Unnamed: 0,uid,d,t,x,y,t_unificado
0,0,0,0,0,40,0
1,0,0,1,1,41,1
2,0,1,12,2,42,60
3,0,1,13,3,43,61
4,0,2,24,4,44,120
5,0,2,25,5,45,121
6,0,3,36,6,46,180
7,0,3,37,7,47,181
8,0,4,41,8,48,233
9,0,4,47,9,49,239


In [11]:
from scipy.ndimage import gaussian_filter
import numpy as np
import numpy as np
from scipy.ndimage import gaussian_filter
import torch


def create_tensor_from_data(data, grid_size=200):
    """
    Convierte un subconjunto de datos recientes en un tensor compatible con el modelo.

    Args:
        data (pd.DataFrame): DataFrame con los datos recientes del usuario.
                             Debe contener columnas ['x', 'y', 'd', 't'].
        grid_size (int): Tamaño del plano espacial (default: 200x200).

    Returns:
        torch.Tensor: Tensor con forma (1, N, 3, grid_size, grid_size).
    """
    # Número de timeslots en los datos recientes
    seq_len = len(data)

    # Crear el tensor vacío
    tensor = np.zeros(
        (seq_len, 3, grid_size, grid_size)
    )  # (N, 3, grid_size, grid_size)

    # Iterar por cada timeslot en los datos recientes
    for j, row in enumerate(data.itertuples(index=False)):
        x, y, d, t = row.x, row.y, row.d, row.t

        # Canal 1: Representación gaussiana
        gaussian_layer = np.zeros((grid_size, grid_size))
        gaussian_layer[x, y] = 1
        gaussian_layer = gaussian_filter(gaussian_layer, sigma=2)

        # Canal 2: Día de la semana
        day_layer = np.zeros((grid_size, grid_size))
        day_layer[x, y] = d % 7 + 1

        # Canal 3: Timeslot
        timeslot_layer = np.zeros((grid_size, grid_size))
        timeslot_layer[x, y] = t

        # Asignar al tensor
        tensor[j, 0] = gaussian_layer
        tensor[j, 1] = day_layer
        tensor[j, 2] = timeslot_layer

    # Convertir a tensor de PyTorch y añadir la dimensión batch
    return torch.tensor(tensor, dtype=torch.float32).unsqueeze(0)


def generate_sequences(data, grid_size=200):
    sequences = []
    timeslots = []
    coordinates = []
    user_ids = []

    for user_id, group in data.groupby("uid"):
        group = group.sort_values(by="t_unificado").reset_index(drop=True)

        # Generar secuencias
        for i in range(len(group) - 5):  # Tamaño fijo de 5
            # Crear ventana de datos recientes
            recent_data = group.iloc[i : i + 5]  # Subconjunto de 5 filas

            # Usar create_tensor_from_data para construir el tensor
            tensor = (
                create_tensor_from_data(recent_data, grid_size=grid_size)
                .squeeze(0)
                .numpy()
            )

            # Target: El siguiente timeslot y coordenadas
            t_target = group["t_unificado"].iloc[i + 5]
            coord_target = [group["x"].iloc[i + 5], group["y"].iloc[i + 5]]

            # Guardar resultados
            sequences.append(tensor)
            timeslots.append(t_target)
            coordinates.append(coord_target)
            user_ids.append(user_id)

    return (
        np.array(sequences),
        np.array(timeslots),
        np.array(coordinates),
        np.array(user_ids),
    )


# Generar datos
sequences, timeslots, coordinates, user_ids = generate_sequences(df)
# display(sequences.shape, timeslots.shape, coordinates.shape, user_ids.shape)
# max_position = np.unravel_index(
#     np.argmax(sequences[5][0][0], axis=None), sequences[5][0][0].shape
# )
# display(max_position)
display(sequences[1][0][0].max())
# display(sequences[1][0][0][0][41])
# display(timeslots)
# display(coordinates)
# display(user_ids)

np.float32(0.059248608)

In [12]:
import torch


class LSTMDataset(torch.utils.data.Dataset):
    def __init__(self, sequences, timeslots, coordinates, user_ids):
        """
        sequences: Tensor con las secuencias temporales (N, 3, 200, 200)
        timeslots: Secuencia de targets para los timeslots predichos
        coordinates: Secuencia de targets para las coordenadas predichas (x, y)
        user_ids: Lista de IDs de usuarios
        """
        self.sequences = torch.tensor(sequences, dtype=torch.float32)
        self.timeslots = torch.tensor(timeslots, dtype=torch.float32)
        self.coordinates = torch.tensor(coordinates, dtype=torch.float32)
        self.user_ids = torch.tensor(user_ids, dtype=torch.long)

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return (
            self.sequences[idx],  # Tensor espacial
            self.timeslots[idx],  # Target timeslot unificado
            self.coordinates[idx],  # Target (x, y)
            self.user_ids[idx],  # User ID
        )

In [13]:
from torch.utils.data import DataLoader, random_split

# Crear el Dataset
dataset = LSTMDataset(sequences, timeslots, coordinates, user_ids)

# Dividir el dataset en entrenamiento, validación y prueba
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset = torch.utils.data.Subset(dataset, range(0, train_size))
val_dataset = torch.utils.data.Subset(dataset, range(train_size, train_size + val_size))
test_dataset = torch.utils.data.Subset(
    dataset, range(train_size + val_size, len(dataset))
)

print(len(train_dataset), len(val_dataset), len(test_dataset))
# Configurar los DataLoaders
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

21 4 5


In [14]:
# Verificar la estructura de un batch
for batch in train_loader:
    sequence_batch, timeslot_batch, coordinate_batch, user_id_batch = batch

    # Imprimir dimensiones
    print("Secuencia Espacial:", sequence_batch.shape)  # (batch_size, 5, 3, 200, 200)
    print("Timeslots Target:", timeslot_batch.shape)  # (batch_size,)
    print("Coordenadas Target:", coordinate_batch.shape)  # (batch_size, 2)
    print("IDs de Usuario:", user_id_batch.shape)  # (batch_size,)
    # print("\nEjemplo de Secuencia Espacial (primer elemento del batch):")
    # print(sequence_batch[0])  # Imprime la primera secuencia

    # print("\nEjemplo de Timeslot Target (primer elemento del batch):")
    # print(timeslot_batch[0])  # Imprime el primer timeslot target

    # print("\nEjemplo de Coordenadas Target (primer elemento del batch):")
    # print(coordinate_batch[0])  # Imprime las coordenadas target (x, y)

    # print("\nEjemplo de ID de Usuario (primer elemento del batch):")
    print(user_id_batch[0])  # Imprime el ID del usuario
    break
    # print("\n=====================================\n")

Secuencia Espacial: torch.Size([16, 5, 3, 200, 200])
Timeslots Target: torch.Size([16])
Coordenadas Target: torch.Size([16, 2])
IDs de Usuario: torch.Size([16])
tensor(0)


In [15]:
import torch
import torch.nn as nn
import torch
import torch.nn as nn


class Model(nn.Module):
    def __init__(
        self, num_users, embedding_dim=16, cnn_output_dim=64, lstm_hidden_dim=128
    ):
        super(Model, self).__init__()

        # Embedding para usuarios
        self.user_embedding = nn.Embedding(num_users, embedding_dim)

        # Red convolucional (procesa los 3 canales espaciales)
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(32 * 50 * 50, cnn_output_dim),
        )

        # LSTM para capturar patrones temporales
        self.lstm = nn.LSTM(
            input_size=cnn_output_dim + embedding_dim,
            hidden_size=lstm_hidden_dim,
            batch_first=True,
        )

        # Capa para predecir el siguiente timeslot
        self.fc_timeslot = nn.Linear(lstm_hidden_dim, 1)

        # Capa para predecir las coordenadas (x, y)
        self.fc_coordinates = nn.Linear(lstm_hidden_dim, 2)

    def forward(self, x, user_ids):
        batch_size, seq_len, _, _, _ = x.size()

        # Procesar mapas espaciales con CNN
        cnn_features = []
        for t in range(seq_len):
            cnn_out = self.cnn(x[:, t, :, :, :])  # Procesar cada timeslot
            cnn_features.append(cnn_out)

        cnn_features = torch.stack(
            cnn_features, dim=1
        )  # (batch_size, seq_len, cnn_output_dim)

        # Embedding de usuarios
        user_embeds = self.user_embedding(user_ids).unsqueeze(1).repeat(1, seq_len, 1)

        # Concatenar características de CNN y embeddings
        lstm_input = torch.cat((cnn_features, user_embeds), dim=2)

        # Pasar por LSTM
        lstm_out, _ = self.lstm(lstm_input)

        # Predicción del último timeslot
        timeslot_pred = self.fc_timeslot(lstm_out[:, -1, :]).squeeze(-1)

        # Predicción de las coordenadas (x, y)
        coord_pred = self.fc_coordinates(lstm_out[:, -1, :])

        return timeslot_pred, coord_pred

In [16]:
# Configurar el dispositivo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Dispositivo utilizado: {device}")

Dispositivo utilizado: cpu


In [17]:
def loss_function(timeslot_pred, timeslot_true, coord_pred, coord_true):
    timeslot_loss = nn.MSELoss()(timeslot_pred, timeslot_true)
    coord_loss = nn.MSELoss()(coord_pred, coord_true)
    return timeslot_loss + coord_loss



In [18]:
# Inicializar el modelo
num_users = len(df["uid"].unique())  # Número de usuarios únicos en los datos
model = Model(num_users=num_users).to(device)

# Configurar el optimizador
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [19]:
# Entrenamiento del modelo
num_epochs = 1500
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch in train_loader:
        sequence_batch, timeslot_batch, coordinate_batch, user_id_batch = batch
        sequence_batch = sequence_batch.to(device)
        timeslot_batch = timeslot_batch.to(device)
        coordinate_batch = coordinate_batch.to(device)
        user_id_batch = user_id_batch.to(device)

        # Forward pass
        optimizer.zero_grad()
        timeslot_pred, coord_pred = model(sequence_batch, user_id_batch)

        # Calcular pérdida
        loss = loss_function(
            timeslot_pred, timeslot_batch, coord_pred, coordinate_batch
        )

        # Backward y optimización
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

Epoch 1/1500, Loss: 74371.7754
Epoch 2/1500, Loss: 73709.4922
Epoch 3/1500, Loss: 73162.6895
Epoch 4/1500, Loss: 72657.4805
Epoch 5/1500, Loss: 72169.7637
Epoch 6/1500, Loss: 71770.4551
Epoch 7/1500, Loss: 71422.9043
Epoch 8/1500, Loss: 71141.3320
Epoch 9/1500, Loss: 70897.5469
Epoch 10/1500, Loss: 70687.0273
Epoch 11/1500, Loss: 70515.2832
Epoch 12/1500, Loss: 70363.2637
Epoch 13/1500, Loss: 70216.3828
Epoch 14/1500, Loss: 70072.9395
Epoch 15/1500, Loss: 69931.7266
Epoch 16/1500, Loss: 69790.5957
Epoch 17/1500, Loss: 69649.6836
Epoch 18/1500, Loss: 69509.1680
Epoch 19/1500, Loss: 69369.1523
Epoch 20/1500, Loss: 69229.7441
Epoch 21/1500, Loss: 69090.9824
Epoch 22/1500, Loss: 68952.9102
Epoch 23/1500, Loss: 68815.5508
Epoch 24/1500, Loss: 68678.9004
Epoch 25/1500, Loss: 68542.9922
Epoch 26/1500, Loss: 68407.7930
Epoch 27/1500, Loss: 68273.2969
Epoch 28/1500, Loss: 68139.5020
Epoch 29/1500, Loss: 68006.3906
Epoch 30/1500, Loss: 67873.9453
Epoch 31/1500, Loss: 67742.1465
Epoch 32/1500, Lo

In [20]:
# Evaluar el modelo
model.eval()
with torch.no_grad():
    for batch in val_loader:
        sequence_batch, timeslot_batch, coordinate_batch, user_id_batch = batch
        sequence_batch = sequence_batch.to(device)
        timeslot_batch = timeslot_batch.to(device)
        coordinate_batch = coordinate_batch.to(device)
        user_id_batch = user_id_batch.to(device)
        
        timeslot_pred, coord_pred = model(sequence_batch, user_id_batch)
        
        print(f"Predicted Timeslot: {timeslot_pred[0].item()}, True: {timeslot_batch[0].item()}")
        print(f"Predicted Coordinates: {coord_pred[0].tolist()}, True: {coordinate_batch[0].tolist()}")


Predicted Timeslot: 228.72743225097656, True: 241.0
Predicted Coordinates: [20.409244537353516, 60.40855026245117], True: [31.0, 71.0]


In [23]:

# Preparar una consulta específica
recent_data = df[df["uid"] == 0][:5]  # Últimos 5 registros del usuario 0
display(recent_data)

# Crear la secuencia para la consulta
sequence_tensor = create_tensor_from_data(recent_data)  # Reutiliza el código para crear el tensor
user_id_tensor = torch.tensor([0], dtype=torch.long).to(device)

# Hacer la predicción
model.eval()
with torch.no_grad():
    timeslot_pred, coord_pred = model(sequence_tensor.to(device), user_id_tensor)
    
    print(f"Predicted Timeslot: {timeslot_pred.item()}")
    print(f"Predicted Coordinates: ({coord_pred[0, 0].item()}, {coord_pred[0, 1].item()})")


Unnamed: 0,uid,d,t,x,y,t_unificado
0,0,0,0,0,40,0
1,0,0,1,1,41,1
2,0,1,12,2,42,60
3,0,1,13,3,43,61
4,0,2,24,4,44,120


Predicted Timeslot: 228.72743225097656
Predicted Coordinates: (20.409244537353516, 60.40855026245117)
