In [1]:
import numpy as np
import os
from lib.utils import process_sensor_data
df_dict = {}
# Define path to parent directory containing subdirectories with CSV files
parent_dir = './sc sensor'
# adding to the df_dict
# Loop through each subdirectory in the parent directory
df_dict = process_sensor_data(parent_dir, df_dict)  # only process sensor 2

In [2]:
from lib.utils import gen_data_dict
data_dict = gen_data_dict(df_dict)

In [3]:
data_dict['./sc sensor/crossroad1'][:,0,0]

array([ 0, 59, 70, 65, 74, 55, 67, 54, 79, 69, 68, 63, 66, 73, 67, 64,  0,
        0,  0,  0,  0,  0,  0,  0,  0])

In [4]:
import random
def generate_insample_dataset(upstream_data, downstream_data, save_mode=False):
    """
    Upstream data: upstream sensor data (ts, node_num)
    Downstream data: downstream sensor data (ts, 1)
    """
    all_x, all_y = [], []

    x_offsets = np.sort(
            # np.concatenate(([-week_size + 1, -day_size + 1], np.arange(-11, 1, 1)))
            np.concatenate((np.arange(-7, 1, 1),))
        )
    # Predict the next 2 mins   
    y_offsets = np.sort(np.arange(1, 9, 1))
    min_t = abs(min(x_offsets))
    max_t = abs(upstream_data.shape[0]- abs(max(y_offsets)))

    # max_t = abs(N - abs(max(y_offsets)))  # Exclusive
    x, y = [], []
    for t in range(min_t, max_t):
        x_t = upstream_data[t + x_offsets, ...]
        # also store the downstream data to last row of x_t
        x_t = np.concatenate((x_t, downstream_data[t + x_offsets, ...]), axis=1)
        y_t = downstream_data[t + y_offsets, ...]
        x.append(x_t)
        y.append(y_t)

    x = np.stack(x, axis=0)
    y = np.stack(y, axis=0)

    all_x.append(x)
    all_y.append(y)

    zipped_lists = list(zip(all_x, all_y))
    random.shuffle(zipped_lists)  # shuffle data
    all_x, all_y = zip(*zipped_lists)

    x = np.concatenate(all_x, axis=0)
    y = np.concatenate(all_y, axis=0)

    # divide dataset
    num_samples, num_nodes = x.shape[0], x.shape[1]  # num_samples = ts - 12*2 +1

    len_train = round(num_samples * 0.7)
    len_val = round(num_samples * 0.1)
    # x_train: (num_samples, sliced_ts, num_nodes) y_train: (nums_samples, sliced_ts, 1)
    x_train, y_train = x[: len_train, ...], y[: len_train, ...]
    x_val, y_val = x[len_train: len_train + len_val, ...], y[len_train: len_train + len_val, ...]
    x_test, y_test = x[len_train + len_val:, ...], y[len_train + len_val:, ...]
            
    return x_train, y_train, x_val, y_val, x_test, y_test

In [5]:
up = data_dict['./sc sensor/crossroad1'][:,0,0].reshape(-1,1) # shape (ts, num_nodes)
down = data_dict['./sc sensor/crossroad1'][:,1,1].reshape(-1,1) # shape (ts, 1)

In [6]:
x_train, y_train, x_val, y_val, x_test, y_test = generate_insample_dataset(up, down)

In [15]:
x_train.shape, y_train.shape

((7, 8, 2), (7, 8, 1))

In [11]:
8 - np.arange(8, 0, -1)

array([0, 1, 2, 3, 4, 5, 6, 7])

In [14]:
import torch
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# Suppose you have a list of variable-length sequences as tensors
seq1 = torch.tensor([1, 2, 3])
seq2 = torch.tensor([4, 5])
seq3 = torch.tensor([6, 7, 8, 9])

# Create a list of variable-length sequences
sequences = [seq1, seq2, seq3]

# Pad the sequences with zeros
padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=0)

# Print the padded sequences
print("Padded Sequences:")
print(padded_sequences)


Padded Sequences:
tensor([[1, 2, 3, 0],
        [4, 5, 0, 0],
        [6, 7, 8, 9]])


In [15]:
lengths = [len(seq) for seq in sequences]
print("Original Lengths:", lengths)

# You can also pack the padded sequences to deal with variable-length sequences in RNNs
packed_sequences = pack_padded_sequence(padded_sequences, lengths, batch_first=True, enforce_sorted=False)

Original Lengths: [3, 2, 4]


In [18]:
import numpy as np

def diffusion_sequence_vectorized(F, n):
    indices = np.arange(n + 1)
    result = F * np.power(1 - F, indices)
    return result

# Example usage
initial_value = 0.5
sequence_length = 5

result_sequence = diffusion_sequence_vectorized(initial_value, sequence_length)
result_sequence

array([0.5     , 0.25    , 0.125   , 0.0625  , 0.03125 , 0.015625])

In [24]:
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
from lib.dataloader import FlowDataset
from lib.utils import gen_data_dict, process_sensor_data, generate_insample_dataset
from Diffusion import Diffusion_Model

df_dict = {}
# Define path to parent directory containing subdirectories with CSV files
parent_dir = './sc sensor'
# adding to the df_dict
# Loop through each subdirectory in the parent directory
df_dict = process_sensor_data(parent_dir, df_dict)

data_dict = gen_data_dict(df_dict)
up = data_dict['./sc sensor/crossroad1'][:,0,0].reshape(-1,1) # shape (ts, num_nodes)
down = data_dict['./sc sensor/crossroad1'][:,1,1].reshape(-1,1) # shape (ts, 1)


x_train, y_train, x_val, y_val, x_test, y_test = generate_insample_dataset(up, down) # x_train : [batch_size, num_timesteps_input, num_nodes]
num_input_timesteps = x_train.shape[1] # number of input time steps
num_nodes = x_train.shape[2] - 1 # number of ancestor nodes, minus the down stream node

train_dataset = FlowDataset(x_train, y_train, batch_size=2)
train_dataloader = DataLoader(train_dataset, batch_size=2)
model = Diffusion_Model(num_nodes=1, num_timesteps_input=x_train.shape[1])
model.velocity_model.requires_grad_(False)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.MSELoss()
for epoch in range(100):
    for i, (x, y) in enumerate(train_dataloader):
        # training loop x: [batch_size, num_timesteps_input, num_nodes]
        x_up = x[:, :, :-1].reshape(-1, num_input_timesteps, num_nodes)
        x_down = x[:, :, -1].reshape(-1, num_input_timesteps, 1).repeat(1, 1, num_nodes)
        pred = model(x_up, x_down)
        loss = loss_fn(pred, y[:, 0, :])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Epoch: {}, Loss: {}'.format(epoch, loss.item()))

# test
test_dataset = FlowDataset(x_test, y_test, batch_size=2)
test_dataloader = DataLoader(test_dataset, batch_size=2)
print('*************')

tensor([10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

In [31]:
for i in range(F.size(0)):
    n = torch.max((total_time_steps - T_idx[i]), torch.FloatTensor([1]))
    indices = torch.arange(n.item()-1, -1, -1)
    F_sequence = F[i] * torch.pow(1 - F[i], indices)
    sequences.append(F_sequence)

array([[[79, 69],
        [69, 83],
        [68, 73],
        [63, 57],
        [66, 67],
        [73, 63],
        [67, 61],
        [64, 71]],

       [[69, 83],
        [68, 73],
        [63, 57],
        [66, 67],
        [73, 63],
        [67, 61],
        [64, 71],
        [ 0, 65]]])

In [None]:
def diffusion_sequence(F, n):
    indices = torch.arange(n.item()-1, -1, -1)
    result = F * torch.pow(1 - F, indices)
    return result

for i in range(F.size(0)): # F: [batch_size, 1]
    F_sequence = diffusion_sequence(F[i], torch.max((total_time_steps - T_idx[i]), torch.FloatTensor([1])))
    sequences.append(F_sequence)

padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=0) # [batch_size, num_timesteps_input]

In [4]:
import torch

# Example vector 'n'
n = torch.tensor([3, 5, 2])

# Create indices tensor based on the maximum value in 'n'
indices = torch.arange(n, 10)

print("Shape of indices:", indices.shape)

TypeError: arange() received an invalid combination of arguments - got (Tensor, int), but expected one of:
 * (Number end, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (Number start, Number end, *, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (Number start, Number end, Number step, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
