In [43]:
''' 数据集时间范围
NYCBike1 | 20140401-20140930 | 19(4+5*3) | 1 hour
NYCBike2 | 20160701-20160829 | 35(8+9*3) | 30 min
NYCTaxi  | 20160101-20160229 | 35(8+9*3) | 30 min
'''

' 数据集时间范围\nNYCBike1 | 20140401-20140930 | 19(4+5*3) | 1 hour\nNYCBike2 | 20160701-20160829 | 35(8+9*3) | 30 min\nNYCTaxi  | 20160101-20160229 | 35(8+9*3) | 30 min\n'

In [44]:
import numpy as np
# Dataset directory to process; every path below is built from this name.
data_name = 'NYCTaxi' # one of: 'NYCBike1', 'NYCBike2', 'NYCTaxi'
# Open the original train/val/test archives stored under ./<data_name>/ori/.
# Later cells read the 'x', 'y', 'x_offsets' and 'y_offsets' entries from them.
data_train = np.load(f'./{data_name}/ori/train.npz')
data_val = np.load(f'./{data_name}/ori/val.npz')
data_test = np.load(f'./{data_name}/ori/test.npz')

In [45]:
def get_timestamps_y(timestamps_shape, start_time, steps_per_day):
    """Build the (time-of-day, day-of-week) channels for the target arrays.

    Sample ``i`` is stamped with the slot that lies ``i`` steps after
    ``start_time``; the time-of-day index wraps at ``steps_per_day`` and each
    wrap advances the day-of-week index modulo 7. Every node of a sample gets
    the same stamp.

    Args:
        timestamps_shape: 4-element shape ``[B, T, N, C]`` of the array to
            create (the caller uses ``[B, 1, N, 2]``). Only index 0 along the
            second axis is filled; any further steps stay zero.
        start_time: ``[time_of_day, day_of_week]`` of the first sample, as
            0-based indices. Out-of-range values are normalised (a time-of-day
            >= ``steps_per_day`` carries over into the day-of-week).
        steps_per_day: number of time slots in one day (24 or 48 in this
            notebook).

    Returns:
        Float array of shape ``timestamps_shape`` with channel 0 holding the
        time-of-day index and channel 1 the day-of-week index.

    Raises:
        ValueError: if ``steps_per_day`` is smaller than 1.
    """
    if steps_per_day < 1:
        raise ValueError(f"steps_per_day must be >= 1, got {steps_per_day}")

    num_samples = timestamps_shape[0]
    start_tod, start_dow = start_time[0], start_time[1]

    # Absolute slot index of every sample, counted from midnight of the start day.
    slot_index = start_tod + np.arange(num_samples)
    tod = slot_index % steps_per_day
    # Each completed day moves the weekday forward by one (mod 7).
    dow = (start_dow + slot_index // steps_per_day) % 7

    y_time = np.zeros(timestamps_shape)  # [B, 1, N, 2]
    # A trailing axis lets the per-sample values broadcast over all nodes.
    y_time[:, 0, :, 0] = tod[:, None]
    y_time[:, 0, :, 1] = dow[:, None]
    return y_time

In [46]:
def get_timestamps_x(all_timestamps_y, timestamps_shape, x_offsets, steps_per_day):
    """Derive the (time-of-day, day-of-week) channels of the history windows.

    The stamp of history step ``t`` is obtained from the stamp of the first
    target step of the same sample by shifting it ``x_offsets[t][0] - 1``
    slots. The ``- 1`` is hard-coded, i.e. the target step is taken to sit at
    offset ``+1``. Floor division/modulo are used, so negative shifts wrap
    back into the previous day(s).

    Args:
        all_timestamps_y: array ``[B, >=1, N, >=2]``; index 0 on the second
            axis supplies the reference stamp (channel 0 time-of-day,
            channel 1 day-of-week).
        timestamps_shape: shape ``[B, T, N, 2]`` of the array to create.
        x_offsets: indexable such that ``x_offsets[t][0]`` is the offset of
            history step ``t`` (``t`` in ``range(T)``).
        steps_per_day: number of time slots in one day.

    Returns:
        Float array of shape ``timestamps_shape`` with channel 0 holding the
        time-of-day index and channel 1 the day-of-week index.
    """
    batch_size, num_steps, num_nodes, _ = timestamps_shape

    # Reference stamps, laid out as (B, 1, N) so they broadcast over the T axis.
    base_tod = all_timestamps_y[:, 0, :, 0].reshape(batch_size, 1, num_nodes)
    base_dow = all_timestamps_y[:, 0, :, 1].reshape(batch_size, 1, num_nodes)

    # One offset per history step, laid out as (1, T, 1).
    offsets = np.array([x_offsets[t][0] for t in range(num_steps)]).reshape(1, num_steps, 1)

    # Unwrapped slot position relative to midnight of the reference day: (B, T, N).
    raw_tod = base_tod + offsets - 1
    # Number of whole days crossed (negative when reaching into earlier days).
    day_shift = raw_tod // steps_per_day

    x_time = np.zeros(timestamps_shape)  # [B, T, N, 2]
    # Explicit (B, T, N) shapes avoid squeeze(), which also dropped size-1
    # batch/node axes and broke the assignment when num_nodes == 1.
    x_time[..., 0] = raw_tod % steps_per_day
    x_time[..., 1] = (base_dow + day_shift) % 7
    return x_time

In [47]:
''' 预测值的第一个点(Day4)的时间戳(tod, dow)属性
NYCBike1 3/24 5
NYCBike2 5/48 1
NYCTaxi 5/48 1
'''
# Per-sample dimensions (history length, horizon length, node count) are read
# from the training split.
x_shape_train, y_shape_train = data_train['x'].shape, data_train['y'].shape
_, time_his, num_nodes, _ = x_shape_train
_, time_pre, num_nodes, _ = y_shape_train

# Number of samples in each split and in the concatenated train+val+test run.
len_train = y_shape_train[0]
len_val = data_val['y'].shape[0]
len_test = data_test['y'].shape[0]
batch_size_all = sum((len_train, len_val, len_test))

# Shapes of the timestamp arrays that cover all three splits at once.
timestamps_shape_x = [batch_size_all, time_his, num_nodes, 2]  # [B, T, N, 2]
timestamps_shape_y = [batch_size_all, time_pre, num_nodes, 2]  # [B, 1, N, 2]

# Build the timestamps for the whole train+val+test sequence in one pass.
# start_time is [time_of_day, day_of_week]; the values look like the 0-based
# form of the figures listed in the note above (e.g. 3/24, 5 -> [2, 4]).
if data_name == 'NYCBike1':
    start_time, steps_per_day = [2, 4], 24
else:
    start_time, steps_per_day = [4, 0], 48

all_timestamps_y = get_timestamps_y(timestamps_shape_y, start_time, steps_per_day)
all_timestamps_x = get_timestamps_x(
    all_timestamps_y,
    timestamps_shape_x,
    data_train['x_offsets'],
    steps_per_day,
)
# all_timestamps_y: [b_all, 1, N, 2]    all_timestamps_x: [b_all, T, N, 2]

In [48]:

# Row boundaries of the splits inside the full-length timestamp arrays:
# [0, val_start) is train, [val_start, test_start) is val, the rest is test.
val_start = len_train
test_start = len_train + len_val

# Append the two timestamp channels to the last axis of every split.
train_x_full = np.concatenate([data_train['x'], all_timestamps_x[:val_start]], axis=-1)
train_y_full = np.concatenate([data_train['y'], all_timestamps_y[:val_start]], axis=-1)

val_x_full = np.concatenate([data_val['x'], all_timestamps_x[val_start:test_start]], axis=-1)
val_y_full = np.concatenate([data_val['y'], all_timestamps_y[val_start:test_start]], axis=-1)

test_x_full = np.concatenate([data_test['x'], all_timestamps_x[test_start:]], axis=-1)
test_y_full = np.concatenate([data_test['y'], all_timestamps_y[test_start:]], axis=-1)

# Write the augmented splits next to the 'ori' folder, carrying the offset
# arrays over unchanged from the original archives.
np.savez_compressed(
    f'./{data_name}/train.npz',
    x=train_x_full,
    y=train_y_full,
    x_offsets=data_train['x_offsets'],
    y_offsets=data_train['y_offsets'],
)
np.savez_compressed(
    f'./{data_name}/val.npz',
    x=val_x_full,
    y=val_y_full,
    x_offsets=data_val['x_offsets'],
    y_offsets=data_val['y_offsets'],
)
np.savez_compressed(
    f'./{data_name}/test.npz',
    x=test_x_full,
    y=test_y_full,
    x_offsets=data_test['x_offsets'],
    y_offsets=data_test['y_offsets'],
)