In [1]:
import h5py
import os
import numpy as np
import pandas as pd

In [68]:
def add_window_horizon(data, window=12, horizon=12, single=False):
    '''
    Cut a sequence into sliding (input, target) pairs along its first axis.

    :param data: sequence sliced along its first axis, shape [B, ...]
    :param window: number of consecutive steps in each input sample
    :param horizon: number of steps after the window covered by the target
    :param single: if True, each target keeps only the final horizon step
        (as a length-1 slice); otherwise it keeps all `horizon` steps
    :return: X is [N, W, ...], Y is [N, H, ...] (or [N, 1, ...] when single),
        where N = len(data) - window - horizon + 1. When N <= 0 no samples
        are produced and both results are empty arrays.
    '''
    num_samples = len(data) - horizon - window + 1
    # Both modes end the target at the same position; they differ only in
    # how many trailing steps of the horizon are kept.
    target_len = 1 if single else horizon

    inputs = []     # windows
    targets = []    # horizons
    for start in range(num_samples):
        window_end = start + window
        target_end = window_end + horizon
        inputs.append(data[start:window_end])
        targets.append(data[target_end - target_len:target_end])

    return np.array(inputs), np.array(targets)

def split_data_by_ratio(data, val_ratio, test_ratio):
    '''
    Split data along its first axis into contiguous train / val / test parts.

    The test part is taken from the end of `data`, the validation part sits
    immediately before it, and everything earlier is the training part.

    :param data: array-like with a `.shape` attribute, sliced along axis 0
    :param val_ratio: fraction of the samples used for validation (0..1)
    :param test_ratio: fraction of the samples used for testing (0..1)
    :return: (train_data, val_data, test_data)

    Ratios that are negative or sum to more than 1 trigger a warning and the
    hold-out sizes are clamped to the available data, so the call never fails
    but the resulting split may be degenerate (e.g. an empty training part).
    '''
    import warnings

    data_len = data.shape[0]

    if val_ratio < 0 or test_ratio < 0 or val_ratio + test_ratio > 1:
        warnings.warn(
            'val_ratio and test_ratio should be non-negative fractions that '
            'sum to at most 1; got val_ratio={}, test_ratio={}. '
            'Split sizes are clamped to the data length.'.format(val_ratio, test_ratio),
            stacklevel=2,
        )

    # Sizes are truncated the same way as before, then clamped into [0, data_len].
    test_len = min(max(int(data_len * test_ratio), 0), data_len)
    held_out_len = min(max(int(data_len * (test_ratio + val_ratio)), test_len), data_len)

    # Positive cut points avoid the `-0` pitfall: `data[-0:]` is the whole
    # array, so a hold-out size of 0 used to return everything as test data
    # (and nothing as validation/training data).
    val_start = data_len - held_out_len
    test_start = data_len - test_len

    train_data = data[:val_start]
    val_data = data[val_start:test_start]
    test_data = data[test_start:]
    return train_data, val_data, test_data

In [51]:
# Read the 'taxi_pick' and 'taxi_drop' datasets from the HDF5 file.
# The file is opened read-only inside a `with` block so the handle is always
# closed, even if one of the reads fails.
with h5py.File('./nyc_taxi.h5', 'r') as nyc_taxi:
    # `[:]` copies each dataset into an in-memory NumPy array, so the arrays
    # remain usable after the file has been closed.
    nyc_pick = np.expand_dims(nyc_taxi['taxi_pick'][:], axis=-1)
    nyc_drop = np.expand_dims(nyc_taxi['taxi_drop'][:], axis=-1)

# Join both arrays along the new trailing axis: index 0 = pick, index 1 = drop.
nyc_data = np.concatenate([nyc_pick, nyc_drop], axis=-1)


In [59]:
# Display the combined array's shape as a sanity check; the trailing axis
# holds the two concatenated arrays (pick, drop).
nyc_data.shape

(4368, 266, 2)

In [61]:
# Build sliding-window samples: `x` holds 12-step input windows and `y` the
# 12 steps that follow each window (the helper's defaults, spelled out here).
x, y = add_window_horizon(nyc_data, window=12, horizon=12)

In [69]:
# Ratios are fractions of the sample count. The test split is taken from the
# end of the data and the validation split from just before it; the rest is
# used for training.
# NOTE(review): this cell previously passed 1.5 for both ratios, which exceeds
# the data length and leaves train/val empty with every sample in test.
# 0.15 is assumed to be the intended value — confirm.
VAL_RATIO = 0.15
TEST_RATIO = 0.15

x_train, x_val, x_test = split_data_by_ratio(x, VAL_RATIO, TEST_RATIO)
y_train, y_val, y_test = split_data_by_ratio(y, VAL_RATIO, TEST_RATIO)

In [71]:
# Write each split to its own compressed archive, with the inputs stored
# under key 'x' and the targets under key 'y'.
for split_file, split_x, split_y in (
    ('train.npz', x_train, y_train),
    ('val.npz', x_val, y_val),
    ('test.npz', x_test, y_test),
):
    np.savez_compressed(split_file, x=split_x, y=split_y)