In [12]:
import pandas as pd
import numpy as np

In [13]:
train_dataset = np.load('./data/train_image_pop30.npy')
test_dataset = np.load('./data/test_image_pop30.npy')

print(train_dataset.shape)
print(test_dataset.shape)

(4320, 10, 20)
(1440, 10, 20)


# Sampler

In [14]:
# def sampler(data, lag=8, step=1, temp=False):
#     """This function makes samples of the time series data
#     args:
#     - data : (# of data, height, width)
#     - lag : the length of sampling
#     - step : (step)-ahead forecasting label
#     return: 
#     - data_x (# of samples, height, width, lag)
#     - data_y (# of samples, height, width, 1)
#     """
#     num_row = len(data)
#     data_x, data_y = [], []
#     for idx in range(num_row-lag-step):
#         y = np.array(data[idx+lag : idx+lag+step])
#         if not temp:
#             x = np.transpose(data[idx : idx+lag], [1,2,0])
#             data_x.append(x)
#             y = np.transpose(y, [1,2,0])
        
#         data_y.append(y)

#     print("Sampler Return", np.shape(data_x), np.shape(data_y))
                        
#     if not temp:
#         return np.array(data_x), np.array(data_y)
#     else:
#         return np.array(data_y)

In [15]:
def sampler_stamp(data, stamp, lag, step, temp=False):

    num_row = len(data)
    data_x, data_y = [], []
    for idx in range(num_row-stamp*lag-step):
        y = np.array(data[stamp*lag+idx : stamp*lag+idx+step])
        if not temp:
            x = np.transpose(data[[stamp*i+idx for i in range(lag)],:], [1,2,0])
            data_x.append(x)
            y = np.transpose(y, [1,2,0])
        data_y.append(y)
    print("Sampler Return", np.shape(data_x), np.shape(data_y))
    
    if not temp:
        return np.array(data_x), np.array(data_y)
    else:
        return np.array(data_y)

# Make Temporal Information

In [16]:
print("Train shape: ", np.shape(train_dataset), ", Test shape: ", np.shape(test_dataset))

# Setting Some Parameters 
num_train, num_test = np.shape(train_dataset)[0], np.shape(test_dataset)[0]
num_row = num_train + num_test
print('num_row: ', num_row)

Train shape:  (4320, 10, 20) , Test shape:  (1440, 10, 20)
num_row:  5760


In [17]:
### Initialize numpy array of temporal information (one-hot encoding)
datasets_min_30 = np.zeros([num_row, 48])
datasets_dow = np.zeros([num_row, 7])

# 더미화 하기
# 30 mins, and day-of-week index are calculated below
for i in range(num_row):
    idx_30 = int(int(i)%48)
    idx_dow = int(int(i/48)%7)
    datasets_min_30[i,idx_30] = 1
    datasets_dow[i, idx_dow] = 1

In [18]:
def train_test_split(data, idx):
    return data[:idx], data[idx:]

In [19]:
# Split Train & Test Period

train_index = num_train #144 # 120 days and 144 time index
min_30_train, min_30_test = train_test_split(datasets_min_30, train_index)
dow_train, dow_test = train_test_split(datasets_dow, train_index)

print(min_30_train.shape, min_30_test.shape)
print(dow_train.shape, dow_test.shape)

(4320, 48) (1440, 48)
(4320, 7) (1440, 7)


# Save Final Data

In [21]:
def save_data_stamp(STAMP, LAG, STEP):
    # train 
    x_train, y_train = sampler_stamp(train_dataset, stamp=STAMP, lag=LAG, step=1, temp=False)    
    min_30_train_y = sampler_stamp(min_30_train, stamp=STAMP, lag=LAG, step=1, temp=True)
    dow_train_y = sampler_stamp(dow_train, stamp=STAMP, lag=LAG, step=1, temp=True)
    temporal_train = np.concatenate((dow_train_y, min_30_train_y), axis=-1)
    
    np.savez(f'./data/x_train_stamp{STAMP}_lag{LAG}.npz', x_train)
    np.savez(f'./data/y_train_stamp{STAMP}_lag{LAG}.npz', y_train)
    np.savez(f'./data/temporal_train_stamp{STAMP}_lag{LAG}.npz', temporal_train)
    
    # test
    x_test, y_test = sampler_stamp(test_dataset, stamp=STAMP, lag=LAG, step=STEP, temp=False)
    min_30_test_y = sampler_stamp(min_30_test, stamp=STAMP, lag=LAG, step=STEP, temp=True)
    dow_test_y = sampler_stamp(dow_test, stamp=STAMP, lag=LAG, step=STEP, temp=True)
    temporal_test = np.concatenate((dow_test_y, min_30_test_y), axis=-1)

    np.savez(f'./data/x_test_stamp{STAMP}_lag{LAG}_step{STEP}.npz', x_test)
    np.savez(f'./data/y_test_stamp{STAMP}_lag{LAG}_step{STEP}.npz', y_test)
    np.savez(f'./data/temporal_test_stamp{STAMP}_lag{LAG}_step{STEP}.npz', temporal_test)

## time stamp = 1 (0.5H)

In [22]:
# single-step data
save_data_stamp(STAMP=1, LAG=10, STEP=1)

Sampler Return (4309, 10, 20, 10) (4309, 10, 20, 1)
Sampler Return (0,) (4309, 1, 48)
Sampler Return (0,) (4309, 1, 7)
Sampler Return (1429, 10, 20, 10) (1429, 10, 20, 1)
Sampler Return (0,) (1429, 1, 48)
Sampler Return (0,) (1429, 1, 7)


In [23]:
# # single-step data
# for i in range(2,22,2):
#     save_data_stamp(STAMP=1, LAG=i, STEP=1)

In [24]:
# multi-step data
save_data_stamp(STAMP=1, LAG=10, STEP=48)

Sampler Return (4309, 10, 20, 10) (4309, 10, 20, 1)
Sampler Return (0,) (4309, 1, 48)
Sampler Return (0,) (4309, 1, 7)
Sampler Return (1382, 10, 20, 10) (1382, 10, 20, 48)
Sampler Return (0,) (1382, 48, 48)
Sampler Return (0,) (1382, 48, 7)


In [25]:
# # multi-step data
# for i in range(2,22,2):
#     save_data_stamp(STAMP=1, LAG=i, STEP=48)

## time stamp = 2 (1H)

In [26]:
# multi-step data
for i in range(2,22,2):
    save_data_stamp(STAMP=2, LAG=i, STEP=24)

Sampler Return (4315, 10, 20, 2) (4315, 10, 20, 1)
Sampler Return (0,) (4315, 1, 48)
Sampler Return (0,) (4315, 1, 7)
Sampler Return (1412, 10, 20, 2) (1412, 10, 20, 24)
Sampler Return (0,) (1412, 24, 48)
Sampler Return (0,) (1412, 24, 7)
Sampler Return (4311, 10, 20, 4) (4311, 10, 20, 1)
Sampler Return (0,) (4311, 1, 48)
Sampler Return (0,) (4311, 1, 7)
Sampler Return (1408, 10, 20, 4) (1408, 10, 20, 24)
Sampler Return (0,) (1408, 24, 48)
Sampler Return (0,) (1408, 24, 7)
Sampler Return (4307, 10, 20, 6) (4307, 10, 20, 1)
Sampler Return (0,) (4307, 1, 48)
Sampler Return (0,) (4307, 1, 7)
Sampler Return (1404, 10, 20, 6) (1404, 10, 20, 24)
Sampler Return (0,) (1404, 24, 48)
Sampler Return (0,) (1404, 24, 7)
Sampler Return (4303, 10, 20, 8) (4303, 10, 20, 1)
Sampler Return (0,) (4303, 1, 48)
Sampler Return (0,) (4303, 1, 7)
Sampler Return (1400, 10, 20, 8) (1400, 10, 20, 24)
Sampler Return (0,) (1400, 24, 48)
Sampler Return (0,) (1400, 24, 7)
Sampler Return (4299, 10, 20, 10) (4299, 10,

## time stamp = 48 (24H)

In [17]:
save_data_stamp(STAMP=48, LAG=10, STEP=1)

Sampler Return (4315, 10, 20, 4) (4315, 10, 20, 1)
Sampler Return (0,) (4315, 1, 48)
Sampler Return (0,) (4315, 1, 7)
Sampler Return (1388, 10, 20, 4) (1388, 10, 20, 48)
Sampler Return (0,) (1388, 48, 48)
Sampler Return (0,) (1388, 48, 7)


In [18]:
# for i in range(2,22,2):
#     save_data_stamp(STAMP=48, LAG=i, STEP=1)

Sampler Return (4223, 10, 20, 2) (4223, 10, 20, 1)
Sampler Return (0,) (4223, 1, 48)
Sampler Return (0,) (4223, 1, 7)
Sampler Return (1343, 10, 20, 2) (1343, 10, 20, 1)
Sampler Return (0,) (1343, 1, 48)
Sampler Return (0,) (1343, 1, 7)
