In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the pre-built train / test arrays from disk and report their shapes.
train_path = './data/train_image_pop.npy'
test_path = './data/test_image_pop.npy'
train_dataset = np.load(train_path)
test_dataset = np.load(test_path)

# One shape per line, train first.
print(train_dataset.shape, test_dataset.shape, sep='\n')

(4320, 10, 20)
(1440, 10, 20)


# Sampler

In [4]:
def sampler_stamp2(data, stamp, lag, step, temp=False):
    """Build sliding-window samples from an array whose axis 0 is time.

    For every start row ``idx`` the input window is the ``lag`` rows
    ``idx, idx + stamp, ..., idx + stamp * (lag - 1)`` and the target is the
    single row ``idx + stamp * lag + step - 1``.

    Parameters
    ----------
    data : array-like
        Rows ordered along axis 0. With ``temp=False`` it must be 3-D
        ``(rows, H, W)``.
    stamp : int
        Spacing, in rows, between consecutive rows of one input window.
    lag : int
        Number of rows in each input window.
    step : int
        Offset of the target row. It is added in rows and is not multiplied
        by ``stamp``.
    temp : bool, default False
        If True, no input windows are built and only the targets are returned.

    Returns
    -------
    (x, y) when ``temp`` is False
        ``x`` has shape ``(n, H, W, lag)`` and ``y`` has shape ``(n, H, W, 1)``.
    y when ``temp`` is True
        Targets with a singleton axis inserted at position 1, e.g.
        ``(n, 1, features)`` for 2-D ``data``.

    ``n = max(len(data) - stamp * lag - step, 0)``. When ``n`` is 0 the
    returned arrays are empty but keep their trailing dimensions, so they can
    still be concatenated or saved alongside non-empty results.
    """
    data = np.asarray(data)

    # Same number of samples as iterating range(num_row - stamp*lag - step);
    # clamped at zero so a too-short series produces empty outputs.
    # NOTE(review): with this count the last target index is len(data) - 2, so
    # the final row is never used as a target. Kept unchanged so that sample
    # counts stay the same -- confirm whether that is intended.
    num_samples = max(len(data) - stamp * lag - step, 0)
    starts = np.arange(num_samples)

    # Target row for every sample, gathered in a single indexing operation.
    data_y = data[starts + (stamp * lag + step - 1)]

    if not temp:
        # window_rows[s, k] = starts[s] + stamp * k  -> shape (n, lag)
        window_rows = starts[:, None] + stamp * np.arange(lag)[None, :]
        # data[window_rows] is (n, lag, H, W); move the lag axis last.
        data_x = np.ascontiguousarray(
            np.transpose(data[window_rows], [0, 2, 3, 1])
        )
        data_y = np.expand_dims(data_y, axis=-1)
    else:
        # No inputs in temporal mode; an empty list keeps the log line's
        # first shape as (0,).
        data_x = []
        data_y = np.expand_dims(data_y, axis=1)

    print("Sampler Return", np.shape(data_x), np.shape(data_y))

    if not temp:
        return data_x, data_y
    return data_y

In [5]:
# sampler_stamp2(train_dataset, stamp=2, lag=8, step=24, temp=True)  

# Make Temporal Information

In [6]:
# Report the shapes of both splits on a single line.
print(
    "Train shape: ", np.shape(train_dataset),
    ", Test shape: ", np.shape(test_dataset),
)

# Setting Some Parameters
# Row counts of each split (length of axis 0) and their total.
num_train = len(train_dataset)
num_test = len(test_dataset)
num_row = num_train + num_test
print('num_row: ', num_row)

Train shape:  (4320, 10, 20) , Test shape:  (1440, 10, 20)
num_row:  5760


In [7]:
### Initialize numpy array of temporal information (one-hot encoding)
SLOTS_PER_DAY = 48  # width of the 30-minute one-hot; the slot index cycles every 48 rows
DAYS_PER_WEEK = 7   # width of the day-of-week one-hot
datasets_min_30 = np.zeros([num_row, SLOTS_PER_DAY])
datasets_dow = np.zeros([num_row, DAYS_PER_WEEK])

# One-hot ("dummy") encode every row.
# 30 mins, and day-of-week index are calculated below
# NOTE(review): assumes row 0 is slot 0 of day-of-week index 0 and that rows
# are consecutive 30-minute steps -- confirm against the data source.
row_ids = np.arange(num_row)
# Row r gets a 1 in column r % 48 (slot within the day) ...
datasets_min_30[row_ids, row_ids % SLOTS_PER_DAY] = 1
# ... and a 1 in column (r // 48) % 7 (day index within the week).
datasets_dow[row_ids, (row_ids // SLOTS_PER_DAY) % DAYS_PER_WEEK] = 1

In [8]:
def train_test_split(data, idx):
    """Cut ``data`` at position ``idx`` along its first axis.

    Returns a pair ``(head, tail)`` where ``head`` is everything before
    ``idx`` and ``tail`` is everything from ``idx`` onwards.
    """
    head = data[:idx]
    tail = data[idx:]
    return head, tail

In [9]:
# Split Train & Test Period

# The boundary is the number of training rows, so each temporal encoding is
# cut at the same row where the train array ends and the test array begins.
train_index = num_train
(min_30_train, min_30_test), (dow_train, dow_test) = [
    train_test_split(encoded, train_index)
    for encoded in (datasets_min_30, datasets_dow)
]

print(min_30_train.shape, min_30_test.shape)
print(dow_train.shape, dow_test.shape)

(4320, 48) (1440, 48)
(4320, 7) (1440, 7)


# Save Final Data

In [10]:
def save_data_stamp2(STAMP, LAG, STEP):
    """Sample the train and test splits with one (STAMP, LAG, STEP) setting
    and write the results to ``./data`` as ``.npz`` files.

    For each split three files are written: the input windows (``x_*``), the
    targets (``y_*``) and the temporal features (``temporal_*``), the latter
    being the day-of-week targets concatenated with the 30-minute targets
    along the last axis. Each array is passed to ``np.savez`` positionally.

    Reads the module-level arrays ``train_dataset``, ``min_30_train``,
    ``dow_train``, ``test_dataset``, ``min_30_test`` and ``dow_test``.
    Returns nothing.
    """
    window = {"stamp": STAMP, "lag": LAG, "step": STEP}

    def build(images, min_30, dow):
        # Sample inputs/targets, then the two temporal encodings (targets only).
        x, y = sampler_stamp2(images, temp=False, **window)
        min_30_y = sampler_stamp2(min_30, temp=True, **window)
        dow_y = sampler_stamp2(dow, temp=True, **window)
        # Day-of-week columns come first, 30-minute columns second.
        return x, y, np.concatenate((dow_y, min_30_y), axis=-1)

    def dump(outputs):
        # Write each (path, array) pair in the given order.
        for path, array in outputs:
            np.savez(path, array)

    # train
    x_train, y_train, temporal_train = build(train_dataset, min_30_train, dow_train)
    dump([
        (f'./data/x_train_stamp{STAMP}_lag{LAG}_step{STEP}_v2.npz', x_train),
        (f'./data/y_train_stamp{STAMP}_lag{LAG}_step{STEP}_v2.npz', y_train),
        (f'./data/temporal_train_stamp{STAMP}_lag{LAG}_step{STEP}_v2.npz', temporal_train),
    ])

    # test
    x_test, y_test, temporal_test = build(test_dataset, min_30_test, dow_test)
    dump([
        (f'./data/x_test_stamp{STAMP}_lag{LAG}_step{STEP}_v2.npz', x_test),
        (f'./data/y_test_stamp{STAMP}_lag{LAG}_step{STEP}_v2.npz', y_test),
        (f'./data/temporal_test_stamp{STAMP}_lag{LAG}_step{STEP}_v2.npz', temporal_test),
    ])

## Time stamps 1–48 (0.5H–24H), lags 2–24

In [12]:
# Time units in hours; one row of the data is half an hour, so stamp = 2 * hours.
time_unit = [0.5, 1, 2, 3, 4, 6, 8, 12, 24]
stamp_list = [int(hours * 2) for hours in time_unit]
# step = 48 / stamp.
# NOTE(review): presumably chosen so stamp * step equals 48 rows, but
# sampler_stamp2 adds `step` to the target index without multiplying by
# stamp -- verify the intended forecast horizon.
step_list = [int(48 / stamp) for stamp in stamp_list]
# Even lags 2, 4, ..., 24.
lag_list = [2 * (k + 1) for k in range(12)]

for stamp, step in zip(stamp_list, step_list):  # for each time unit
    for lag in lag_list:  # for each lag
        save_data_stamp2(STAMP=stamp, LAG=lag, STEP=step)

Sampler Return (4270, 10, 20, 2) (4270, 10, 20, 1)
Sampler Return (0,) (4270, 1, 48)
Sampler Return (0,) (4270, 1, 7)
Sampler Return (1390, 10, 20, 2) (1390, 10, 20, 1)
Sampler Return (0,) (1390, 1, 48)
Sampler Return (0,) (1390, 1, 7)
Sampler Return (4268, 10, 20, 4) (4268, 10, 20, 1)
Sampler Return (0,) (4268, 1, 48)
Sampler Return (0,) (4268, 1, 7)
Sampler Return (1388, 10, 20, 4) (1388, 10, 20, 1)
Sampler Return (0,) (1388, 1, 48)
Sampler Return (0,) (1388, 1, 7)
Sampler Return (4266, 10, 20, 6) (4266, 10, 20, 1)
Sampler Return (0,) (4266, 1, 48)
Sampler Return (0,) (4266, 1, 7)
Sampler Return (1386, 10, 20, 6) (1386, 10, 20, 1)
Sampler Return (0,) (1386, 1, 48)
Sampler Return (0,) (1386, 1, 7)
Sampler Return (4264, 10, 20, 8) (4264, 10, 20, 1)
Sampler Return (0,) (4264, 1, 48)
Sampler Return (0,) (4264, 1, 7)
Sampler Return (1384, 10, 20, 8) (1384, 10, 20, 1)
Sampler Return (0,) (1384, 1, 48)
Sampler Return (0,) (1384, 1, 7)
Sampler Return (4262, 10, 20, 10) (4262, 10, 20, 1)
Samp

Sampler Return (4212, 10, 20, 24) (4212, 10, 20, 1)
Sampler Return (0,) (4212, 1, 48)
Sampler Return (0,) (4212, 1, 7)
Sampler Return (1332, 10, 20, 24) (1332, 10, 20, 1)
Sampler Return (0,) (1332, 1, 48)
Sampler Return (0,) (1332, 1, 7)
Sampler Return (4300, 10, 20, 2) (4300, 10, 20, 1)
Sampler Return (0,) (4300, 1, 48)
Sampler Return (0,) (4300, 1, 7)
Sampler Return (1420, 10, 20, 2) (1420, 10, 20, 1)
Sampler Return (0,) (1420, 1, 48)
Sampler Return (0,) (1420, 1, 7)
Sampler Return (4288, 10, 20, 4) (4288, 10, 20, 1)
Sampler Return (0,) (4288, 1, 48)
Sampler Return (0,) (4288, 1, 7)
Sampler Return (1408, 10, 20, 4) (1408, 10, 20, 1)
Sampler Return (0,) (1408, 1, 48)
Sampler Return (0,) (1408, 1, 7)
Sampler Return (4276, 10, 20, 6) (4276, 10, 20, 1)
Sampler Return (0,) (4276, 1, 48)
Sampler Return (0,) (4276, 1, 7)
Sampler Return (1396, 10, 20, 6) (1396, 10, 20, 1)
Sampler Return (0,) (1396, 1, 48)
Sampler Return (0,) (1396, 1, 7)
Sampler Return (4264, 10, 20, 8) (4264, 10, 20, 1)
Sam

Sampler Return (4052, 10, 20, 22) (4052, 10, 20, 1)
Sampler Return (0,) (4052, 1, 48)
Sampler Return (0,) (4052, 1, 7)
Sampler Return (1172, 10, 20, 22) (1172, 10, 20, 1)
Sampler Return (0,) (1172, 1, 48)
Sampler Return (0,) (1172, 1, 7)
Sampler Return (4028, 10, 20, 24) (4028, 10, 20, 1)
Sampler Return (0,) (4028, 1, 48)
Sampler Return (0,) (4028, 1, 7)
Sampler Return (1148, 10, 20, 24) (1148, 10, 20, 1)
Sampler Return (0,) (1148, 1, 48)
Sampler Return (0,) (1148, 1, 7)
Sampler Return (4285, 10, 20, 2) (4285, 10, 20, 1)
Sampler Return (0,) (4285, 1, 48)
Sampler Return (0,) (4285, 1, 7)
Sampler Return (1405, 10, 20, 2) (1405, 10, 20, 1)
Sampler Return (0,) (1405, 1, 48)
Sampler Return (0,) (1405, 1, 7)
Sampler Return (4253, 10, 20, 4) (4253, 10, 20, 1)
Sampler Return (0,) (4253, 1, 48)
Sampler Return (0,) (4253, 1, 7)
Sampler Return (1373, 10, 20, 4) (1373, 10, 20, 1)
Sampler Return (0,) (1373, 1, 48)
Sampler Return (0,) (1373, 1, 7)
Sampler Return (4221, 10, 20, 6) (4221, 10, 20, 1)
S

Sampler Return (0,) (3359, 1, 48)
Sampler Return (0,) (3359, 1, 7)
Sampler Return (479, 10, 20, 20) (479, 10, 20, 1)
Sampler Return (0,) (479, 1, 48)
Sampler Return (0,) (479, 1, 7)
Sampler Return (3263, 10, 20, 22) (3263, 10, 20, 1)
Sampler Return (0,) (3263, 1, 48)
Sampler Return (0,) (3263, 1, 7)
Sampler Return (383, 10, 20, 22) (383, 10, 20, 1)
Sampler Return (0,) (383, 1, 48)
Sampler Return (0,) (383, 1, 7)
Sampler Return (3167, 10, 20, 24) (3167, 10, 20, 1)
Sampler Return (0,) (3167, 1, 48)
Sampler Return (0,) (3167, 1, 7)
Sampler Return (287, 10, 20, 24) (287, 10, 20, 1)
Sampler Return (0,) (287, 1, 48)
Sampler Return (0,) (287, 1, 7)
