In [2]:
import pandas as pd
import numpy as np

In [4]:
# Read the prepared train / test arrays from disk.
# The recorded cell output shows shapes (2160, 10, 20) and (720, 10, 20);
# axis 0 is presumably time and the other two a spatial grid -- TODO confirm.
train_dataset = np.load('./data/train_image_dust.npy')
test_dataset = np.load('./data/test_image_dust.npy')

# One shape per line: train first, then test.
print(train_dataset.shape, test_dataset.shape, sep='\n')

(2160, 10, 20)
(720, 10, 20)


# Sampler

In [5]:
def sampler_stamp(data, stamp, lag, step, temp=False):
    """Cut a time-ordered array into sliding (input, target) windows.

    For each start offset ``idx`` the input window gathers ``lag`` rows taken
    ``stamp`` rows apart (``idx, idx + stamp, ..., idx + stamp * (lag - 1)``)
    and the target window is the ``step`` *consecutive* rows that start at
    ``idx + stamp * lag``.

    Parameters
    ----------
    data : array-like
        Sequence indexed along axis 0. It is coerced with ``np.asarray`` so
        nested lists work as well as ndarrays. In image mode
        (``temp=False``) it must be 3-D, because the gathered rows are
        transposed with axes ``[1, 2, 0]``.
    stamp : int
        Row distance between consecutive lagged inputs.
    lag : int
        Number of lagged rows per input window.
    step : int
        Number of consecutive rows per target window.
    temp : bool, default False
        When True only the target windows are built and they are left
        untransposed; no input windows are produced.

    Returns
    -------
    (x, y) : tuple of np.ndarray
        When ``temp`` is False. For 3-D ``data`` of shape ``(T, H, W)`` the
        shapes are ``(n, H, W, lag)`` and ``(n, H, W, step)`` with
        ``n = T - stamp * lag - step``.
    y : np.ndarray
        When ``temp`` is True, shape ``(n, step, *data.shape[1:])``.

    Notes
    -----
    A summary line with both shapes is printed on every call. In ``temp``
    mode the first shape is always ``(0,)`` because no inputs are collected.
    """
    # Coerce once so that the fancy indexing below also works for plain
    # Python lists (list[[...], :] would raise TypeError).
    data = np.asarray(data)

    first_target = stamp * lag
    # NOTE(review): this bound stops one offset short of the last window that
    # still fits (offset == len(data) - stamp*lag - step is never visited), so
    # the final row of `data` never appears in a target. Left unchanged to keep
    # the sample counts of previously generated files -- confirm it is intended.
    num_windows = len(data) - first_target - step

    data_x, data_y = [], []
    for idx in range(num_windows):
        y = np.array(data[first_target + idx : first_target + idx + step])
        if not temp:
            lag_rows = [stamp * i + idx for i in range(lag)]
            # Move the gathered time axis to the end: (lag, H, W) -> (H, W, lag).
            data_x.append(np.transpose(data[lag_rows, :], [1, 2, 0]))
            y = np.transpose(y, [1, 2, 0])
        data_y.append(y)
    print("Sampler Return", np.shape(data_x), np.shape(data_y))

    if not temp:
        return np.array(data_x), np.array(data_y)
    return np.array(data_y)

# Make Temporal Information

In [6]:
# Echo the raw array shapes before deriving the row counts.
print("Train shape: ", train_dataset.shape, ", Test shape: ", test_dataset.shape)

# Row counts along axis 0 of each split, and their total. The total is used
# to size the temporal encodings so they cover train and test back to back.
num_train = len(train_dataset)
num_test = len(test_dataset)
num_row = num_train + num_test
print('num_row: ', num_row)

Train shape:  (2160, 10, 20) , Test shape:  (720, 10, 20)
num_row:  2880


In [7]:
### One-hot (dummy) encoding of the temporal position of every row
# Row r gets slot r % 24 in a 24-wide encoding and slot (r // 24) % 7 in a
# 7-wide encoding. The variable names suggest hour-of-day and day-of-week;
# that assumes one row per hour starting at hour 0 of day slot 0 -- TODO confirm.
row_ids = np.arange(num_row)
hour_slot = row_ids % 24
day_slot = (row_ids // 24) % 7

datasets_1h = np.zeros([num_row, 24])
datasets_dow = np.zeros([num_row, 7])

# Set exactly one 1 per row via paired (row, column) integer indexing.
datasets_1h[row_ids, hour_slot] = 1
datasets_dow[row_ids, day_slot] = 1

In [8]:
def train_test_split(data, idx):
    """Cut ``data`` in two at position ``idx`` along its first axis.

    Returns a ``(head, tail)`` pair where ``head`` is ``data[:idx]`` and
    ``tail`` is ``data[idx:]``; plain slicing is used, so the element order
    is preserved and nothing is shuffled.
    """
    head = data[:idx]
    tail = data[idx:]
    return head, tail

In [9]:
# Split Train & Test Period

# The temporal encodings were built for train and test rows back to back, so
# they are cut at the number of training rows.
train_index = num_train
(hour_1_train, hour_1_test), (dow_train, dow_test) = (
    train_test_split(encoded, train_index)
    for encoded in (datasets_1h, datasets_dow)
)

print(hour_1_train.shape, hour_1_test.shape)
print(dow_train.shape, dow_test.shape)

(2160, 24) (720, 24)
(2160, 7) (720, 7)


# Save Final Data

In [10]:
def save_data_stamp(STAMP, LAG, STEP):
    """Window the train / test data for one (STAMP, LAG, STEP) setting and save it.

    Reads the module-level arrays ``train_dataset``, ``test_dataset``,
    ``hour_1_train``, ``hour_1_test``, ``dow_train`` and ``dow_test``, runs
    each through ``sampler_stamp`` and writes six ``.npz`` files under
    ``./data/``. Each array is passed to ``np.savez`` positionally.

    The training targets always use ``step=1``; only the test split uses
    ``STEP``, which is why ``STEP`` appears in the test file names only.
    The temporal file is the day-of-week targets followed by the hour targets
    along the last axis.
    """
    window = {"stamp": STAMP, "lag": LAG}

    # ---- train: single-row targets ----
    train_x, train_y = sampler_stamp(train_dataset, step=1, temp=False, **window)
    train_hour = sampler_stamp(hour_1_train, step=1, temp=True, **window)
    train_dow = sampler_stamp(dow_train, step=1, temp=True, **window)
    train_temporal = np.concatenate((train_dow, train_hour), axis=-1)

    np.savez(f'./data/x_train_stamp{STAMP}_lag{LAG}.npz', train_x)
    np.savez(f'./data/y_train_stamp{STAMP}_lag{LAG}.npz', train_y)
    np.savez(f'./data/temporal_train_stamp{STAMP}_lag{LAG}.npz', train_temporal)

    # ---- test: STEP-row targets ----
    test_x, test_y = sampler_stamp(test_dataset, step=STEP, temp=False, **window)
    test_hour = sampler_stamp(hour_1_test, step=STEP, temp=True, **window)
    test_dow = sampler_stamp(dow_test, step=STEP, temp=True, **window)
    test_temporal = np.concatenate((test_dow, test_hour), axis=-1)

    np.savez(f'./data/x_test_stamp{STAMP}_lag{LAG}_step{STEP}.npz', test_x)
    np.savez(f'./data/y_test_stamp{STAMP}_lag{LAG}_step{STEP}.npz', test_y)
    np.savez(f'./data/temporal_test_stamp{STAMP}_lag{LAG}_step{STEP}.npz', test_temporal)

In [15]:
# Sweep configuration: every stamp divides 24, and its test target length is
# 24 // stamp; lags run over the even numbers 2..24.
time_unit = [1, 2, 3, 4, 6, 8, 12, 24]
stamp_list = time_unit
step_list = [24 // unit for unit in stamp_list]
lag_list = list(range(2, 25, 2))

for stamp_value, step_value in zip(stamp_list, step_list):  # for each time unit
    for lag_value in lag_list:  # for each lag
        save_data_stamp(STAMP=stamp_value, LAG=lag_value, STEP=step_value)

Sampler Return (2157, 10, 20, 2) (2157, 10, 20, 1)
Sampler Return (0,) (2157, 1, 24)
Sampler Return (0,) (2157, 1, 7)
Sampler Return (694, 10, 20, 2) (694, 10, 20, 24)
Sampler Return (0,) (694, 24, 24)
Sampler Return (0,) (694, 24, 7)
Sampler Return (2155, 10, 20, 4) (2155, 10, 20, 1)
Sampler Return (0,) (2155, 1, 24)
Sampler Return (0,) (2155, 1, 7)
Sampler Return (692, 10, 20, 4) (692, 10, 20, 24)
Sampler Return (0,) (692, 24, 24)
Sampler Return (0,) (692, 24, 7)
Sampler Return (2153, 10, 20, 6) (2153, 10, 20, 1)
Sampler Return (0,) (2153, 1, 24)
Sampler Return (0,) (2153, 1, 7)
Sampler Return (690, 10, 20, 6) (690, 10, 20, 24)
Sampler Return (0,) (690, 24, 24)
Sampler Return (0,) (690, 24, 7)
Sampler Return (2151, 10, 20, 8) (2151, 10, 20, 1)
Sampler Return (0,) (2151, 1, 24)
Sampler Return (0,) (2151, 1, 7)
Sampler Return (688, 10, 20, 8) (688, 10, 20, 24)
Sampler Return (0,) (688, 24, 24)
Sampler Return (0,) (688, 24, 7)
Sampler Return (2149, 10, 20, 10) (2149, 10, 20, 1)
Sampler 

Sampler Return (2087, 10, 20, 24) (2087, 10, 20, 1)
Sampler Return (0,) (2087, 1, 24)
Sampler Return (0,) (2087, 1, 7)
Sampler Return (640, 10, 20, 24) (640, 10, 20, 8)
Sampler Return (0,) (640, 8, 24)
Sampler Return (0,) (640, 8, 7)
Sampler Return (2151, 10, 20, 2) (2151, 10, 20, 1)
Sampler Return (0,) (2151, 1, 24)
Sampler Return (0,) (2151, 1, 7)
Sampler Return (706, 10, 20, 2) (706, 10, 20, 6)
Sampler Return (0,) (706, 6, 24)
Sampler Return (0,) (706, 6, 7)
Sampler Return (2143, 10, 20, 4) (2143, 10, 20, 1)
Sampler Return (0,) (2143, 1, 24)
Sampler Return (0,) (2143, 1, 7)
Sampler Return (698, 10, 20, 4) (698, 10, 20, 6)
Sampler Return (0,) (698, 6, 24)
Sampler Return (0,) (698, 6, 7)
Sampler Return (2135, 10, 20, 6) (2135, 10, 20, 1)
Sampler Return (0,) (2135, 1, 24)
Sampler Return (0,) (2135, 1, 7)
Sampler Return (690, 10, 20, 6) (690, 10, 20, 6)
Sampler Return (0,) (690, 6, 24)
Sampler Return (0,) (690, 6, 7)
Sampler Return (2127, 10, 20, 8) (2127, 10, 20, 1)
Sampler Return (0,)

Sampler Return (1967, 10, 20, 24) (1967, 10, 20, 1)
Sampler Return (0,) (1967, 1, 24)
Sampler Return (0,) (1967, 1, 7)
Sampler Return (525, 10, 20, 24) (525, 10, 20, 3)
Sampler Return (0,) (525, 3, 24)
Sampler Return (0,) (525, 3, 7)
Sampler Return (2135, 10, 20, 2) (2135, 10, 20, 1)
Sampler Return (0,) (2135, 1, 24)
Sampler Return (0,) (2135, 1, 7)
Sampler Return (694, 10, 20, 2) (694, 10, 20, 2)
Sampler Return (0,) (694, 2, 24)
Sampler Return (0,) (694, 2, 7)
Sampler Return (2111, 10, 20, 4) (2111, 10, 20, 1)
Sampler Return (0,) (2111, 1, 24)
Sampler Return (0,) (2111, 1, 7)
Sampler Return (670, 10, 20, 4) (670, 10, 20, 2)
Sampler Return (0,) (670, 2, 24)
Sampler Return (0,) (670, 2, 7)
Sampler Return (2087, 10, 20, 6) (2087, 10, 20, 1)
Sampler Return (0,) (2087, 1, 24)
Sampler Return (0,) (2087, 1, 7)
Sampler Return (646, 10, 20, 6) (646, 10, 20, 2)
Sampler Return (0,) (646, 2, 24)
Sampler Return (0,) (646, 2, 7)
Sampler Return (2063, 10, 20, 8) (2063, 10, 20, 1)
Sampler Return (0,)