In [6]:
# import external libraries
import os
import sys
import random
import numpy as np
import pandas as pd
import torch
import math
import time

# import internal modules
sys.path.insert(1, '../src/')
from models.nn import MLP, TimeSeriesDataset
from utils.data_editor import lag, train_test_split

In [7]:
# read processed data
df = pd.read_csv("../data/processed/tidy_df.csv", index_col=[0, 1, 2])

# empty list for dataframes
y_test_list = []
y_hat_umlp = []

i = df.index.get_level_values(0).unique()[0]
print(i)

# y : "EPS"
y = df.loc[pd.IndexSlice[i, :, :], "EPS"]

# x, exogenous regressors : 'INV', 'AR', 'CAPX', 'GM', 'SA', 'ETR', 'LF'
#     x = df.loc[pd.IndexSlice[i, :, :], ['INV', 'AR', 'CAPX', 'GM', 'SA', 'ETR', 'LF']]

# Unlike statsmodel SARIMA package, NN needs to prepare lagged inputs manually if needed.
# y_lag and x_lag (lag 4 for now)
num_lag = 4
y_lag = lag(y, num_lag, drop_nan=False, reset_index=False)
#     x_lag = lag(x, num_lag, drop_nan=False, reset_index=False)

# Redefine data name as target (y) and feature (y_lag) (explanatory variable, predictor)
target = y
feature = y_lag

# save simple test data series
_, target_test_dataset = train_test_split(target, ratio=(4,1))
_, feature_test_dataset = train_test_split(feature, ratio=(4,1))

# drop nan caused by lag()
feature = feature.dropna(axis=0)
target = target[feature.index]

# setting torch
dtype = torch.float # double float problem in layer 
device = torch.device("cpu")

# Make data to torch.tensor
target = torch.tensor(target.values, dtype=dtype)
feature = torch.tensor(feature.values, dtype=dtype)
target_test_dataset = torch.tensor(target_test_dataset.values, dtype=dtype)
feature_test_dataset = torch.tensor(feature_test_dataset.values, dtype=dtype)

あらた


In [9]:
# rolling window data preparation

### ! Hyper-Parameter ! ##########################################################
# all period: 48, train 36, test 12
test_window = len(target_test_dataset)
print("test window: ", test_window)

train_window = len(target) - test_window
print("train window: ", train_window)
##################################################################################

train_dataset = TimeSeriesDataset(feature, target, train_window)
print("len of train dataset: ", len(train_dataset))
#     len(train_dataset) == len(target) - train_window = 48 - 36 = 12 == test_window

test window:  12
train window:  36
len of train dataset:  12


In [4]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=False)

for num_window, (feature_train, target_train) in enumerate(train_loader):
#     print(feature_train.size(), target_train.size())
    feature_train = feature_train[0] # extract single batch
    target_train = target_train[0] # extract single batch
    if (num_window == 0) | (num_window == 1):
        print("")
        print("WINDOW: ", num_window)
        print("FEATURES: ")
        print(feature_train)
        print(feature_train.size())
        print("TARGET: ")
        print(target_train)
        print(target_train.size())
        
    # mini-batch loader
    batch_dataset = TimeSeriesDataset(feature_train, target_train, train_window=None)
    minibatch_loader = torch.utils.data.DataLoader(batch_dataset, batch_size=1, shuffle=False)
    
    for num_mini_batch, (feature_train_mini_batch, target_train_mini_batch) in enumerate(minibatch_loader):
        if (num_window == 0) | (num_window == 1):
            print("    NUM MINI BATCH: ", num_mini_batch)
            print("    FEATURES BATCH; ")
            print(feature_train_mini_batch)
            print(feature_train_mini_batch.size())
            print("    TARGET BATCH; ")
            print(target_train_mini_batch)
            print(target_train_mini_batch.size())


WINDOW:  0
FEATURES: 
tensor([[-5.4800e+00,  3.1400e+00,  2.4200e+00,  2.6600e+00],
        [ 6.7800e+00, -5.4800e+00,  3.1400e+00,  2.4200e+00],
        [ 6.6600e+00,  6.7800e+00, -5.4800e+00,  3.1400e+00],
        [ 4.8300e+00,  6.6600e+00,  6.7800e+00, -5.4800e+00],
        [-1.0100e+00,  4.8300e+00,  6.6600e+00,  6.7800e+00],
        [-2.8400e+00, -1.0100e+00,  4.8300e+00,  6.6600e+00],
        [ 8.3300e+00, -2.8400e+00, -1.0100e+00,  4.8300e+00],
        [ 9.2400e+00,  8.3300e+00, -2.8400e+00, -1.0100e+00],
        [-1.2000e+00,  9.2400e+00,  8.3300e+00, -2.8400e+00],
        [ 4.0600e+00, -1.2000e+00,  9.2400e+00,  8.3300e+00],
        [ 7.6100e+00,  4.0600e+00, -1.2000e+00,  9.2400e+00],
        [ 6.2200e+00,  7.6100e+00,  4.0600e+00, -1.2000e+00],
        [ 3.5900e+00,  6.2200e+00,  7.6100e+00,  4.0600e+00],
        [ 5.2500e+00,  3.5900e+00,  6.2200e+00,  7.6100e+00],
        [ 6.6600e+00,  5.2500e+00,  3.5900e+00,  6.2200e+00],
        [ 3.5600e+00,  6.6600e+00,  5.2500e+00,

# Validation rolling sample window

In [11]:
# read processed data
df = pd.read_csv("../data/processed/tidy_df.csv", index_col=[0, 1, 2])

# empty list for dataframes
y_test_list = []
y_hat_umlp = []

i = df.index.get_level_values(0).unique()[0]
print(i)

# y : "EPS"
y = df.loc[pd.IndexSlice[i, :, :], "EPS"]

# x, exogenous regressors : 'INV', 'AR', 'CAPX', 'GM', 'SA', 'ETR', 'LF'
#     x = df.loc[pd.IndexSlice[i, :, :], ['INV', 'AR', 'CAPX', 'GM', 'SA', 'ETR', 'LF']]

# Unlike statsmodel SARIMA package, NN needs to prepare lagged inputs manually if needed.
# y_lag and x_lag (lag 4 for now)
num_lag = 4
y_lag = lag(y, num_lag, drop_nan=False, reset_index=False)
#     x_lag = lag(x, num_lag, drop_nan=False, reset_index=False)

# Redefine data name as target (y) and feature (y_lag) (explanatory variable, predictor)
target = y
feature = y_lag

# save simple test data series
_, target_test_dataset = train_test_split(target, test_size=12)
_, feature_test_dataset = train_test_split(feature, test_size=12)

# drop nan caused by lag()
feature = feature.dropna(axis=0)
target = target[feature.index]

# setting torch
dtype = torch.float # double float problem in layer 
device = torch.device("cpu")

# Make data to torch.tensor
target = torch.tensor(target.values, dtype=dtype)
feature = torch.tensor(feature.values, dtype=dtype)
target_test_dataset = torch.tensor(target_test_dataset.values, dtype=dtype)
feature_test_dataset = torch.tensor(feature_test_dataset.values, dtype=dtype)

あらた


In [12]:
# rolling window data preparation with validation

### ! Hyper-Parameter ! ##########################################################
# all period: 48 (train 24, valid 12, test 12)
test_window = len(target_test_dataset)
print("test window: ", test_window)

valid_window = test_window
print("valid window: ", valid_window)

train_window = len(target) - test_window - valid_window
print("train window: ", train_window)

##################################################################################

test window:  12
valid window:  12
train window:  24


In [13]:
train_valid_dataset = TimeSeriesDataset(feature, target, train_window+valid_window)
print("len of train_valid_dataset: ", len(train_valid_dataset))
train_valid_loader = torch.utils.data.DataLoader(train_valid_dataset, batch_size=1, shuffle=False)

for num_window, (feature_train_valid, target_train_valid) in enumerate(train_valid_dataset):
#     print(feature_train.size(), target_train.size())
#     feature_train_valid = feature_train_valid[0] # extract single window batch
#     target_train_valid = target_train_valid[0] # extract single window batch
#     if (num_window == 0) | (num_window == 1):
    print("")
    print("WINDOW: ", num_window)
    print("FEATURES: ")
    print(feature_train_valid)
    print(feature_train_valid.size())
    print("TARGET: ")
    print(target_train_valid)
    print(target_train_valid.size())
        
#     train_dataset = TimeSeriesDataset(feature_train_valid, target_train_valid, train_window)
        
#     # mini-batch loader
#     batch_dataset = TimeSeriesDataset(feature_train, target_train, train_window=None)
#     minibatch_loader = torch.utils.data.DataLoader(batch_dataset, batch_size=1, shuffle=False)
    
#     for num_mini_batch, (feature_train_mini_batch, target_train_mini_batch) in enumerate(minibatch_loader):
#         if (num_window == 0) | (num_window == 1):
#             print("    NUM MINI BATCH: ", num_mini_batch)
#             print("    FEATURES BATCH; ")
#             print(feature_train_mini_batch)
#             print(feature_train_mini_batch.size())
#             print("    TARGET BATCH; ")
#             print(target_train_mini_batch)
#             print(target_train_mini_batch.size())

len of train_valid_dataset:  12

WINDOW:  0
FEATURES: 
tensor([[-5.4800e+00,  3.1400e+00,  2.4200e+00,  2.6600e+00],
        [ 6.7800e+00, -5.4800e+00,  3.1400e+00,  2.4200e+00],
        [ 6.6600e+00,  6.7800e+00, -5.4800e+00,  3.1400e+00],
        [ 4.8300e+00,  6.6600e+00,  6.7800e+00, -5.4800e+00],
        [-1.0100e+00,  4.8300e+00,  6.6600e+00,  6.7800e+00],
        [-2.8400e+00, -1.0100e+00,  4.8300e+00,  6.6600e+00],
        [ 8.3300e+00, -2.8400e+00, -1.0100e+00,  4.8300e+00],
        [ 9.2400e+00,  8.3300e+00, -2.8400e+00, -1.0100e+00],
        [-1.2000e+00,  9.2400e+00,  8.3300e+00, -2.8400e+00],
        [ 4.0600e+00, -1.2000e+00,  9.2400e+00,  8.3300e+00],
        [ 7.6100e+00,  4.0600e+00, -1.2000e+00,  9.2400e+00],
        [ 6.2200e+00,  7.6100e+00,  4.0600e+00, -1.2000e+00],
        [ 3.5900e+00,  6.2200e+00,  7.6100e+00,  4.0600e+00],
        [ 5.2500e+00,  3.5900e+00,  6.2200e+00,  7.6100e+00],
        [ 6.6600e+00,  5.2500e+00,  3.5900e+00,  6.2200e+00],
        [ 3.560

KeyboardInterrupt: 