In [2]:
# import external libraries
import os
import sys
import random
import numpy as np
import pandas as pd
import torch
import math
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# import internal modules
sys.path.insert(1, '../src/')
from models.nn import MLP, TimeSeriesDataset
from utils.data_editor import lag, train_test_split
from utils.accuracy import MAE, MAPE, MSE

# Original Scale

In [26]:
# Univariate MLP on the original (unscaled) EPS series, evaluated with a
# rolling window: for every window the model is trained on the window's rows
# and then asked to predict the matching hold-out observation.

# read processed data (three index levels; the first one selects the series)
df = pd.read_csv("../data/processed/tidy_df.csv", index_col=[0, 1, 2])

# result containers
y_test_list = []
y_hat_umlp = []

# pick one series: the 4th-from-last key of the first index level
i = df.index.get_level_values(0).unique()[-4]
print(i)

# y : "EPS"
y = df.loc[pd.IndexSlice[i, :, :], "EPS"]

# x, exogenous regressors (not used in this univariate run):
#     'INV', 'AR', 'CAPX', 'GM', 'SA', 'ETR', 'LF'

# Unlike the statsmodels SARIMA package, the NN needs its lagged inputs to be
# prepared by hand (lag 4 for now).
num_lag = 4
y_lag = lag(y, num_lag, drop_nan=False, reset_index=False)

# Rename: target (y) and feature (y_lag, the explanatory variables)
target = y
feature = y_lag

# Keep the hold-out part of both series. The split is done before the NaN rows
# produced by lag() are dropped.
_, target_test_dataset = train_test_split(target, ratio=(4,1))
_, feature_test_dataset = train_test_split(feature, ratio=(4,1))

# drop NaN rows caused by lag() and align the target to the remaining rows
feature = feature.dropna(axis=0)
target = target[feature.index]

# torch settings
dtype = torch.float  # single precision; doubles caused problems in the layers
device = torch.device("cpu")

# convert everything to torch tensors
target = torch.tensor(target.values, dtype=dtype)
feature = torch.tensor(feature.values, dtype=dtype)
target_test_dataset = torch.tensor(target_test_dataset.values, dtype=dtype)
feature_test_dataset = torch.tensor(feature_test_dataset.values, dtype=dtype)

### ! Hyper-Parameter ! ##########################################################
# all periods: 48, train 36, test 12
test_window = len(target_test_dataset)
print("test window: ", test_window)

train_window = len(target) - test_window
print("train window: ", train_window)
##################################################################################

# rolling-window data preparation
# (presumably one item per window of `train_window` rows -- confirm in models.nn)
train_dataset = TimeSeriesDataset(feature, target, train_window)
print("len of train dataset: ", len(train_dataset))
# len(train_dataset) == len(target) - train_window = 48 - 36 = 12 == test_window
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=False)

# fix the seeds before the model is created so the run is repeatable
np.random.seed(0)
torch.manual_seed(0)

### ! Hyper-Parameter ! ##########################################################
num_epochs = 10000
learning_rate = 1e-3
input_units = 1
hidden_units = 1000
output_units = 1

model_name = 'umlp' + '_hid' + str(hidden_units) + '_lr' + str(learning_rate) + '_epo' + str(num_epochs)
##################################################################################

# optimisation steps at which the training loss is printed
log_steps = {0, 1, 100, 499, 5000, num_epochs - 1}

# iterate over the rolling windows
for num_window, (feature_train, target_train) in enumerate(train_loader):
    print("rolling window: ", num_window)
    feature_train = feature_train[0]  # strip the batch dimension (batch_size=1)
    target_train = target_train[0]  # strip the batch dimension (batch_size=1)

    # The model, loss and optimizer are created once, on the first window.
    # Later windows continue training from the previous window's weights and
    # optimizer state.
    if num_window == 0:
        mlp = MLP(input_features=feature_train.size()[1], hidden_units=hidden_units, output_units=output_units)
        criterion = torch.nn.MSELoss(reduction="mean")
        optimizer = torch.optim.Adam(mlp.parameters(), lr=learning_rate)

    # Training mode, matching the standardised run. Only matters if MLP holds
    # mode-dependent layers such as dropout or batch norm.
    mlp.train()
    for step in range(num_epochs):
        # forward pass; squeeze so the prediction has the same shape as the target
        target_pred = mlp(feature_train).squeeze(1)

        loss = criterion(target_pred, target_train)
        if step in log_steps:
            print(f"step {step}: loss {loss.item()}")

        # zero gradients, backward pass, weight update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Predict the hold-out observation of this window in evaluation mode and
    # without gradient tracking.
    mlp.eval()
    with torch.no_grad():
        target_test = target_test_dataset[num_window]
        feature_test = feature_test_dataset[num_window]
        y_hat_umlp.append(mlp(feature_test).squeeze().numpy())

ＷＤＢホールディングス
test window:  12
train window:  36
len of train dataset:  12
rolling window:  0
step 0: loss 9314380.0
step 1: loss 9312439.0
step 100: loss 9119387.0
step 499: loss 8409325.0
step 5000: loss 2917395.75
step 9999: loss 1732541.375
rolling window:  1
step 0: loss 1683435.375
step 1: loss 1684772.75
step 100: loss 1676189.5
step 499: loss 1625735.5
step 5000: loss 1359126.875
step 9999: loss 1214809.625
rolling window:  2
step 0: loss 1215475.875
step 1: loss 1222420.375
step 100: loss 1495302.75
step 499: loss 1416631.125
step 5000: loss 1168318.375
step 9999: loss 1060579.375
rolling window:  3
step 0: loss 1060599.625
step 1: loss 1060565.5
step 100: loss 1111067.25
step 499: loss 1091223.75
step 5000: loss 1114240.75
step 9999: loss 1025016.4375
rolling window:  4
step 0: loss 1025472.5625
step 1: loss 1025207.4375
step 100: loss 1103348.375
step 499: loss 1044879.4375
step 5000: loss 964406.4375
step 9999: loss 996024.3125
rolling window:  5
step 0: loss 995641.125
step 

In [27]:
# Hand-computed mean absolute error of the rolling forecasts, as a cross-check
# for the MAE helper called in the next cell.
np.abs(np.array(target_test_dataset) - np.array(y_hat_umlp)).mean()

114.32865

In [28]:
# MAE helper from utils.accuracy on the hold-out targets vs. the rolling
# forecasts; the recorded value matches the hand-computed one above.
MAE(np.array(target_test_dataset), np.array(y_hat_umlp))

114.32865

In [29]:
# MAPE helper from utils.accuracy on the same pair of arrays.
# NOTE(review): presumably returned as a ratio rather than a percentage -- confirm.
MAPE(np.array(target_test_dataset), np.array(y_hat_umlp))

3.1487675

In [30]:
# MSE helper from utils.accuracy on the same pair of arrays.
# NOTE(review): the recorded output is smaller than MAE**2, which a plain mean
# squared error cannot be -- confirm how utils.accuracy.MSE is defined/scaled.
MSE(np.array(target_test_dataset), np.array(y_hat_umlp))

16.414862

# Standardise

In [20]:
# Same rolling-window univariate MLP experiment as the original-scale run, but
# the lagged inputs are standardised inside every window, using statistics
# estimated from that window's training rows only.

# load the tidy panel (three index levels)
df = pd.read_csv("../data/processed/tidy_df.csv", index_col=[0, 1, 2])

# result containers
y_test_list = []
y_hat_umlp = []

# choose one series: 4th-from-last key of the first index level
i = df.index.get_level_values(0).unique()[-4]
print(i)

# dependent variable: "EPS" of the chosen series
y = df.loc[pd.IndexSlice[i, :, :], "EPS"]

# Lagged copies of y serve as the only inputs (lag 4 for now); they have to be
# built by hand because the network does not create them itself.
num_lag = 4
y_lag = lag(y, num_lag, drop_nan=False, reset_index=False)

# target / feature naming used from here on
target = y
feature = y_lag

# hold-out part of both series, taken before the NaN rows from lag() are removed
_, target_test_dataset = train_test_split(target, ratio=(4,1))
_, feature_test_dataset = train_test_split(feature, ratio=(4,1))

# remove the NaN rows introduced by lag() and realign the target
feature = feature.dropna(axis=0)
target = target[feature.index]

# torch configuration (single precision avoids dtype clashes in the layers)
dtype = torch.float
device = torch.device("cpu")

# numpy/pandas -> torch
target = torch.tensor(target.values, dtype=dtype)
feature = torch.tensor(feature.values, dtype=dtype)
target_test_dataset = torch.tensor(target_test_dataset.values, dtype=dtype)
feature_test_dataset = torch.tensor(feature_test_dataset.values, dtype=dtype)

# ---- window sizes (all periods: 48, train 36, test 12) -------------------------
test_window = len(target_test_dataset)
print("test window: ", test_window)

train_window = len(target) - test_window
print("train window: ", train_window)

# rolling-window dataset; its length equals test_window (48 - 36 = 12)
train_dataset = TimeSeriesDataset(feature, target, train_window)
print("len of train dataset: ", len(train_dataset))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=False)

# reproducibility
np.random.seed(0)
torch.manual_seed(0)

# ---- hyper-parameters ----------------------------------------------------------
num_epochs = 10000
learning_rate = 1e-3
input_units = 1
hidden_units = 1000
output_units = 1

model_name = f"umlp_hid{hidden_units}_lr{learning_rate}_epo{num_epochs}"

# steps at which the training loss is reported
log_steps = {0, 1, 100, 499, 5000, num_epochs - 1}

for num_window, (feature_train, target_train) in enumerate(train_loader):
    print("rolling window: ", num_window)

    # batch_size is 1, so index 0 removes the batch dimension
    feature_train = feature_train[0]
    target_train = target_train[0]

    # hold-out observation that belongs to this window
    target_test = target_test_dataset[num_window]
    feature_test = feature_test_dataset[num_window]

    # Standardise the inputs: fit on the training rows only, then reuse the same
    # mean/std for the single test row. The target is left on its original scale.
    feature_train_scaler = StandardScaler().fit(feature_train)
    feature_train_std = torch.tensor(feature_train_scaler.transform(feature_train), dtype=dtype)
    feature_test_row = feature_train_scaler.transform(feature_test.reshape(1, -1))
    feature_test_std = torch.tensor(feature_test_row.reshape(feature_test.shape), dtype=dtype)

    # Model, loss and optimizer exist from the first window onwards; subsequent
    # windows keep training the same network.
    if num_window == 0:
        mlp = MLP(input_features=feature_train_std.shape[1], hidden_units=hidden_units, output_units=output_units)
        criterion = torch.nn.MSELoss(reduction="mean")
        optimizer = torch.optim.Adam(mlp.parameters(), lr=learning_rate)

    # ---- training ----
    mlp.train()
    for step in range(num_epochs):
        # forward pass, squeezed to the shape of the target
        target_pred = mlp(feature_train_std).squeeze(1)
        loss = criterion(target_pred, target_train)

        if step in log_steps:
            print(f"step {step}: loss {loss.item()}")

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- one-step forecast for this window (eval mode, no gradient tracking) ----
    mlp.eval()
    with torch.no_grad():
        y_hat_umlp.append(mlp(feature_test_std).squeeze().numpy())

ＷＤＢホールディングス
test window:  12
train window:  36
len of train dataset:  12
rolling window:  0
step 0: loss 9314944.0
step 1: loss 9313099.0
step 100: loss 9096360.0
step 499: loss 7722076.5
step 5000: loss 1940638.25
step 9999: loss 910220.25
rolling window:  1
step 0: loss 910695.0
step 1: loss 910603.625
step 100: loss 901470.8125
step 499: loss 868259.25
step 5000: loss 558945.5625
step 9999: loss 296499.34375
rolling window:  2
step 0: loss 307185.25
step 1: loss 306515.46875
step 100: loss 293931.25
step 499: loss 280229.125
step 5000: loss 166083.59375
step 9999: loss 103654.828125
rolling window:  3
step 0: loss 104729.8515625
step 1: loss 104571.109375
step 100: loss 103078.3515625
step 499: loss 99817.5625
step 5000: loss 68881.3671875
step 9999: loss 42385.27734375
rolling window:  4
step 0: loss 48942.6953125
step 1: loss 48319.86328125
step 100: loss 41948.87890625
step 499: loss 40136.25390625
step 5000: loss 22820.26171875
step 9999: loss 8254.861328125
rolling window:  5
s

In [21]:
# Collected one-step forecasts, one entry per rolling window.
np.array(y_hat_umlp)

array([92.80571 , 38.798252, 35.09224 , 40.690945, 27.085918, 31.466843,
       38.31025 , 33.972435, 38.784184, 38.849777, 38.41575 , 43.80153 ],
      dtype=float32)

In [22]:
# Hand-computed mean absolute error against the last `test_window` observations
# of the raw series. Using `test_window` (set in the training cell) instead of a
# literal 12 keeps this check in step with the train/test split.
abs(y.iloc[-test_window:].values - np.array(y_hat_umlp)).mean()

10.879345506605262

In [23]:
# MAE helper from utils.accuracy for the standardised-input run; the recorded
# value agrees with the hand-computed one above (float32 vs. float64 rounding).
MAE(np.array(target_test_dataset), np.array(y_hat_umlp))

10.879346

In [24]:
# MAPE helper from utils.accuracy for the standardised-input run.
# NOTE(review): presumably returned as a ratio rather than a percentage -- confirm.
MAPE(np.array(target_test_dataset), np.array(y_hat_umlp))

0.300287

In [25]:
# MSE helper from utils.accuracy for the standardised-input run.
# NOTE(review): the recorded output is smaller than MAE**2, which a plain mean
# squared error cannot be -- confirm how utils.accuracy.MSE is defined/scaled.
MSE(np.array(target_test_dataset), np.array(y_hat_umlp))

0.25833967

standardiseすると、必ずしも精度は向上しないけど、そのままだと全然収束しないようなデータだとうまく収束するときもある。

"eBASE"は2013年度Q1に株式数が桁違いに増えている。スケールすると収束しやすくなってるかも (WOWOW, ＷＤＢホールディングスも同様)

* testを含めたスケーリングはしてはいけない(testは予測時に持っていないから。)
* trainでスケーリング(standardize)したスケール(mean と std)でfeature_testをスケーリング。
* train test split した後にscaling

https://datascience.stackexchange.com/questions/38395/standardscaler-before-and-after-splitting-data

https://stackoverflow.com/questions/63037248/is-it-correct-to-use-a-single-standardscaler-before-splitting-data

* targetのスケーリングはしても意味ない

https://stackoverflow.com/questions/56596653/inverse-scale-of-predicted-data-in-keras

* training mode and evaluation mode

https://stackoverflow.com/questions/60018578/what-does-model-eval-do-in-pytorch

In [117]:
# Tiny hand-made dataset used to illustrate train-only scaling.
# Targets: five training values and one test value.
y_train = np.arange(1, 6)
y_test = np.array([6])

# Training features, assembled column by column: the first column is constant,
# the other two vary.
x_train = np.column_stack([
    np.full(5, 30),
    [4, 5, 2, 3, 1],
    [43, 23, 53, 13, 33],
])

# Single test row.
x_test = np.array([[60, 3, 1]])

In [118]:
# Standardise the toy target: the scaler learns mean/std from the training
# values only and the same statistics are then applied to the test value.
y_train_scaler = StandardScaler()
y_train_std = y_train_scaler.fit_transform(y_train.reshape(-1, 1))
y_test_std = y_train_scaler.transform(y_test.reshape(-1, 1))
print(y_train_std)
print(y_test_std)

[[-1.41421356]
 [-0.70710678]
 [ 0.        ]
 [ 0.70710678]
 [ 1.41421356]]
[[2.12132034]]


In [119]:
# Standardise the toy features column by column with training statistics and
# reuse them for the test row.
x_train_scaler = StandardScaler()
x_train_std = x_train_scaler.fit_transform(x_train)
x_test_std = x_train_scaler.transform(x_test)
print(x_train_std)
print(x_test_std)

[[ 0.          0.70710678  0.70710678]
 [ 0.          1.41421356 -0.70710678]
 [ 0.         -0.70710678  1.41421356]
 [ 0.          0.         -1.41421356]
 [ 0.         -1.41421356  0.        ]]
[[30.         0.        -2.2627417]]


In [171]:
# Scaling check on the variables left over from the last rolling window
# (target_train / target_test / feature_train / feature_test and dtype come
# from the training cell, which must have been run first).

# Target: with_mean=False and with_std=False turn the scaler into an identity
# transform, so the "standardised" target equals the raw one.
target_train_scaler = StandardScaler(with_mean=False, with_std=False)
target_train_scaler.fit(target_train.reshape(-1, 1))
target_train_std = torch.tensor(
    target_train_scaler.transform(target_train.reshape(-1, 1)).reshape(target_train.shape),
    dtype=dtype,
)
target_test_std = torch.tensor(
    target_train_scaler.transform(target_test.reshape(-1, 1)).reshape(target_test.shape),
    dtype=dtype,
)

# Features: real standardisation, fitted on the training rows and applied to
# the single test row (reshaped to one sample, then back to its 1-D shape).
feature_train_scaler = StandardScaler()
feature_train_scaler.fit(feature_train)
feature_train_std = torch.tensor(
    feature_train_scaler.transform(feature_train).reshape(feature_train.shape),
    dtype=dtype,
)
feature_test_std = torch.tensor(
    feature_train_scaler.transform(feature_test.reshape(1, -1)).reshape(feature_test.shape),
    dtype=dtype,
)

In [172]:
# Raw vs. "standardised" training target; they are identical because the target
# scaler was built with with_mean=False and with_std=False.
target_train, target_train_std

(tensor([ 3.5900e+00,  5.2500e+00,  6.6600e+00,  3.5600e+00,  7.4600e+00,
          6.0900e+00,  4.5700e+00,  6.1000e+00,  1.4840e+01, -6.4877e-02,
          3.0000e+00,  8.8700e+00,  2.7300e+00,  1.1614e+01,  4.1860e+01,
          8.0890e+01,  2.7950e+01,  8.6670e+01,  7.6900e+01,  9.9700e+01,
          6.7500e+01,  1.2117e+02,  9.0920e+01,  1.1988e+02,  6.9340e+01,
          1.0542e+02,  9.0440e+01,  1.0624e+02,  9.9220e+01,  1.1685e+02,
          1.0176e+02,  1.1393e+02,  7.9900e+01,  1.4523e+02,  1.2068e+02,
          1.5335e+02]),
 tensor([ 3.5900e+00,  5.2500e+00,  6.6600e+00,  3.5600e+00,  7.4600e+00,
          6.0900e+00,  4.5700e+00,  6.1000e+00,  1.4840e+01, -6.4877e-02,
          3.0000e+00,  8.8700e+00,  2.7300e+00,  1.1614e+01,  4.1860e+01,
          8.0890e+01,  2.7950e+01,  8.6670e+01,  7.6900e+01,  9.9700e+01,
          6.7500e+01,  1.2117e+02,  9.0920e+01,  1.1988e+02,  6.9340e+01,
          1.0542e+02,  9.0440e+01,  1.0624e+02,  9.9220e+01,  1.1685e+02,
          1.01

In [173]:
# Raw vs. "standardised" test target; unchanged for the same reason as the
# training target (identity scaler).
target_test, target_test_std

(tensor(61.4034), tensor(61.4034))

In [174]:
# Raw vs. standardised training features of the last window (one column per lag).
feature_train, feature_train_std

(tensor([[ 6.2200e+00,  7.6100e+00,  4.0600e+00, -1.2000e+00],
         [ 3.5900e+00,  6.2200e+00,  7.6100e+00,  4.0600e+00],
         [ 5.2500e+00,  3.5900e+00,  6.2200e+00,  7.6100e+00],
         [ 6.6600e+00,  5.2500e+00,  3.5900e+00,  6.2200e+00],
         [ 3.5600e+00,  6.6600e+00,  5.2500e+00,  3.5900e+00],
         [ 7.4600e+00,  3.5600e+00,  6.6600e+00,  5.2500e+00],
         [ 6.0900e+00,  7.4600e+00,  3.5600e+00,  6.6600e+00],
         [ 4.5700e+00,  6.0900e+00,  7.4600e+00,  3.5600e+00],
         [ 6.1000e+00,  4.5700e+00,  6.0900e+00,  7.4600e+00],
         [ 1.4840e+01,  6.1000e+00,  4.5700e+00,  6.0900e+00],
         [-6.4877e-02,  1.4840e+01,  6.1000e+00,  4.5700e+00],
         [ 3.0000e+00, -6.4877e-02,  1.4840e+01,  6.1000e+00],
         [ 8.8700e+00,  3.0000e+00, -6.4877e-02,  1.4840e+01],
         [ 2.7300e+00,  8.8700e+00,  3.0000e+00, -6.4877e-02],
         [ 1.1614e+01,  2.7300e+00,  8.8700e+00,  3.0000e+00],
         [ 4.1860e+01,  1.1614e+01,  2.7300e+00,  8.870

In [175]:
# Raw vs. standardised test row, scaled with the statistics fitted on the
# training features.
feature_test, feature_test_std

(tensor([153.3500, 120.6800, 145.2300,  79.9000]),
 tensor([2.0153, 1.4145, 2.1031, 0.7033]))

In [177]:
# Round-trip check: undoing the scaling must give back the raw test row.
# The row is passed as a (1, n_features) sample, mirroring the forward
# transform, because the scaler's documented input is 2-D; the result is then
# restored to the original 1-D shape and dtype.
torch.tensor(
    feature_train_scaler.inverse_transform(feature_test_std.reshape(1, -1)).reshape(feature_test_std.shape),
    dtype=dtype,
)

tensor([153.3500, 120.6800, 145.2300,  79.9000])