## まずは`torch.nn.LSTM`の挙動チェック

In [1]:
import torch

# Fix the RNG so the randomly initialised LSTM weights are reproducible.
torch.manual_seed(0)

# Hyperparameters for the shape experiments on torch.nn.LSTM.
seq_len, input_dim, hidden_dim = 100, 4, 10  # time steps, features, hidden units
num_layers = 1  # number of stacked recurrent layers
batch_size = 1  # no mini-batching: every batch holds exactly one sequence

# Single-layer LSTM; num_layers is left at the constructor default.
lstm = torch.nn.LSTM(
    input_size=input_dim,
    hidden_size=hidden_dim,
    bias=True,
)

In [2]:
# Display the module repr to confirm the input and hidden sizes.
lstm

LSTM(4, 10)

In [3]:
# Number of parameter tensors held by layer 0.
# all_weights[0] is ordered [weight_ih (W_x), weight_hh (W_h), bias_ih (b_x), bias_hh (b_h)];
# each tensor stacks the four gates (i, f, g, o).
len(lstm.all_weights[0])

4

In [4]:
# W_x(i,f,g,o): input-to-hidden weights of all four gates, stacked row-wise
# (W_xi|W_xf|W_xg|W_xo) of shape (4*hidden_size, input_size)
lstm.weight_ih_l0.size() 
# lstm.weight_ih_l0 == lstm.all_weights[0][0]

torch.Size([40, 4])

In [5]:
# W_xi: the first hidden_size rows of the stacked matrix, shape (hidden_size, input_size)
lstm.weight_ih_l0[:hidden_dim, :].size()

torch.Size([10, 4])

In [6]:
# b_x(i,f,g,o): input-to-hidden biases of all four gates
# (b_xi|b_xf|b_xg|b_xo), of shape (4*hidden_size)
lstm.bias_ih_l0.size()
# lstm.bias_ih_l0 == lstm.all_weights[0][2]

torch.Size([40])

In [7]:
# W_h(i,f,g,o): hidden-to-hidden (recurrent) weights of all four gates
# (W_hi|W_hf|W_hg|W_ho) of shape (4*hidden_size, hidden_size)
lstm.weight_hh_l0.size() 
# lstm.weight_hh_l0 == lstm.all_weights[0][1]

torch.Size([40, 10])

In [8]:
# b_h(i,f,g,o): hidden-to-hidden biases of all four gates
# (b_hi|b_hf|b_hg|b_ho), of shape (4*hidden_size)
lstm.bias_hh_l0.size()
# lstm.bias_hh_l0 == lstm.all_weights[0][3]

torch.Size([40])

## toy data で`nn.LSTM`のoutputを確認

In [9]:
# toy data: random input of shape (seq_len, batch, input_size),
# the layout nn.LSTM expects when batch_first is left at its default (False)
x = torch.randn(seq_len, batch_size, input_dim)
x.size()

torch.Size([100, 1, 4])

In [10]:
# initial hidden state (the LSTM layer's activation) of shape (num_layers, batch, hidden_size);
# the batch size is read from the input tensor
h_0 = torch.randn(num_layers, x.size(1), hidden_dim)
h_0.size()

torch.Size([1, 1, 10])

In [11]:
# initial cell state (the memory cell) of shape (num_layers, batch, hidden_size)
c_0 = torch.randn(num_layers, x.size(1), hidden_dim)
c_0.size()

torch.Size([1, 1, 10])

In [12]:
# run the forward pass of the LSTM layer over the whole sequence;
# returns the per-step outputs plus the final hidden and cell states
output, (h_n, c_n) = lstm(x, (h_0, c_0))

In [13]:
# per-time-step hidden states: (seq_len, batch, hidden_size)
output.size()

torch.Size([100, 1, 10])

In [14]:
# flatten output to (seq_len * batch, hidden_dim) so it could be fed to a linear layer;
# with batch_size=1 this simply drops the batch dimension
h_out = output.view(-1, hidden_dim)
h_out.size()

torch.Size([100, 10])

In [15]:
# final hidden state: (num_layers, batch, hidden_size)
h_n.size()

torch.Size([1, 1, 10])

In [16]:
# final cell state: (num_layers, batch, hidden_size)
c_n.size()

torch.Size([1, 1, 10])

In [17]:
# the last time step of output equals h_n (element-wise comparison, broadcast to h_n's shape)
output[-1] == h_n

tensor([[[True, True, True, True, True, True, True, True, True, True]]])

## LSTM Class オブジェクトを構築

In [18]:
class LSTM(torch.nn.Module):
    """
    Long short-term memory network for one-step-ahead prediction.

    ``num_layers`` stacked LSTM layers (``batch_first=True``) followed by a linear
    read-out applied to the final hidden state of the last LSTM layer.
    The defaults reproduce the prototype setting: no mini-batches (batch_size=1)
    and a single hidden LSTM layer.

    Reference :
    https://stackabuse.com/time-series-prediction-using-lstm-with-pytorch-in-python
    https://colab.research.google.com/github/dlmacedo/starter-academic/blob/master/content/courses/deeplearning/notebooks/pytorch/Time_Series_Prediction_with_LSTM_Using_PyTorch.ipynb
    https://curiousily.com/posts/time-series-forecasting-with-lstm-for-daily-coronavirus-cases/
    https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/recurrent_neural_network/main.py#L39-L58
    """
    def __init__(self, input_dim, hidden_dim, output_dim=1, num_layers=1, batch_size=1):
        """
        Instantiate model layers.

        Parameters
        ----------
        input_dim : int
            the number of features in the input layer
        hidden_dim : int
            the number of units (neurons) in each hidden LSTM layer
        output_dim : int, Default: 1
            the number of dimensions of the output

        <value changes not recommended>
        num_layers : int, Default: 1
            number of stacked recurrent layers
        batch_size : int, Default: 1
            nominal batch size. Stored for reference only: forward() takes the
            actual batch size from its input.
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.batch_size = batch_size

        # the layers
        # first layer(s) (hidden lstm)
        # https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
        # num_layers=1 yields the same parameter names (weight_ih_l0, ...) as the
        # constructor default, so existing single-layer checkpoints still load.
        self.hidden_lstm = torch.nn.LSTM(input_size=input_dim,
                                         hidden_size=hidden_dim,
                                         num_layers=num_layers,
                                         bias=True,
                                         batch_first=True)
        # second layer (linear output layer)
        self.output = torch.nn.Linear(hidden_dim,
                                      output_dim,
                                      bias=True)

    def forward(self, x):
        r"""
        Forward propagate a stateless LSTM (zero initial hidden and cell states)
        and map the final hidden state through the linear output layer.

        math::
            \begin{array}{ll} \\
                i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
                f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
                g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
                o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
                c_t = f_t \odot c_{t-1} + i_t \odot g_t \\
                h_t = o_t \odot \tanh(c_t) \\
            \end{array}

        :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell state at time `t`.

        Parameters
        ----------
        x : torch.Tensor of shape (batch, seq_len, input_dim)
            features of the input sequences (batch_first=True layout).

        Returns
        -------
        out : torch.Tensor of shape (batch, output_dim)
            prediction computed from the last layer's hidden state at the final
            time step.
        """
        # No initial state is passed: torch.nn.LSTM then starts from all-zero
        # (h_0, c_0) on every call, i.e. a stateless LSTM. This also sizes the
        # state from the actual batch in x and matches its dtype and device.
        # Note that batch_first only affects input/output tensors; the states are
        # always laid out as (num_layers, batch, hidden_dim).
        _, (h_latest, _) = self.hidden_lstm(x)

        # Keep only the top layer's final hidden state: (batch, hidden_dim).
        h = h_latest[-1]

        # hidden LSTM layer to output layer
        out = self.output(h)
        return out

## データの準備

In [19]:
# import external libraries
import sys
import numpy as np
import pandas as pd
import torch
import math

# import internal modules
sys.path.insert(1, '../')
from models.nn import MLP
from utils.data_editor import lag, train_test_split

# set seeds for reproducibility
np.random.seed(0)
torch.manual_seed(0)

# Prepare data (TODO: move into a function)

# read processed data
df = pd.read_csv("../../dataset/processed/dataset.csv")

# column-name groups, selected by position
all_columns = df.columns
earning_v = all_columns[4:10].values
account_v_pl = all_columns[10:11].values
account_v_bs = all_columns[11:].values

# y: earnings per share (3 months), the last of the earning columns
y = df[earning_v[-1]]

# x: balance-sheet accounts followed by the P/L account, i.e.
#    ['棚卸資産', '資本的支出', '期末従業員数', '受取手形・売掛金／売掛金及びその他の短期債権',
#     '販売費及び一般管理費']
x = df[np.concatenate([account_v_bs, account_v_pl])]

# Unlike the MLP, the LSTM consumes a (seq_len x features) window of lagged inputs,
# so only lag-1 features (y || x) are prepared here.
num_lag = 1
y_lag = lag(y, num_lag, drop_nan=False, reset_index=False)
x_lag = lag(x, num_lag, drop_nan=False, reset_index=False)

# Redefine data names as target (y) and feature (y_lag and x_lag)
target = y
feature = pd.concat([y_lag, x_lag], axis="columns")

# time series train/test split (4/5) : (1/5), on a yearly basis
# Open question: with a windowed DataLoader, does a hard cut between train and
# test reduce the number of usable test samples?
target_train, target_test = train_test_split(target, ratio=(4, 1))
feature_train, feature_test = train_test_split(feature, ratio=(4, 1))

# Period labels, kept separately because the targets lose their index once they
# are converted to tensors (room for improvement).
train_date = df.loc[target_train.index, "決算期"]  # for plotting
test_date = df.loc[target_test.index, "決算期"]  # index for y_hat

In [20]:
# sanity check: features and targets must have the same number of rows per split
print(feature_train.shape, target_train.shape)
print(feature_test.shape, target_test.shape)

(56, 6) (56,)
(16, 6) (16,)


In [21]:
# Prepend the last (seq_len - 1) training rows to the test set so that the first
# test target already has a full window of seq_len inputs.
# NOTE: this cell is not idempotent; re-running it prepends the rows again.
seq_len = 4 # training_window for one step prediction ## HYPARAM

# tail(n) is used instead of [-n:] because n == 0 (seq_len == 1) must select no
# rows, whereas the slice [-0:] would select the entire training set.
feature_overlap = feature_train.tail(seq_len - 1)
target_overlap = target_train.tail(seq_len - 1)

print("add to feature_test")
print(feature_overlap)
print("add to target_test")
print(target_overlap)

feature_test = pd.concat([feature_overlap, feature_test], axis=0)
target_test = pd.concat([target_overlap, target_test], axis=0)

add to feature_test
    １株当たり利益_lag1  棚卸資産_lag1  資本的支出_lag1  期末従業員数_lag1  \
53        205.41  2201802.0   1594721.0     349131.0   
54        193.98  2195186.0   3079472.0     349508.0   
55        202.05  2104725.0   3773177.0     349766.0   

    受取手形・売掛金／売掛金及びその他の短期債権_lag1  販売費及び一般管理費_lag1  
53                    1922211.0         674482.0  
54                    1988350.0         726063.0  
55                    1915883.0         730674.0  
add to target_test
53    193.98
54    202.05
55    139.92
Name: １株当たり利益, dtype: float64


In [22]:
# the test split should now be (seq_len - 1) rows longer; the train split is unchanged
print(feature_train.shape, target_train.shape)
print(feature_test.shape, target_test.shape)

(56, 6) (56,)
(19, 6) (19,)


In [23]:
# Remove the leading training rows that lag() left as NaN, and keep the targets
# aligned with the surviving feature rows.
feature_train = feature_train.dropna(axis="index")
target_train = target_train.loc[feature_train.index]

# torch settings
dtype = torch.float  # the pandas data is float64; cast to float32 for the layers
device = torch.device("cpu")

# Convert all four splits to tensors of the chosen dtype.
target_train, feature_train, target_test, feature_test = (
    torch.tensor(split.values, dtype=dtype)
    for split in (target_train, feature_train, target_test, feature_test)
)

In [24]:
# tensor shapes after dropping the NaN row from the training split
print(feature_train.size(), target_train.size())
print(feature_test.size(), target_test.size())

torch.Size([55, 6]) torch.Size([55])
torch.Size([19, 6]) torch.Size([19])


In [25]:
# Unlike MLP, LSTM needs to prepare lagged inputs with seq_len matrix.
# inherit from the torch.utils.data.Dataset class
class TimeseriesDataset(torch.utils.data.Dataset):
    """
    Torch based time-series dataset object class.
    This object could be the input for torch.utils.data.DataLoader()

    Item ``i`` is the pair ``(feature_lag1[i : i + seq_len], target[i + seq_len - 1])``:
    a window of ``seq_len`` consecutive feature rows and the target aligned with
    the last row of that window.

    https://stackoverflow.com/questions/57893415/pytorch-dataloader-for-time-series-task
    """
    def __init__(self, feature_lag1, target, seq_len=None):
        """
        Parameters
        ----------
        feature_lag1 : torch.tensor
            explanatory variables matrix with only 1 lag.
        target : torch.tensor
            explained variable vector with no lag.
        seq_len : int, Default: None
            There're so many names for this.
                * rolling window size
                * sliding window size
                * training window size
                * sequence length
            Overall, this is the number of window lags of inputs for a single forward prediction.
            ``None`` means no windowing and is treated as 1.

        Raises
        ------
        ValueError
            if seq_len is smaller than 1.
        """
        if seq_len is None:
            # The declared default used to crash in __len__; a window of one row
            # is the natural "no windowing" behaviour.
            seq_len = 1
        if seq_len < 1:
            raise ValueError("seq_len must be >= 1, got {}".format(seq_len))
        self.feature_lag1 = feature_lag1
        self.target = target
        self.seq_len = seq_len

    def __len__(self):
        # Number of complete windows; never negative, even when the series is
        # shorter than one window (len() must return >= 0).
        return max(len(self.feature_lag1) - (self.seq_len - 1), 0)

    def __getitem__(self, index):
        n_windows = len(self)
        if index < 0:
            # support Python-style negative indexing
            index += n_windows
        if not 0 <= index < n_windows:
            # Fail loudly instead of returning a truncated or empty window.
            raise IndexError("index out of range for {} windows".format(n_windows))
        stop = index + self.seq_len
        return (self.feature_lag1[index:stop], self.target[stop - 1])

In [26]:
# Data loaders: one window per step, served in chronological order (no shuffling).
# Each item is a (feature window, target) pair as defined by TimeseriesDataset.
train_dataset = TimeseriesDataset(feature_train, target_train, seq_len=seq_len)
test_dataset = TimeseriesDataset(feature_test, target_test, seq_len=seq_len)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

## モデルのinstantiation

In [27]:
# Model hyperparameters
input_dim = feature_train.size()[1] # number of features
hidden_dim = 100 # number of hidden units
num_layers = 1 # number of recurrent hidden layers
batch_size = 1 # no mini-batching: one window per optimisation step
torch.manual_seed(0)

# Forward every hyperparameter defined above; num_layers and batch_size used to
# be defined here but silently ignored by the constructor call.
lstm = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=1,
            num_layers=num_layers, batch_size=batch_size)
lstm

LSTM(
  (hidden_lstm): LSTM(6, 100, batch_first=True)
  (output): Linear(in_features=100, out_features=1, bias=True)
)

## モデルの学習・訓練

In [28]:
learning_rate = 0.005
num_epochs = 1000
torch.manual_seed(0)

# Loss and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

# Optimisation steps per epoch = number of windows served by the loader.
# (len(feature_train) counts rows, which is seq_len - 1 more than the windows.)
total_step = len(train_loader)
for epoch in range(num_epochs):
    # One optimisation step per window, walking through time in order.
    for t, (feature_t, target_t) in enumerate(train_loader):
        # feature_t arrives from the loader as (batch, seq_len, input_size),
        # the batch_first layout the model expects, so no reshape is needed.

        # Forward pass
        target_t_pred = lstm(feature_t)
        loss = criterion(target_t_pred.view(1), target_t) # align both to torch.Size([1])

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # NOTE: the value logged here is the loss of the epoch's final window only,
    # not an average over the epoch.
    if (epoch+1) % 100 == 0:
        print ('Epoch [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, loss.item()))

Epoch [100/1000], Loss: 962.9001
Epoch [200/1000], Loss: 261.3388
Epoch [300/1000], Loss: 83.6308
Epoch [400/1000], Loss: 18.2971
Epoch [500/1000], Loss: 0.7993
Epoch [600/1000], Loss: 0.0015
Epoch [700/1000], Loss: 0.0001
Epoch [800/1000], Loss: 0.0013
Epoch [900/1000], Loss: 0.0014
Epoch [1000/1000], Loss: 0.0014


## モデルの保存

In [36]:
# Persist only the learned parameters (state_dict) of the trained model.
# NOTE(review): this cell's execution count (36) is higher than that of the
# evaluation cells that follow (31-35), so the checkpoint they loaded may come
# from an earlier run; re-run the notebook top to bottom to confirm.
torch.save(lstm.state_dict(), '../../assets/trained_models/_lstm_prototype.pth')

## モデルの評価(test)

In [31]:
# Load model: rebuild the architecture with the same hyperparameters used for
# training (instead of relying on constructor defaults), then restore the weights.
model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=1,
             num_layers=num_layers, batch_size=batch_size)
model.load_state_dict(torch.load("../../assets/trained_models/_lstm_prototype.pth"))

<All keys matched successfully>

In [32]:
# set model to evaluation mode (returns the module itself, hence the repr below)
model.eval()

LSTM(
  (hidden_lstm): LSTM(6, 100, batch_first=True)
  (output): Linear(in_features=100, out_features=1, bias=True)
)

In [33]:
# One-step-ahead predictions for every test window; gradients are not needed.
with torch.no_grad():
    y_hat_lstm = [model(feature_t).item() for feature_t, _ in test_loader]

In [34]:
# Wrap the predictions in a named Series indexed by the test-period labels.
# NOTE(review): "prptotype" looks like a typo for "prototype", but it determines
# the Series name and the output CSV filename, so it is left unchanged here.
model_name = "lstm_prptotype"
y_hat_lstm = pd.Series(y_hat_lstm, index=test_date, name='y_hat_' + model_name)

y_hat_lstm

決算期
2016-06-01    118.892754
2016-09-01     29.729053
2016-12-01     -9.026224
2017-03-01     70.719826
2017-06-01    118.939949
2017-09-01     57.563999
2017-12-01    100.504547
2018-03-01    104.691216
2018-06-01    120.140800
2018-09-01     57.563999
2018-12-01     52.551453
2019-03-01     96.856369
2019-06-01    120.475082
2019-09-01     57.791954
2019-12-01    118.219749
2020-03-01    109.860718
Name: y_hat_lstm_prptotype, dtype: float64

In [35]:
# write the predictions to CSV; the filename is derived from model_name
y_hat_lstm.to_csv('../../assets/y_hats/y_hat_' + model_name + '.csv')