## まずは`torch.nn.LSTM`の挙動チェック

In [1]:
import torch

torch.manual_seed(0)

seq_len = 100 # レコード期間
input_dim = 4 # 特徴量数
hidden_dim = 10 # hidden unit数
num_layers = 1 # rnn hidden layer数
batch_size = 1 # ミニバッチしたくないからバッチサイズ=1でいいってことだよね?(塊でやらない)

lstm = torch.nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, bias=True)

In [2]:
lstm

LSTM(4, 10)

In [3]:
# 全ウェイト数 W_x(i,f,g,o), b_x(i,f,g,o), W_h(i,f,g,o), b_h(i,f,g,o)
len(lstm.all_weights[0])

4

In [4]:
# W_x(i,f,g,o)
# (W_xi|W_xf|W_xg|W_xo) of shape (4*hidden_size, input_size)
lstm.weight_ih_l0.size() 
# lstm.weight_ih_l0 == lstm.all_weights[0][0]

torch.Size([40, 4])

In [5]:
# W_xi, shape (hidden_size, input_size)
lstm.weight_ih_l0[:hidden_dim, :].size()

torch.Size([10, 4])

In [6]:
# b_x(i,f,g,o) 
# (b_xi|b_xf|b_xg|b_xo), of shape (4*hidden_size)
lstm.bias_ih_l0.size()
# lstm.bias_ih_l0 == lstm.all_weights[0][2]

torch.Size([40])

In [7]:
# W_h(i,f,g,o)
# (W_hi|W_hf|W_hg|W_ho) of shape (4*hidden_size, input_size)
lstm.weight_hh_l0.size() 
# lstm.weight_hh_l0 == lstm.all_weights[0][1]

torch.Size([40, 10])

In [8]:
# b_h(i,f,g,o) 
# (b_hi|b_hf|b_hg|b_ho), of shape (4*hidden_size)
lstm.bias_hh_l0.size()
# lstm.bias_hh_l0 == lstm.all_weights[0][3]

torch.Size([40])

## toy data で`nn.LSTM`のoutputを確認

In [9]:
# toy data: input of shape (seq_len, batch, input_size)
x = torch.randn(seq_len, batch_size, input_dim)
x.size()

torch.Size([100, 1, 4])

In [10]:
# hidden state (lstm層のアクティベーション) of shape (num_layers, batch, hidden_size):
h_0 = torch.randn(num_layers, x.size(1), hidden_dim)
h_0.size()

torch.Size([1, 1, 10])

In [11]:
# cell state (メモリセル) of shape (num_layers, batch, hidden_size): 
c_0 = torch.randn(num_layers, x.size(1), hidden_dim)
c_0.size()

torch.Size([1, 1, 10])

In [12]:
# run forward pass of LSTM layer
output, (h_n, c_n) = lstm(x, (h_0, c_0))

In [13]:
output.size()

torch.Size([100, 1, 10])

In [14]:
# change shape of h_out inorder to input in linear layer (squeeze batch dim)
h_out = output.view(-1, hidden_dim)
h_out.size()

torch.Size([100, 10])

In [15]:
h_n.size()

torch.Size([1, 1, 10])

In [16]:
c_n.size()

torch.Size([1, 1, 10])

In [17]:
# the last row of output equals to h_n
output[-1] == h_n

tensor([[[True, True, True, True, True, True, True, True, True, True]]])

## LSTM Class オブジェクトを構築

In [48]:
class LSTM(torch.nn.Module):
    """
    long short-term memory network.
    Nso batches (batch_size=1), single hidden lstm layer.
    
    Reference :
    https://stackabuse.com/time-series-prediction-using-lstm-with-pytorch-in-python
    https://colab.research.google.com/github/dlmacedo/starter-academic/blob/master/content/courses/deeplearning/notebooks/pytorch/Time_Series_Prediction_with_LSTM_Using_PyTorch.ipynb
    https://curiousily.com/posts/time-series-forecasting-with-lstm-for-daily-coronavirus-cases/
    https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/recurrent_neural_network/main.py#L39-L58
    """
    def __init__(self, input_dim, hidden_dim, output_dim=1, num_layers=1, batch_size=1):
        super().__init__()
        """
        Instantiate model layers.
        
        Parameters
        ----------
        input_size : int
            the number of features in the input layer
        hidden_size : int
            the number of units (neurons) in each hidden layer
            (single hidden lstm layer for now)
        output_size : int, Default: 1
            the number of dimension for the output
        
        <value changes not recomemnded>
        num_layers : int, Default: 1
            Number of recurrent layers
            (single lstm layer for now)
        batch_size : int, Default: 1
            size of batches
            (No batches for now; batch_size=1)
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        
        # the layers
        # first layer (hidden lstm layer)
        # https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
        self.hidden_lstm = torch.nn.LSTM(input_size=input_dim, 
                                  hidden_size=hidden_dim, 
                                  # num_layers=num_layers, 
                                  bias=True)
        # second layer (linear output layer)
        self.output = torch.nn.Linear(hidden_dim, 
                                      output_dim, 
                                      bias=True)

    def forward(self, x):
        """
        Set initial hidden and cell states as zeros (stateless LSTM)
        and forward propagate LSTM.

        math::
            \begin{array}{ll} \\
                i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
                f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
                g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
                o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
                c_t = f_t \odot c_{t-1} + i_t \odot g_t \\
                h_t = o_t \odot \tanh(c_t) \\
            \end{array}
        
        :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell state at time `t`.
        
        (input, (h_0, c_0)) are the inputs and
        (output, (h_n, c_n)) are the returns of lstm layer for t = n lstm cell.
        
        Parameters
        ----------
        x: inputs; shape (seq_len, batch_size=1, input_dim)
            tensor containing the features of the input sequence. 
            must include batch_size(=1) dimension as dim=1
        
        Returns
        -------
        h: 
        """
        # reset_hidden_state (stateless LSTM)
        # hidden state (lstm層のアクティベーション) of shape (num_layers, batch, hidden_size):
        h_0 = torch.zeros(1 * self.num_layers, self.batch_size, self.hidden_dim)
        # cell state (メモリセル) of shape (num_layers, batch, hidden_size): 
        c_0 = torch.zeros(1 * self.num_layers, self.batch_size, self.hidden_dim)
        
        # Forward pass
        # input to hidden LSTM layer
        # out: tensor of shape (batch_size, seq_length, hidden_size)
        h_all_time, (h_latest, c_latest) = self.hidden_lstm(x, (h_0, c_0))
        
        # change shape of h_all_time from (num_layers, batch, hidden_size) to (num_layers*batch, hidden_size)
        h = h_all_time.view(-1, self.hidden_dim)
        
        # hidden LSTM layer to output layer
        out = self.output(h)
        return out

## データの準備

In [20]:
# import external libraries
import sys
import numpy as np
import pandas as pd
import torch
import math

# import internal modules
sys.path.insert(1, '../')
from models.nn import MLP
from utils.data_editor import lag, train_test_split

# set seeds for reproductibility
np.random.seed(0)
torch.manual_seed(0)

# Prepare Data --> 関数

# read processed data
df = pd.read_csv("../../data/processed/dataset.csv")

# save column names
earning_v = df.columns[4: 10].values
account_v_bs = df.columns[11:].values
account_v_pl = df.columns[10:11].values

# y: "１株当たり利益［３ヵ月］"
y = df[earning_v[-1]]

# x: ['棚卸資産', '資本的支出', '期末従業員数', '受取手形・売掛金／売掛金及びその他の短期債権', 
#     '販売費及び一般管理費']
x = df[np.append(account_v_bs, account_v_pl)]

# Unlike statsmodel SARIMA package, NN needs to prepare lagged inputs manually if needed.
# y_lag and x_lag (lag 4 for now)
num_lag = 4
y_lag = lag(y, num_lag, drop_nan=False, reset_index=False)
x_lag = lag(x, num_lag, drop_nan=False, reset_index=False)

# Redefine data name as target (y) and feature (y_lag and x_lag)
target = y
feature = pd.concat([y_lag, x_lag], axis=1)

# time series train test split (4/5) : (1/5), yearly bases
target_train, target_test = train_test_split(target, ratio=(4,1))
feature_train, feature_test = train_test_split(feature, ratio=(4,1))

# drop nan caused by lag()
feature_train = feature_train.dropna(axis=0)
target_train = target_train[feature_train.index]

train_date = df["決算期"][target_train.index] # for plotting !!!! <-- 改善の余地あり, targetはtensorになってindexがなくなるから

# setting torch
dtype = torch.float # double float problem in layer 
device = torch.device("cpu")

# Make data to torch.tensor
target_train = torch.tensor(target_train.values, dtype=dtype)
feature_train = torch.tensor(feature_train.values, dtype=dtype)
target_test = torch.tensor(target_test.values, dtype=dtype)
feature_test = torch.tensor(feature_test.values, dtype=dtype)

In [21]:
target_train.size()

torch.Size([52])

In [22]:
feature_train.size()

torch.Size([52, 24])

## モデルのinstanciation

In [49]:
seq_len = 100 # (バッチなしなので総)レコード期間(ラグ引いた後の長さ?sliding window sets)
input_dim = feature_train.size()[1] # 特徴量数
hidden_dim = 10 # hidden unit数
num_layers = 1 # rnn hidden layer数
batch_size = 1 # ミニバッチしたくないからバッチサイズ=1でいいってことだよね?(塊でやらない)

lstm = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=1)
lstm

LSTM(
  (hidden_lstm): LSTM(24, 10)
  (output): Linear(in_features=10, out_features=1, bias=True)
)

## モデルの学習・訓練

In [54]:
learning_rate = 0.001
num_epochs = 10

# Loss and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

total_step = len(feature_train) # バッチなしだと時系列の長さseq_lenと同じ?
for epoch in range(num_epochs):
    for t in range(total_step):
        # LSTMはtを1つずつ進めて学習?
        feature_t = feature_train[t] # x_t (1 x D_x) vector
        # torch.nn.LSTM() に渡すために変形 input: (seq_len, batch, input_size)
        feature_t = feature_t.view(seq_len, 1, input_dim)
        target_t = target_train[t]

        # Forward pass
        target_t_pred = lstm(feature_t)
        loss = criterion(target_t_pred, target_t)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
               .format(epoch+1, num_epochs, t+1, total_step, loss.item()))

RuntimeError: shape '[100, 1, 24]' is invalid for input of size 24

In [None]:
# Debug

In [35]:
model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=1)

In [39]:
feature_train.size()

torch.Size([52, 24])

In [38]:
model.forward(feature_train[0])

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [41]:
h_0 = torch.zeros(1 * 1, 1, 24)
c_0 = torch.zeros(1 * 1, 1, 24)

In [44]:
h_0.size(), c_0.size()

(torch.Size([1, 1, 24]), torch.Size([1, 1, 24]))

In [55]:
model.hidden_lstm(feature_train[0], (h_0, c_0))

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [None]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')