# RNNのサンプル

In [118]:
import torch
import torch.nn as nn
import torch.utils.data.dataset as dataset

In [119]:
from itertools import product
from torch import optim

In [120]:
import numpy as np

In [131]:
rnn = nn.RNN(5, 5, num_layers=1, batch_first=True)
linear = nn.Linear(5, 10)

### RNNのウェイトの確認

In [127]:
for name, param in rnn.named_parameters():
    print(name, param.shape)

weight_ih_l0 torch.Size([5, 8])
weight_hh_l0 torch.Size([5, 5])
bias_ih_l0 torch.Size([5])
bias_hh_l0 torch.Size([5])


In [5]:
rnn.weight_hh_l0

Parameter containing:
tensor([[ 0.1304,  0.0484, -0.0746, -0.2373,  0.2006],
        [-0.2036,  0.4044,  0.2243, -0.0367, -0.1840],
        [ 0.1824,  0.1060, -0.4190,  0.1855,  0.1987],
        [-0.2924, -0.1629, -0.1887,  0.4057,  0.1515],
        [-0.1252,  0.3730, -0.2733, -0.4062, -0.3863]], requires_grad=True)

In [128]:
rnn._flat_weights_names

['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0']

## RNNのウェイト

In [129]:
_rnn = nn.RNN(5, 5, num_layers=2, batch_first=True)
#linear = nn.Linear(5, 10)

In [130]:
_rnn._flat_weights_names

['weight_ih_l0',
 'weight_hh_l0',
 'bias_ih_l0',
 'bias_hh_l0',
 'weight_ih_l1',
 'weight_hh_l1',
 'bias_ih_l1',
 'bias_hh_l1']

- バッチサイズ: 3
- 時系列の時間: 4
- 一回の入力の次元: 5
の場合で出力

In [134]:
_x = np.arange(60).reshape(3, 4, 5) # shape: (batch=3, seq_len=4, input_size=5)
x = torch.tensor(_x).float()

In [135]:
rnn(x)
# returns a tuple: (outputs for every time step, final hidden state)

(tensor([[[ 0.8440, -0.4665, -0.2097,  0.9931,  0.9468],
          [ 0.9982, -0.8589, -0.4894,  1.0000,  0.9970],
          [ 1.0000, -0.9728, -0.9148,  1.0000,  1.0000],
          [ 1.0000, -0.9945, -0.9902,  1.0000,  1.0000]],
 
         [[ 1.0000, -0.9981, -0.9999,  1.0000,  1.0000],
          [ 1.0000, -0.9997, -0.9999,  1.0000,  1.0000],
          [ 1.0000, -0.9999, -1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000, -1.0000,  1.0000,  1.0000]],
 
         [[ 1.0000, -1.0000, -1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000, -1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000, -1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000, -1.0000,  1.0000,  1.0000]]],
        grad_fn=<TransposeBackward1>),
 tensor([[[ 1.0000, -0.9945, -0.9902,  1.0000,  1.0000],
          [ 1.0000, -1.0000, -1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000, -1.0000,  1.0000,  1.0000]]],
        grad_fn=<StackBackward>))

In [136]:
rnn(x)[0].shape

torch.Size([3, 4, 5])

In [145]:
list(product([0, 1, 2, 3, 4], [0, 1, 2, 3, 4]))

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 3),
 (2, 4),
 (3, 0),
 (3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (4, 0),
 (4, 1),
 (4, 2),
 (4, 3),
 (4, 4)]

In [144]:
torch.eye(10)[3]
# one-hot vector with the 1 at index 3

tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])

In [143]:
torch.eye(5)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

## 足し算のデータセット

In [146]:
class PlusDataset(dataset.Dataset):
    """Toy dataset of additions ``a + b`` for every pair ``a, b`` in ``0..4``.

    Indexing returns ``(inp, target)``:

    * ``inp``    -- float tensor of shape ``(2, 5)``; row 0 is the one-hot
      encoding of ``a`` and row 1 the one-hot encoding of ``b``.
    * ``target`` -- 0-dim integer tensor holding ``a + b`` (class index 0..8).
    """

    def __init__(self):
        # ``super(PlusDataset).__init__()`` (without ``self``) only initialises
        # an unbound super object and never reaches the parent class.
        super(PlusDataset, self).__init__()
        # All 25 ordered pairs (a, b); the sample index enumerates them row-major.
        self.dataset = list(product(range(5), repeat=2))

    def __len__(self):
        """Return the number of (a, b) pairs."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the one-hot encoded operands and their sum for pair ``idx``."""
        first, second = self.dataset[idx]
        one_hot = torch.eye(5)  # row k is the one-hot vector for digit k
        inp = torch.stack((one_hot[first], one_hot[second])).float()
        return inp, torch.tensor(first + second)

In [147]:
data = PlusDataset()

In [150]:
data[24]

(tensor([[0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.]]),
 tensor(8))

In [152]:
class MathRNN(nn.Module):
    """Single-layer ``nn.RNN`` followed by a linear classifier.

    ``forward`` takes a batch-first sequence of 5-dimensional vectors and
    returns 10 logits per sample, computed from the RNN output at the last
    time step.
    """

    def __init__(self):
        super(MathRNN, self).__init__()
        self.rnn = nn.RNN(input_size=5, hidden_size=5, num_layers=1, batch_first=True)
        self.l = nn.Linear(in_features=5, out_features=10)

    def forward(self, x):
        # nn.RNN returns (per-step outputs, final hidden state); only the
        # per-step outputs are needed here.
        step_outputs, _ = self.rnn(x)
        last_step = step_outputs[:, -1, :]
        return self.l(last_step)

In [153]:
rnn = MathRNN()

In [154]:
dataloader = torch.utils.data.DataLoader(data, batch_size=5)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(rnn.parameters(), lr=0.1)

## 足し算の学習

In [155]:
# Fit the RNN classifier: 1000 passes over the addition dataset.
rnn.train()  # set once up front; nothing inside the loop switches the mode back
for i in range(1000):
    for x, y in dataloader:
        optimizer.zero_grad()  # drop gradients left over from the previous step
        h = rnn(x)
        loss = criterion(h, y)
        loss.backward()
        optimizer.step()
    if i % 100 == 0:
        # Loss of the last mini-batch of this epoch.
        print(loss)
        #writer.add_scalar('Loss/train', loss, i)

tensor(2.3150, grad_fn=<NllLossBackward>)
tensor(1.1440, grad_fn=<NllLossBackward>)
tensor(0.7044, grad_fn=<NllLossBackward>)
tensor(0.5135, grad_fn=<NllLossBackward>)
tensor(0.4167, grad_fn=<NllLossBackward>)
tensor(0.3278, grad_fn=<NllLossBackward>)
tensor(0.2687, grad_fn=<NllLossBackward>)
tensor(0.2365, grad_fn=<NllLossBackward>)
tensor(0.2190, grad_fn=<NllLossBackward>)
tensor(0.2081, grad_fn=<NllLossBackward>)


In [156]:
# Compare the predicted class (arg-max over the 10 logits) with the true sum,
# one mini-batch at a time.
for x, y in dataloader:
    h = rnn(x)
    print("pred", h.argmax(dim=1))
    print("ans", y)

pred tensor([0, 1, 2, 3, 4], grad_fn=<NotImplemented>)
ans tensor([0, 1, 2, 3, 4])
pred tensor([1, 2, 3, 4, 5], grad_fn=<NotImplemented>)
ans tensor([1, 2, 3, 4, 5])
pred tensor([2, 3, 4, 5, 6], grad_fn=<NotImplemented>)
ans tensor([2, 3, 4, 5, 6])
pred tensor([3, 4, 5, 6, 8], grad_fn=<NotImplemented>)
ans tensor([3, 4, 5, 6, 7])
pred tensor([4, 5, 6, 7, 7], grad_fn=<NotImplemented>)
ans tensor([4, 5, 6, 7, 8])


In [481]:
x

tensor([[[0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 1., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 1., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.]]])

In [426]:
torch.argmax(h,dim=1)

tensor([0, 1, 2, 4, 4], grad_fn=<NotImplemented>)

In [427]:
y

tensor([0, 1, 2, 3, 4])

In [414]:
h

tensor([[ 0.3659, -0.2023,  0.0612, -0.1573,  0.6058,  0.2608, -0.0274,  0.6628,
          0.0113, -0.6395],
        [ 0.0879,  0.2451,  0.1780, -0.0040,  0.5666,  0.5742,  0.1723,  0.3757,
         -0.4286, -0.3901],
        [-0.4286,  0.4608, -0.1104, -0.1323,  0.4608,  0.5899,  0.5407,  0.5890,
         -0.1533, -0.2838],
        [-0.3141,  0.0639, -0.3741, -0.2679,  0.5420,  0.2932,  0.4965,  0.9807,
          0.3498, -0.5635],
        [-0.2892, -0.0716, -0.4387, -0.3085,  0.5471,  0.1944,  0.4602,  1.0667,
          0.4795, -0.6223]], grad_fn=<AddmmBackward>)

In [369]:
x

tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])

In [370]:
torch.argmax(pred[:, 1],dim=1)

tensor([4, 5, 5, 6, 6], grad_fn=<NotImplemented>)

In [376]:
x

tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])

In [375]:
# NOTE(review): this cell unpacks two values from rnn(x) and indexes the
# per-step output at time step 1, while MathRNN.forward returns a single
# logits tensor. It presumably comes from an earlier session in which `rnn`
# was a bare nn.RNN -- confirm and remove or update before re-running.
for x, y in dataloader:
    pred, h = rnn(x)
    print("pred", torch.argmax(pred[:, 1],dim=1))
    print("ans", y)

pred tensor([4, 4, 4, 4, 4], grad_fn=<NotImplemented>)
ans tensor([0, 1, 2, 3, 4])
pred tensor([4, 4, 4, 4, 5], grad_fn=<NotImplemented>)
ans tensor([1, 2, 3, 4, 5])
pred tensor([3, 4, 3, 3, 6], grad_fn=<NotImplemented>)
ans tensor([2, 3, 4, 5, 6])
pred tensor([4, 5, 5, 5, 6], grad_fn=<NotImplemented>)
ans tensor([3, 4, 5, 6, 7])
pred tensor([4, 5, 5, 6, 6], grad_fn=<NotImplemented>)
ans tensor([4, 5, 6, 7, 8])


In [371]:
y

tensor([4, 5, 6, 7, 8])

In [344]:
loss

tensor(2.4156, grad_fn=<NllLossBackward>)

In [321]:
y

tensor([0, 1, 2, 3, 4, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 3, 4, 5, 6, 7, 4, 5, 6, 7,
        8])

In [309]:
h.shape

torch.Size([1, 2, 10])

In [306]:
pred.shape

torch.Size([25, 2, 10])

## 演習
- RNNをMSEで実装せよ
  - なかなかうまくいかないことを確認して欲しい
- あまりうまくいかなかったので改善

# RNNの計算グラフ

In [559]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [560]:
writer.add_graph(rnn, x)

In [557]:
# Flush pending events and close the TensorBoard writer created above
# (the original `write` was a misspelling and raised NameError).
writer.close()

In [558]:
x

tensor([[[0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 1., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 1., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.]]])

In [551]:
rnn

MathRNN(
  (rnn): RNN(5, 5, batch_first=True)
  (l): Linear(in_features=5, out_features=10, bias=True)
)

# LSTM

In [492]:
lstm = nn.LSTM(5, 5, num_layers=1, batch_first=True)
l2 = nn.Linear(5, 10)

In [522]:
class MathLSTM(nn.Module):
    """Single-layer ``nn.LSTM`` followed by a linear classifier.

    ``forward`` takes a batch-first sequence of 5-dimensional vectors and
    returns 10 logits per sample, computed from the LSTM output at the last
    time step.
    """

    def __init__(self):
        super(MathLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=5, hidden_size=5, num_layers=1, batch_first=True)
        self.l = nn.Linear(in_features=5, out_features=10)

    def forward(self, x):
        # nn.LSTM returns (per-step outputs, (h_n, c_n)); the states are unused.
        step_outputs, _ = self.lstm(x)
        last_step = step_outputs[:, -1, :]
        return self.l(last_step)

In [523]:
lstm = MathLSTM()

In [524]:
dataloader = torch.utils.data.DataLoader(data, batch_size=5)
criterion = nn.CrossEntropyLoss()
# Training with an MSE loss instead gave very poor results
optimizer = optim.SGD(lstm.parameters(), lr=0.1)

In [544]:
# Fit the LSTM classifier: 1000 passes over the addition dataset.
lstm.train()  # set once up front; nothing inside the loop switches the mode back
for i in range(1000):
    for x, y in dataloader:
        optimizer.zero_grad()  # drop gradients left over from the previous step
        pred = lstm(x)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
    # With 1000 epochs this condition only holds for i == 0, so the loss is
    # printed once, after the first epoch of this run.
    if i % 1000 == 0:
        print(loss)

tensor(0.0007, grad_fn=<NllLossBackward>)


In [545]:
# Compare the predicted class (arg-max over the 10 logits) with the true sum,
# one mini-batch at a time.
for x, y in dataloader:
    pred = lstm(x)
    print("pred", pred.argmax(dim=1))
    print("ans", y)

pred tensor([0, 1, 2, 3, 4], grad_fn=<NotImplemented>)
ans tensor([0, 1, 2, 3, 4])
pred tensor([1, 2, 3, 4, 5], grad_fn=<NotImplemented>)
ans tensor([1, 2, 3, 4, 5])
pred tensor([2, 3, 4, 5, 6], grad_fn=<NotImplemented>)
ans tensor([2, 3, 4, 5, 6])
pred tensor([3, 4, 5, 6, 7], grad_fn=<NotImplemented>)
ans tensor([3, 4, 5, 6, 7])
pred tensor([4, 5, 6, 7, 8], grad_fn=<NotImplemented>)
ans tensor([4, 5, 6, 7, 8])


In [597]:
_x = torch.cat([torch.tensor([[1, 2], [5,6]]), torch.tensor([[3,4], [7,8]])]).view(2, 2, 2)
torch.transpose(_x, 0, 1)

tensor([[5, 6],
        [7, 8]])

## NaiveなRNNの実装

In [30]:
import torch.nn.functional as F

In [50]:
class NaiveRNNCell(nn.Module):
    """One step of a ReLU recurrent cell.

    Computes ``relu(h2h(hidden) + i2h(inp))`` where both projections are
    ``nn.Linear`` layers producing ``hidden_size`` features.

    Args:
        input_size: number of features of ``inp``.
        hidden_size: number of features of the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super(NaiveRNNCell, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)   # input  -> hidden
        self.h2h = nn.Linear(hidden_size, hidden_size)  # hidden -> hidden

    def forward(self, inp, hidden):
        """Return the next hidden state for input ``inp`` and state ``hidden``."""
        from_hidden = self.h2h(hidden)
        from_input = self.i2h(inp)
        return F.relu(from_hidden + from_input)

In [55]:
class NaiveRNN(nn.Module):
    """Unrolls a ``NaiveRNNCell`` over a batch-first sequence.

    Args:
        input_size: number of features of each time step.
        hidden_size: number of features of the hidden state.

    ``forward`` expects ``inputs`` indexed as ``(batch, seq, feature)`` and
    returns the hidden state of every time step, stacked along dimension 1.
    """

    def __init__(self, input_size, hidden_size):
        super(NaiveRNN, self).__init__()
        self.hidden_size = hidden_size
        # The cell owns the input-to-hidden and hidden-to-hidden layers, so no
        # duplicate (and never used) Linear layers are registered here.
        self.rnn_cell = NaiveRNNCell(input_size, hidden_size)

    def forward(self, inputs):
        batch_size, seq_len = inputs.shape[0], inputs.shape[1]
        # Fresh zero state per call, created with the dtype/device of `inputs`.
        hidden = inputs.new_zeros(batch_size, self.hidden_size)
        outputs = []
        for i in range(seq_len):
            # Feed the previous state back in and record the *new* state; the
            # earlier version always passed and recorded the initial zeros.
            hidden = self.rnn_cell(inputs[:, i], hidden)
            outputs.append(hidden)
        # Stack the per-step states along the sequence dimension.
        return torch.stack(outputs, dim=1)

In [83]:
class NaiveRNN(nn.Module):
    """Hand-written ReLU RNN over a batch-first sequence.

    The recurrence is ``h_t = relu(h2h(h_{t-1}) + i2h(x_t))`` starting from the
    zero vector stored in ``self.hidden``.

    Args:
        input_size: number of features of each time step.
        hidden_size: number of features of the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super(NaiveRNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)   # input  -> hidden
        self.h2h = nn.Linear(hidden_size, hidden_size)  # hidden -> hidden
        # Initial state: a 1-D zero vector that broadcasts over the batch.
        self.hidden = torch.zeros(hidden_size)

    def forward(self, inputs):
        """Return the hidden state of every time step, batch dimension first."""
        batch_size, seq_len = inputs.shape[:2]
        state = self.hidden
        per_step = []
        for step in range(seq_len):
            state = F.relu(self.h2h(state) + self.i2h(inputs[:, step]))
            per_step.append(state)
        # Concatenate time-major, then swap the first two dimensions.
        time_major = torch.cat(per_step).view(seq_len, batch_size, -1)
        return time_major.transpose(0, 1)

In [159]:
class NaiveRNN(nn.Module):
    """Hand-written ReLU RNN over a batch-first sequence.

    The recurrence is ``h_t = relu(h2h(h_{t-1}) + i2h(x_t))`` with ``h_0 = 0``.

    Args:
        input_size: number of features of each time step.
        hidden_size: number of features of the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super(NaiveRNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)   # input  -> hidden
        self.h2h = nn.Linear(hidden_size, hidden_size)  # hidden -> hidden

    def forward(self, inputs):
        """Run the recurrence over ``inputs`` indexed as (batch, seq, feature).

        Returns:
            Tensor of shape ``(batch, seq, hidden_size)`` holding the hidden
            state after every time step.
        """
        batch_size, seq_len = inputs.shape[0], inputs.shape[1]
        # Build the zero state from `inputs` so it shares its dtype and device;
        # a bare torch.zeros(...) is always float32 on the CPU and breaks once
        # the module is moved with .double() / .to(device).
        hidden = inputs.new_zeros(batch_size, self.hidden_size)
        outputs = []
        for i in range(seq_len):
            inp = inputs[:, i]
            hidden = F.relu(self.h2h(hidden) + self.i2h(inp))
            outputs.append(hidden)
        # Stack directly into (batch, seq, dim).
        return torch.stack(outputs, dim=1)

In [160]:
class NMath(nn.Module):
    """Addition classifier built on the hand-written ``NaiveRNN``.

    ``forward`` feeds the sequence through ``NaiveRNN(5, 5)`` and maps the
    hidden state of the last time step to 10 logits with a linear layer.
    """

    def __init__(self):
        super(NMath, self).__init__()
        self.rnn = NaiveRNN(input_size=5, hidden_size=5)
        self.l = nn.Linear(in_features=5, out_features=10)

    def forward(self, x):
        step_outputs = self.rnn(x)
        last_step = step_outputs[:, -1, :]
        return self.l(last_step)

In [161]:
n_rnn = NaiveRNN(5, 5)

In [162]:
n_rnn(x)

tensor([[[0.0000, 0.3257, 0.0000, 0.0000, 0.1836],
         [0.1489, 0.2018, 0.0024, 0.2134, 0.0000]],

        [[0.0000, 0.3257, 0.0000, 0.0000, 0.1836],
         [0.2429, 0.0000, 0.0000, 0.7218, 0.0000]],

        [[0.0000, 0.3257, 0.0000, 0.0000, 0.1836],
         [0.2851, 0.0000, 0.0000, 0.0304, 0.0000]],

        [[0.0000, 0.3257, 0.0000, 0.0000, 0.1836],
         [0.4303, 0.2198, 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.3257, 0.0000, 0.0000, 0.1836],
         [0.0000, 0.3445, 0.0000, 0.0000, 0.1162]]],
       grad_fn=<TransposeBackward0>)

In [104]:
x.shape

torch.Size([5, 2, 5])

In [163]:
n_math  = NMath()

In [164]:
n_optimizer = optim.SGD(n_math.parameters(), lr=0.1)

In [107]:
import time#time関数を取得

start = time.time()#開始時刻

In [165]:
start = time.time()  # wall-clock start time

# Fit the NaiveRNN-based classifier: 1000 passes over the addition dataset.
n_math.train()  # set once up front; nothing inside the loop switches the mode back
for i in range(1000):
    for x, y in dataloader:
        n_optimizer.zero_grad()  # drop gradients left over from the previous step
        h = n_math(x)
        loss = criterion(h, y)
        loss.backward()
        n_optimizer.step()
    if i % 100 == 0:
        # Loss of the last mini-batch of this epoch.
        print(loss)
        #writer.add_scalar('Loss/train', loss, i)

end = time.time()  # wall-clock end time
print(end-start, '秒')  # elapsed time = end - start

tensor(2.2062, grad_fn=<NllLossBackward>)
tensor(1.5152, grad_fn=<NllLossBackward>)
tensor(0.7306, grad_fn=<NllLossBackward>)
tensor(0.3185, grad_fn=<NllLossBackward>)
tensor(0.1271, grad_fn=<NllLossBackward>)
tensor(0.0931, grad_fn=<NllLossBackward>)
tensor(0.0652, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.0350, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
4.145867109298706 秒


In [None]:
print(x)

In [638]:
loss

tensor(2.1491, grad_fn=<NllLossBackward>)

## NaiveなLSTM実装
- 前提: 入力は (batch, sequence, dim) の形状
- `__init__` の引数は input_dim, hidden_dim