# RNNのサンプル

In [23]:
import torch
import torch.nn as nn
import  torch.utils.data.dataset as dataset

In [37]:
from itertools import product
from torch import optim

In [10]:
import numpy as np

In [645]:
rnn = nn.RNN(5, 5, num_layers=1, batch_first=True)
linear = nn.Linear(5, 10)

### RNNのウェイトの確認

In [646]:
rnn.weight_hh_l0

Parameter containing:
tensor([[-0.2343,  0.3293, -0.0650, -0.0586, -0.1384],
        [-0.1853,  0.2573,  0.4297, -0.0663, -0.3710],
        [ 0.3644,  0.2861, -0.0428,  0.3978, -0.3781],
        [-0.4281, -0.0053,  0.4115,  0.4333,  0.3549],
        [ 0.1766,  0.2981,  0.3812,  0.0575,  0.0387]], requires_grad=True)

In [435]:
rnn._flat_weights_names

['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0']

- バッチサイズ: 3
- 時系列の時間: 4
- 一回の入力の次元: 2
の場合で出力

In [436]:
_x = np.arange(60).reshape(3, 4, 5) # mini batch数
x = torch.tensor(_x).float()

In [437]:
rnn(x)
# 入力, 中間状態

(tensor([[[ 0.6747, -0.5906,  0.9993,  0.9918,  0.9502],
          [ 0.9688, -0.9860,  1.0000,  1.0000,  0.9965],
          [ 0.9964, -0.9995,  1.0000,  1.0000,  0.9998],
          [ 0.9996, -1.0000,  1.0000,  1.0000,  1.0000]],
 
         [[ 0.9999, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000]],
 
         [[ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000]]],
        grad_fn=<TransposeBackward1>),
 tensor([[[ 0.9996, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000],
          [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000]]],
        grad_fn=<StackBackward>))

In [188]:
torch.eye(10)[3]

tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])

## 足し算の教育

In [456]:
class PlusDataset(dataset.Dataset):
    def __init__(self):
        super(PlusDataset).__init__()
        self.dataset  = list(product([0, 1,2,3,4], [0, 1, 2, 3, 4]))
        
    def __len__(self):
        return len(self.dataset)
        
    def __getitem__(self, idx):
        temp = torch.eye(5)
        inp = self.dataset[idx]
        out = inp[0] + inp[1]
        inp = torch.cat((temp[inp[0]], temp[inp[1]]),0).view(2, -1)
        inp = inp.float()
        
        
        return inp, torch.tensor(out)

In [457]:
data = PlusDataset()

In [440]:
data[0]

(tensor([[1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.]]),
 tensor(0))

In [536]:
class MathRNN(nn.Module):
    def __init__(self):
        super(MathRNN, self).__init__()
        self.rnn = nn.RNN(5, 5, num_layers=1, batch_first=True)
        self.l = nn.Linear(5, 10)
        
    def forward(self, x):
        pred, hidden = self.rnn(x)
        h = pred[:, -1]
        return self.l(h)

In [537]:
rnn = MathRNN()

In [538]:
dataloader = torch.utils.data.DataLoader(data, batch_size=5)
criterion = nn.CrossEntropyLoss()
# MSEで学習させたらひどいことになった
optimizer = optim.SGD(rnn.parameters(), lr=0.1)

## 足し算の学習

In [561]:
for i in range(1000):
    for x, y in dataloader:
        optimizer.zero_grad()
        rnn.train()
        h = rnn(x)
        loss = criterion(h, y)

        loss.backward()
        optimizer.step()
    if i % 100 == 0:
        print(loss) 
        writer.add_scalar('Loss/train', loss, i)

tensor(0.0060, grad_fn=<NllLossBackward>)
tensor(0.0060, grad_fn=<NllLossBackward>)
tensor(0.0059, grad_fn=<NllLossBackward>)
tensor(0.0058, grad_fn=<NllLossBackward>)
tensor(0.0058, grad_fn=<NllLossBackward>)
tensor(0.0057, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.0055, grad_fn=<NllLossBackward>)
tensor(0.0055, grad_fn=<NllLossBackward>)


In [548]:
for x, y in dataloader:
    h = rnn(x)
    print("pred", torch.argmax(h, dim=1))
    print("ans", y)

pred tensor([0, 1, 2, 3, 4], grad_fn=<NotImplemented>)
ans tensor([0, 1, 2, 3, 4])
pred tensor([1, 2, 3, 4, 5], grad_fn=<NotImplemented>)
ans tensor([1, 2, 3, 4, 5])
pred tensor([2, 3, 4, 5, 6], grad_fn=<NotImplemented>)
ans tensor([2, 3, 4, 5, 6])
pred tensor([3, 4, 5, 6, 7], grad_fn=<NotImplemented>)
ans tensor([3, 4, 5, 6, 7])
pred tensor([4, 5, 6, 7, 8], grad_fn=<NotImplemented>)
ans tensor([4, 5, 6, 7, 8])


In [481]:
x

tensor([[[0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 1., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 1., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.]]])

In [426]:
torch.argmax(h,dim=1)

tensor([0, 1, 2, 4, 4], grad_fn=<NotImplemented>)

In [427]:
y

tensor([0, 1, 2, 3, 4])

In [414]:
h

tensor([[ 0.3659, -0.2023,  0.0612, -0.1573,  0.6058,  0.2608, -0.0274,  0.6628,
          0.0113, -0.6395],
        [ 0.0879,  0.2451,  0.1780, -0.0040,  0.5666,  0.5742,  0.1723,  0.3757,
         -0.4286, -0.3901],
        [-0.4286,  0.4608, -0.1104, -0.1323,  0.4608,  0.5899,  0.5407,  0.5890,
         -0.1533, -0.2838],
        [-0.3141,  0.0639, -0.3741, -0.2679,  0.5420,  0.2932,  0.4965,  0.9807,
          0.3498, -0.5635],
        [-0.2892, -0.0716, -0.4387, -0.3085,  0.5471,  0.1944,  0.4602,  1.0667,
          0.4795, -0.6223]], grad_fn=<AddmmBackward>)

In [369]:
x

tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])

In [370]:
torch.argmax(pred[:, 1],dim=1)

tensor([4, 5, 5, 6, 6], grad_fn=<NotImplemented>)

In [376]:
x

tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])

In [375]:
for x, y in dataloader:
    pred, h = rnn(x)
    print("pred", torch.argmax(pred[:, 1],dim=1))
    print("ans", y)

pred tensor([4, 4, 4, 4, 4], grad_fn=<NotImplemented>)
ans tensor([0, 1, 2, 3, 4])
pred tensor([4, 4, 4, 4, 5], grad_fn=<NotImplemented>)
ans tensor([1, 2, 3, 4, 5])
pred tensor([3, 4, 3, 3, 6], grad_fn=<NotImplemented>)
ans tensor([2, 3, 4, 5, 6])
pred tensor([4, 5, 5, 5, 6], grad_fn=<NotImplemented>)
ans tensor([3, 4, 5, 6, 7])
pred tensor([4, 5, 5, 6, 6], grad_fn=<NotImplemented>)
ans tensor([4, 5, 6, 7, 8])


In [371]:
y

tensor([4, 5, 6, 7, 8])

In [344]:
loss

tensor(2.4156, grad_fn=<NllLossBackward>)

In [321]:
y

tensor([0, 1, 2, 3, 4, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 3, 4, 5, 6, 7, 4, 5, 6, 7,
        8])

In [309]:
h.shape

torch.Size([1, 2, 10])

In [306]:
pred.shape

torch.Size([25, 2, 10])

## 演習
- RNNをMSEで実装せよ
  - なかなかうまくいかないことを確認して欲しい
- あまりうまくいかなかったので改善

# RNNの計算グラフ

In [559]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [560]:
writer.add_graph(rnn, x)

In [557]:
write.close()

In [558]:
x

tensor([[[0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 1., 0., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 1., 0., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.]],

        [[0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.]]])

In [551]:
rnn

MathRNN(
  (rnn): RNN(5, 5, batch_first=True)
  (l): Linear(in_features=5, out_features=10, bias=True)
)

# LSTM

In [492]:
lstm = nn.LSTM(5, 5, num_layers=1, batch_first=True)
l2 = nn.Linear(5, 10)

In [522]:
class MathLSTM(nn.Module):
    def __init__(self):
        super(MathLSTM, self).__init__()
        self.lstm = nn.LSTM(5, 5, num_layers=1, batch_first=True)
        self.l = nn.Linear(5, 10)
        
    def forward(self, x):
        pred, hidden = self.lstm(x)
        h = pred[:, -1]
        return self.l(h)

In [523]:
lstm = MathLSTM()

In [524]:
dataloader = torch.utils.data.DataLoader(data, batch_size=5)
criterion = nn.CrossEntropyLoss()
# MSEで学習させたらひどいことになった
optimizer = optim.SGD(lstm.parameters(), lr=0.1)

In [544]:
for i in range(1000):
    for x, y in dataloader:
        optimizer.zero_grad()
        lstm.train()
        pred = lstm(x)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
    if i % 1000 == 0:
        print(loss) 

tensor(0.0007, grad_fn=<NllLossBackward>)


In [545]:
for x, y in dataloader:
    pred  = lstm(x)
    print("pred", torch.argmax(pred, dim=1))
    print("ans", y)

pred tensor([0, 1, 2, 3, 4], grad_fn=<NotImplemented>)
ans tensor([0, 1, 2, 3, 4])
pred tensor([1, 2, 3, 4, 5], grad_fn=<NotImplemented>)
ans tensor([1, 2, 3, 4, 5])
pred tensor([2, 3, 4, 5, 6], grad_fn=<NotImplemented>)
ans tensor([2, 3, 4, 5, 6])
pred tensor([3, 4, 5, 6, 7], grad_fn=<NotImplemented>)
ans tensor([3, 4, 5, 6, 7])
pred tensor([4, 5, 6, 7, 8], grad_fn=<NotImplemented>)
ans tensor([4, 5, 6, 7, 8])


In [597]:
_x = torch.cat([torch.tensor([[1, 2], [5,6]]), torch.tensor([[3,4], [7,8]])]).view(2, 2, 2)
torch.transpose(_x, 0, 1)

tensor([[5, 6],
        [7, 8]])

## NaiveなRNNの実装

In [575]:
import torch.nn.functional as F

In [642]:
class NaiveRNNCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NaiveRNNCell, self).__init__()
        
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        
    def forward(self, inp, hidden):
        return F.relu(self.h2h(hidden) + self.i2h(inp))

In [643]:
class NaiveRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NaiveRNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.hidden = torch.zeros(hidden_size)
        self.rnn_cell = NaiveRNNCell(input_size, hidden_size)
        
    def forward(self, inputs):
        outputs = []
        seq_len = inputs.shape[1]
        batch_size = inputs.shape[0]
        for i in range(seq_len):
            inp = inputs[:, i]
            self.hidden = self.rnn_cell(inp, self.hidden)
            outputs.append(self.hidden)
        
        _y = torch.cat(outputs).view(seq_len, batch_size, -1)
        return torch.transpose(_y, 0, 1)   

In [607]:
class NaiveRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NaiveRNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.hidden = torch.zeros(hidden_size)
        
    def forward(self, inputs):
        outputs = []
        seq_len = inputs.shape[1]
        batch_size = inputs.shape[0]
        for i in range(seq_len):
            inp = inputs[:, i]
            self.hidden = F.relu(self.h2h(self.hidden) + self.i2h(inp))
            outputs.append(self.hidden)
        
        _y = torch.cat(outputs).view(seq_len, batch_size, -1)
        return torch.transpose(_y, 0, 1)

In [633]:
class NMath(nn.Module):
    def __init__(self):
        super(NMath, self).__init__()
        self.rnn = NaiveRNN(5, 5)
        self.l = nn.Linear(5, 10)
        
    def forward(self, x):
        pred = self.rnn(x)
        h = pred[:, -1]
        return self.l(h)

In [634]:
n_rnn = NaiveRNN(5, 5)

In [636]:
n_rnn(x).shape

torch.Size([5, 2, 5])

In [609]:
x.shape

torch.Size([5, 2, 5])

In [614]:
n_math  = NMath()

In [615]:
n_optimizer = optim.SGD(n_math.parameters(), lr=0.1)

In [647]:
for i in range(1000):
    for x, y in dataloader:
        optimizer.zero_grad()
        n_math.train()
        h = n_math(x)
        loss = criterion(h, y)
        loss.backward(retain_graph=True)
        optimizer.step()
    if i % 100 == 0:
        print(loss) 
        writer.add_scalar('Loss/train', loss, i)

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [641]:
loss.backward()

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [638]:
loss

tensor(2.1491, grad_fn=<NllLossBackward>)

## NaiveなLSTM実装
- 前提 bathch, sequence, dimが入力
- initiのときの引数は input_dim, hidden_dim