In [1]:
import torch
import torch.nn as nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [2]:
# 입력, 은닉 상태의 크기

input_size = 5
hidden_size = 8

In [3]:
# [I, am, a, student]라는 문장을 RNN에 넣는다고 가정.

# (1) inputs의 shape : (데이터의 개수-N, 문장의 길이-L, 단어 벡터의 차원-H_in) if batch_first=True
# (2) inputs의 shape : (문장의 길이-L, 데이터의 개수-N, 단어 벡터의 차원-H_in) if batch_first=False

# * 데이터의 개수 = batch_size

inputs = torch.Tensor(2, 4, 5)    # (1) case로 진행
print(inputs.shape)

torch.Size([2, 4, 5])


## 1 layer (Basic RNN)

In [4]:
# modelling - 1 layer
## 10 layers (Deep RNN)
cell = nn.RNN(input_size, hidden_size, batch_first=True)

In [5]:
# output 뽑기

# outputs : 모든 시점의 은닉 상태의 값
# hidden : 마지막 시점의 은닉 상태의 값

outputs, hidden = cell(inputs)

In [6]:
# (N, L, H_out) if batch_first=True
# (L, N, H_out) if batch_first=False

print(outputs.shape)
print(outputs)

torch.Size([2, 4, 8])
tensor([[[ 0.3067, -0.0985, -0.1506, -0.1892,  0.3761, -0.0633,  0.1175,
          -0.0425],
         [ 0.4813, -0.1452, -0.1330, -0.3016,  0.3191,  0.1238, -0.1026,
          -0.0296],
         [-1.0000,  1.0000,  1.0000, -1.0000,  1.0000,  1.0000, -1.0000,
          -1.0000],
         [ 1.0000,  1.0000,  1.0000, -1.0000, -1.0000,  1.0000, -1.0000,
           1.0000]],

        [[ 0.3067, -0.0985, -0.1506, -0.1892,  0.3761, -0.0633,  0.1175,
          -0.0425],
         [ 0.4813, -0.1452, -0.1330, -0.3016,  0.3191,  0.1238, -0.1026,
          -0.0296],
         [ 0.4965, -0.1939, -0.2288, -0.3265,  0.3176,  0.1100, -0.1071,
          -0.0180],
         [ 0.5009, -0.2010, -0.2180, -0.3479,  0.3169,  0.0953, -0.1498,
          -0.0397]]], grad_fn=<TransposeBackward1>)


In [7]:
# (num_layers, N, H_out)

print(hidden.shape)
print(hidden)

torch.Size([1, 2, 8])
tensor([[[ 1.0000,  1.0000,  1.0000, -1.0000, -1.0000,  1.0000, -1.0000,
           1.0000],
         [ 0.5009, -0.2010, -0.2180, -0.3479,  0.3169,  0.0953, -0.1498,
          -0.0397]]], grad_fn=<StackBackward>)


## 10 layers (Deep RNN)

In [8]:
# modelling - 10 layers (Deep RNN)

cell = nn.RNN(input_size, hidden_size, num_layers=10, batch_first=True)

In [9]:
# output 뽑기

# outputs : 모든 시점의 은닉 상태의 값
# hidden : 마지막 시점의 은닉 상태의 값

outputs, hidden = cell(inputs)

In [10]:
# (N, L, H_out) if batch_first=True
# (L, N, H_out) if batch_first=False

print(outputs.shape)
print(outputs)

torch.Size([2, 4, 8])
tensor([[[ 0.2962, -0.1407,  0.2966,  0.4263,  0.1446, -0.1136,  0.2729,
           0.2922],
         [ 0.4342,  0.0308,  0.4722,  0.3067,  0.2648, -0.2280,  0.3936,
           0.2017],
         [ 0.4444,  0.0978,  0.4206,  0.1936,  0.3557, -0.3841,  0.3629,
           0.1360],
         [ 0.4942,  0.1388,  0.3564,  0.1916,  0.3304, -0.3886,  0.3302,
           0.2090]],

        [[ 0.2962, -0.1407,  0.2966,  0.4263,  0.1446, -0.1136,  0.2729,
           0.2922],
         [ 0.4342,  0.0308,  0.4722,  0.3067,  0.2648, -0.2280,  0.3936,
           0.2017],
         [ 0.4445,  0.0973,  0.4222,  0.1945,  0.3551, -0.3823,  0.3634,
           0.1371],
         [ 0.4906,  0.1436,  0.3520,  0.1878,  0.3373, -0.3924,  0.3265,
           0.2045]]], grad_fn=<TransposeBackward1>)


In [11]:
# (num_layers, N, H_out)

print(hidden.shape)
print(hidden)

torch.Size([10, 2, 8])
tensor([[[-1.0000, -1.0000, -1.0000, -1.0000,  1.0000,  1.0000, -1.0000,
          -1.0000],
         [-0.1510,  0.3594, -0.1591,  0.3550, -0.0389, -0.4780,  0.4107,
           0.2962]],

        [[-0.0619,  0.4232, -0.6657, -0.0714,  0.7100, -0.2129, -0.8556,
           0.1747],
         [ 0.5073,  0.1180,  0.2232, -0.4746,  0.1202, -0.5087, -0.5746,
          -0.2706]],

        [[-0.4026, -0.2068, -0.5620,  0.2659,  0.3675,  0.6346,  0.5677,
          -0.2350],
         [-0.1220, -0.3302, -0.2359,  0.1231,  0.5168,  0.2501,  0.6149,
          -0.7460]],

        [[ 0.4548, -0.0333,  0.1240, -0.0499, -0.4204,  0.1680,  0.0921,
           0.2493],
         [ 0.5800,  0.1771,  0.0279,  0.0269, -0.2989,  0.0553, -0.2891,
           0.3899]],

        [[-0.4026, -0.8132,  0.1109, -0.3461, -0.4110, -0.5472,  0.0054,
           0.8204],
         [-0.5609, -0.8317,  0.1370, -0.3791, -0.3819, -0.6318, -0.0194,
           0.7368]],

        [[ 0.6924, -0.4067, -0.4998, 

## Bidirectional RNN

In [12]:
# modelling - Bidirectional RNN

cell = nn.RNN(input_size, hidden_size, batch_first=True, bidirectional=True)

In [13]:
# output 뽑기

# outputs : 모든 시점의 은닉 상태의 값
# hidden : 마지막 시점의 은닉 상태의 값

outputs, hidden = cell(inputs)

In [14]:
# (N, L, D * H_out) if batch_first=True
# (L, N, D * H_out) if batch_first=False

# D=2 if bidirectional=True

print(outputs.shape)
print(outputs)

torch.Size([2, 4, 16])
tensor([[[ 0.4331, -0.1366,  0.0667,  0.3345, -0.2354, -0.2794,  0.0582,
          -0.1048,  0.5471, -0.5658,  0.3570,  0.1079,  0.3587,  0.0760,
           0.2334,  0.2860],
         [ 0.4218, -0.2053, -0.1181,  0.4289, -0.3886, -0.3087,  0.2650,
           0.0508, -0.2298, -0.7240, -0.5537, -0.5189, -0.5911, -0.7698,
          -0.2365, -0.0926],
         [ 1.0000,  1.0000, -1.0000,  1.0000, -1.0000,  1.0000,  1.0000,
           1.0000, -1.0000, -1.0000,  1.0000, -1.0000,  1.0000,  1.0000,
          -1.0000, -1.0000],
         [ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000, -1.0000, -1.0000,
          -1.0000,  1.0000, -1.0000, -1.0000, -1.0000,  1.0000, -1.0000,
          -1.0000, -1.0000]],

        [[ 0.4331, -0.1366,  0.0667,  0.3345, -0.2354, -0.2794,  0.0582,
          -0.1048,  0.3613, -0.4168,  0.2051,  0.3820,  0.3476, -0.6169,
           0.0468,  0.2883],
         [ 0.4218, -0.2053, -0.1181,  0.4289, -0.3886, -0.3087,  0.2650,
           0.0508,  0.3165,

In [15]:
# (D* num_layers, N, H_out)

# D=2 if bidirectional=True

print(hidden.shape)
print(hidden)

torch.Size([2, 2, 8])
tensor([[[ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000, -1.0000, -1.0000,
          -1.0000],
         [ 0.4741, -0.2365, -0.1545,  0.4817, -0.4268, -0.2056,  0.2112,
           0.2195]],

        [[ 0.5471, -0.5658,  0.3570,  0.1079,  0.3587,  0.0760,  0.2334,
           0.2860],
         [ 0.3613, -0.4168,  0.2051,  0.3820,  0.3476, -0.6169,  0.0468,
           0.2883]]], grad_fn=<StackBackward>)


In [16]:
# 순방향

print(hidden[0].shape)
print(hidden[0])

torch.Size([2, 8])
tensor([[ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000, -1.0000, -1.0000, -1.0000],
        [ 0.4741, -0.2365, -0.1545,  0.4817, -0.4268, -0.2056,  0.2112,  0.2195]],
       grad_fn=<SelectBackward>)


In [17]:
print(hidden[0, :, :].shape)
print(hidden[0, :, :])

torch.Size([2, 8])
tensor([[ 1.0000, -1.0000,  1.0000,  1.0000,  1.0000, -1.0000, -1.0000, -1.0000],
        [ 0.4741, -0.2365, -0.1545,  0.4817, -0.4268, -0.2056,  0.2112,  0.2195]],
       grad_fn=<SliceBackward>)


In [18]:
# 역방향

print(hidden[1].shape)
print(hidden[1])

torch.Size([2, 8])
tensor([[ 0.5471, -0.5658,  0.3570,  0.1079,  0.3587,  0.0760,  0.2334,  0.2860],
        [ 0.3613, -0.4168,  0.2051,  0.3820,  0.3476, -0.6169,  0.0468,  0.2883]],
       grad_fn=<SelectBackward>)
