In [5]:
import torch 
import torch.nn as nn
# Random input laid out as [L, N, H_in] = [5, 3, 10] (batch_first=False below).
# NOTE: the name `input` shadows the Python builtin of the same name.
input = torch.randn(5, 3, 10)

# Reference PyTorch RNN: input_size=10, hidden_size=20, num_layers=2, bidirectional.
rnn_origin = nn.RNN(10, 20, 2, batch_first=False, bidirectional=True)
output, hn = rnn_origin(input)

print(output.size()) # [L, N, 2 * H_out] -- both directions are concatenated on the last dim
print(hn.size())   # [2 * num_layers, N, H_out] -- one final state per layer and direction

# Disabled earlier experiment, kept for reference:
# input2 = torch.randn(5, 3, 10)
# rnn_origin = nn.RNN(10, 20, 2)
# output, hn = rnn_origin(input2)
# # rnn_origin = nn.RNN(20, 20, 1, batch_first=True)
# # output, hn = rnn_origin(output)
# print(output.size()) # [batch_size, seq_length, h_out]
# print(hn.size())   # [num_layers, batch_size, h_out]



torch.Size([5, 3, 40])
torch.Size([4, 3, 20])


In [2]:
import torch 
from nlp.models.rnn import RNN

# Shared random input. With batch_first=False it is read as [L, N, H_in];
# with batch_first=True it is presumably read as [N, L, H_in] (verify against RNN).
input = torch.randn(5, 3, 10)

# Note the dimensions at each point where they change, for both configurations:
#   1. batch_first = False, bidirectional = True
#   2. batch_first = True,  bidirectional = True
for batch_first in (False, True):
    rnn_custom = RNN(
        input_size=10,
        hidden_size=20,
        num_layers=2,
        batch_first=batch_first,
        bidirectional=True,
    )
    output, hn = rnn_custom(input)
    print(f"output: {output.size()}")
    print(f"hn: {hn.size()}")



batch_size: 3
sequence_size: 5
output: torch.Size([5, 3, 40])
hn: torch.Size([4, 3, 20])
batch_size: 5
sequence_size: 3
output: torch.Size([5, 3, 40])
hn: torch.Size([4, 5, 20])


In [4]:
# Walk the list from both ends at once: index i counts from the front,
# index -1 - i (the same as -(i + 1)) counts from the back.
lst = [1, 2, 3, 4, 5]
for i, front_item in enumerate(lst):
    back_item = lst[-1 - i]
    print(f"forward : {front_item}")
    print(f"backward : {back_item}")

forward : 1
backward : 5
forward : 2
backward : 4
forward : 3
backward : 3
forward : 4
backward : 2
forward : 5
backward : 1


In [19]:
from collections import Counter

# Tally how often each value occurs, then display the count for the value 1.
sample_values = [1, 2, 3, 4, 1, 1, 1, 2, 2, 2]
counter = Counter(sample_values)
counter[1]

4

In [1]:
import torch 
import torch.nn as nn
from nlp.models.rnn import RNN
# Same random input for both implementations: [L, N, H_in] = [5, 3, 10].
input = torch.randn(5, 3, 10)

# Reference implementation from PyTorch.
rnn_origin = nn.RNN(10, 20, 2, batch_first=False, bidirectional=True)
output, hn = rnn_origin(input)

# Custom implementation built with the same hyperparameters.
# NOTE(review): `output` and `hn` are rebound here, so the nn.RNN results above are
# discarded without being printed or compared -- confirm this is intended.
rnn_custom = RNN(input_size=10, hidden_size=20, num_layers=2, batch_first=False, bidirectional=True)
output, hn = rnn_custom(input)
print(f"output: {output.size()}")
print(f"hn: {hn.size()}")

batch_size: 3
sequence_size: 5
output: torch.Size([5, 3, 40])
hn: torch.Size([4, 3, 20])


In [7]:
from nlp.models.rnn import RNNCell
# Build a single custom RNN cell. The positional arguments are presumably
# (input_size, hidden_size) -- verify against nlp.models.rnn.RNNCell.
rnn_cell = RNNCell(10, 20)

# Print every entry of the cell's state_dict together with its tensor shape.
for name, state in rnn_cell.state_dict().items():
    print(f"{name} -> size: {state.shape}")

ih.weight -> size: torch.Size([20, 10])
ih.bias -> size: torch.Size([20])
hh.weight -> size: torch.Size([20, 20])
hh.bias -> size: torch.Size([20])


In [18]:
import torch 
import torch.nn as nn

# Random input laid out as [seq_length, batch_size, input_size] = [5, 3, 10] (batch_first=False).
input = torch.randn(5, 3, 10)
lstm_origin = nn.LSTM(10, 20, 2, batch_first=False, bidirectional=True)
# nn.LSTM returns (output, (h_n, c_n)), so `hn` holds the (h_n, c_n) tuple.
output, hn = lstm_origin(input)
print(output.size()) # [seq_length, batch_size, 2 * h_out] (batch_first=False, bidirectional)
print(hn[0].size())   # h_n: [2 * num_layers, batch_size, h_out]
print(hn[1].size())   # c_n: [2 * num_layers, batch_size, h_out]
# NOTE(review): the recorded output of this cell shows a batch dimension of 5 for h_n/c_n,
# which would match batch_first=True rather than this code -- likely stale; re-run to confirm.


torch.Size([5, 3, 40])
torch.Size([4, 5, 20])
torch.Size([4, 5, 20])


In [20]:
import torch
from nlp.models.lstm import LSTM
# Random input; presumably read as [seq_length, batch_size, input_size] = [5, 3, 10]
# because batch_first=False (verify against nlp.models.lstm.LSTM).
input = torch.randn(5, 3, 10)
lstm_custom = LSTM(10, 20, 2, batch_first=False, bidirectional=True)
# `hn` is indexed with [0] and [1] below, so it is a pair; presumably (h_n, c_n)
# as in nn.LSTM -- TODO confirm.
output, hn = lstm_custom(input)
print(output.size()) # expected [seq_length, batch_size, 2 * h_out] if it mirrors nn.LSTM
print(hn[0].size())   # expected [2 * num_layers, batch_size, h_out] if it mirrors nn.LSTM
print(hn[1].size())   # expected [2 * num_layers, batch_size, h_out] if it mirrors nn.LSTM

torch.Size([5, 3, 40])
torch.Size([4, 3, 20])
torch.Size([4, 3, 20])


In [4]:
import torch
from torch import Tensor

def get_look_ahead_mask(dec_input: Tensor) -> Tensor:
    """Build the look-ahead mask for a batch of decoder inputs.

    Marks, for every position, the positions that come after it, so that a
    token cannot refer to tokens in its own future.

    Args:
        dec_input (Tensor): input sentences, [batch_size, seq_len]

    Returns:
        Tensor: boolean mask, [batch_size, seq_len, seq_len]; entry [b, i, j]
            is True when j > i (a future position) and False otherwise.
    """
    batch_size, seq_len = dec_input.size(0), dec_input.size(1)

    all_ones = torch.ones_like(dec_input)  # [batch_size, seq_len]
    # Add a trailing axis and broadcast it into one square matrix per sentence.
    square = all_ones.unsqueeze(-1).expand(batch_size, seq_len, seq_len)

    # Keep only the part strictly above the main diagonal; everything else becomes 0.
    strictly_upper = square.triu(diagonal=1)  # [batch_size, seq_len, seq_len]
    return strictly_upper == 1


# Sample batch of two token-id sequences of length 7.
tensor = torch.tensor([
    [1, 2, 4, 5, 3, 3, 3],
    [34, 21, 23, 2, 4, 3, 3]
    ]) # => [2, 7]
# Token id 3 is the padding id (the look-ahead mask itself ignores the token values).

print(tensor.size())
output = get_look_ahead_mask(tensor)  # => [2, 7, 7], True strictly above the main diagonal
print(output)
print(output.size())

torch.Size([2, 7])
tensor([[[False,  True,  True,  True,  True,  True,  True],
         [False, False,  True,  True,  True,  True,  True],
         [False, False, False,  True,  True,  True,  True],
         [False, False, False, False,  True,  True,  True],
         [False, False, False, False, False,  True,  True],
         [False, False, False, False, False, False,  True],
         [False, False, False, False, False, False, False]],

        [[False,  True,  True,  True,  True,  True,  True],
         [False, False,  True,  True,  True,  True,  True],
         [False, False, False,  True,  True,  True,  True],
         [False, False, False, False,  True,  True,  True],
         [False, False, False, False, False,  True,  True],
         [False, False, False, False, False, False,  True],
         [False, False, False, False, False, False, False]]])
torch.Size([2, 7, 7])


In [5]:
def get_padding_mask(inputs: Tensor, padding_id: int) -> Tensor:
    """Build the mask that marks padding tokens.

    Args:
        inputs (Tensor): input sentences as token ids, [batch_size, seq_len]
        padding_id (int): id of the padding token

    Returns:
        Tensor: boolean mask, [batch_size, seq_len, seq_len]; entry [b, i, j]
            is True when inputs[b, j] equals padding_id, for every row i.
    """
    batch_size, seq_len = inputs.size(0), inputs.size(1)
    # The comparison result is a bool tensor that autograd does not track, so the
    # legacy `.data` access is not needed here.
    pad_attn_mask = inputs.eq(padding_id).unsqueeze(1)  # => [batch_size, 1, seq_len]  True / False
    # Repeat the per-column flags along the row axis and materialise the view.
    return pad_attn_mask.expand(batch_size, seq_len, seq_len).contiguous()


# Sample batch of two token-id sequences of length 7; id 3 is used as the padding id below.
tensor = torch.tensor([
    [1, 2, 4, 5, 3, 3, 3],
    [34, 21, 23, 2, 4, 3, 3]
    ])
print(tensor.size())
output = get_padding_mask(tensor, 3)  # => [2, 7, 7], True in every column whose token id is 3
print(output)
print(output.size())

torch.Size([2, 7])
tensor([[[False, False, False, False,  True,  True,  True],
         [False, False, False, False,  True,  True,  True],
         [False, False, False, False,  True,  True,  True],
         [False, False, False, False,  True,  True,  True],
         [False, False, False, False,  True,  True,  True],
         [False, False, False, False,  True,  True,  True],
         [False, False, False, False,  True,  True,  True]],

        [[False, False, False, False, False,  True,  True],
         [False, False, False, False, False,  True,  True],
         [False, False, False, False, False,  True,  True],
         [False, False, False, False, False,  True,  True],
         [False, False, False, False, False,  True,  True],
         [False, False, False, False, False,  True,  True],
         [False, False, False, False, False,  True,  True]]])
torch.Size([2, 7, 7])


In [13]:
def get_position(inputs: Tensor) -> Tensor:
    """Return the position index of every element in a batch.

    Args:
        inputs (Tensor): input batch, [batch_size, max_seq_size]

    Returns:
        Tensor: the indices 0 .. max_seq_size - 1 repeated for every row,
            [batch_size, max_seq_size], with the same dtype and device as inputs.
    """
    batch_size, max_seq_size = inputs.size(0), inputs.size(1)

    # One row of indices: [max_seq_size]
    index_row = torch.arange(max_seq_size, device=inputs.device, dtype=inputs.dtype)

    # Broadcast the row over the batch, then copy it into contiguous memory.
    position = index_row.expand(batch_size, max_seq_size).contiguous()  # -> [bs, max_seq_size]
    return position

# Sample batch of two token-id sequences of length 7.
tensor = torch.tensor([
    [1, 2, 4, 5, 3, 3, 3],
    [34, 21, 23, 2, 4, 3, 3]
    ])
print(tensor.size())
output = get_position(tensor)  # => [2, 7], each row holds the indices 0..6
print(output)
print(output.size())

torch.Size([2, 7])
tensor([[0, 1, 2, 3, 4, 5, 6],
        [0, 1, 2, 3, 4, 5, 6]])
torch.Size([2, 7])
