In [1]:
import torch
import torch.nn as nn

In [2]:
# 입력 (3단어 시퀀스, 임베딩 차원 = 4)
x3 = torch.randn(3,1,4) #(seq_len, batch_size, input_size)

In [4]:
# RNN
rnn = nn.RNN(input_size=4, hidden_size=8, num_layers=1)
output3, hidden3 = rnn(x3)

In [5]:
print("Output shape:", output3.shape) # 각 시점의 은닉 상태
print("Hidden shape:", hidden3.shape) # 마지막 시점의 은닉 상태

Output shape: torch.Size([3, 1, 8])
Hidden shape: torch.Size([1, 1, 8])


In [6]:
len(output3)

3

In [7]:
print(output3[0])

tensor([[-0.1107,  0.3089,  0.3718, -0.3137,  0.3310,  0.2441, -0.1495, -0.6145]],
       grad_fn=<SelectBackward0>)


In [8]:
print(output3[1])

tensor([[-0.4009,  0.8071, -0.1728,  0.0732, -0.6253,  0.4546, -0.0564, -0.5961]],
       grad_fn=<SelectBackward0>)


In [9]:
print(output3[2])

tensor([[-0.1552,  0.4137, -0.1165, -0.3998, -0.1453,  0.6767, -0.1031, -0.5053]],
       grad_fn=<SelectBackward0>)


In [11]:
print(hidden3[0])

tensor([[-0.1552,  0.4137, -0.1165, -0.3998, -0.1453,  0.6767, -0.1031, -0.5053]],
       grad_fn=<SelectBackward0>)


In [12]:
import torch 
import torch.nn as nn

In [13]:
# 입력 시퀀스: batch=1, seq_len=3, input_dim=4
x = torch.randn(3,1,4)

In [14]:
#LSTM

In [15]:
lstm = nn.LSTM(input_size=4, hidden_size=8)
output, (h_n, c_n) = lstm(x)

In [17]:
print("Output shape:", output.shape)
print("Hidden state:", h_n.shape) 
print("Cell state:", c_n.shape)

Output shape: torch.Size([3, 1, 8])
Hidden state: torch.Size([1, 1, 8])
Cell state: torch.Size([1, 1, 8])


### --- 작업 결과 확인용 출력

In [18]:
len(output)

3

In [20]:
print(output3[0])

tensor([[-0.1107,  0.3089,  0.3718, -0.3137,  0.3310,  0.2441, -0.1495, -0.6145]],
       grad_fn=<SelectBackward0>)


In [21]:
print(output3[1])

tensor([[-0.4009,  0.8071, -0.1728,  0.0732, -0.6253,  0.4546, -0.0564, -0.5961]],
       grad_fn=<SelectBackward0>)


In [22]:
print(output3[2])

tensor([[-0.1552,  0.4137, -0.1165, -0.3998, -0.1453,  0.6767, -0.1031, -0.5053]],
       grad_fn=<SelectBackward0>)


In [23]:
print(h_n[0])

tensor([[ 1.2367e-01, -1.2442e-01, -1.2336e-02, -2.5259e-05, -7.1959e-02,
          1.7293e-01, -5.5002e-02,  8.8754e-02]], grad_fn=<SelectBackward0>)


# LSTM 기반 LM을 이용한 문장 생성

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim

(1) 문장을 토큰화한 데이터

In [26]:
#데이터
sentences = [
    ['we','like','nlp'],
    ['nlp','is','fun'],
    ['i','love','nlp'],
    ['you','like','ai']
]

(2) 단어 인덱싱

In [27]:
vocab = list(set(sum(sentences,[])))
word2idx = {w: i for i, w in enumerate(vocab)}
idx2word = {i: w for w,i in word2idx.items()}

(3) dataset을 입력X와 정답Y 시쿼스 나누기

In [28]:
def make_sequence(sent):
    input_seq = [word2idx[w] for w in sent[:-1]]
    target_seq = [word2idx[w] for w in sent[1:]]
    return torch.tensor(input_seq), torch.tensor(target_seq)

In [29]:
X, Y = zip(*[make_sequence(s) for s in sentences])

(4) LM-LSTM 모델 정의

In [38]:
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, emb_size, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_size)
        self.lstm = nn.LSTM(emb_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        x = self.emb(x)
        out, _ = self.lstm(x)
        out = self.fc(out)
        return out

(5) 모델 객체 생성, 최적화 모델 설정

In [43]:
model = LSTMModel(len(vocab), 10, 16)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

(6) 모델 학습 수행

In [44]:
for epoch in range(1000):
    total_loss = 0
    for x, y in zip(X,Y):
        optimizer.zero_grad()
        output = model(x.unsqueeze(0))
        loss = criterion(output.squeeze(0), y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if epoch % 50 == 0:
        print(f"Epoch {epoch}, Loss: {total_loss:.4f}")

Epoch 0, Loss: 8.8633
Epoch 50, Loss: 0.0427
Epoch 100, Loss: 0.0144
Epoch 150, Loss: 0.0077
Epoch 200, Loss: 0.0049
Epoch 250, Loss: 0.0034
Epoch 300, Loss: 0.0025
Epoch 350, Loss: 0.0019
Epoch 400, Loss: 0.0015
Epoch 450, Loss: 0.0012
Epoch 500, Loss: 0.0010
Epoch 550, Loss: 0.0008
Epoch 600, Loss: 0.0007
Epoch 650, Loss: 0.0006
Epoch 700, Loss: 0.0005
Epoch 750, Loss: 0.0004
Epoch 800, Loss: 0.0004
Epoch 850, Loss: 0.0003
Epoch 900, Loss: 0.0003
Epoch 950, Loss: 0.0003


(7) 문장 생성

In [46]:
# 문장 생성 작업 초기화ㅣ 시작 단어 'i'
start = torch.tensor([[word2idx['i']]])
generated = ['i']
hidden = None

In [47]:
# 다음 단어 예측 5회 반복 작업을 통해 문장 생성하기
for _ in range(5):
    output = model(start)
    next_word = idx2word[output.argmax(2)[:,-1].item()]
    generated.append(next_word)
    start = torch.tensor([[word2idx[next_word]]])

In [49]:
# 결과 출력
print("Generated:", ' '.join(generated))

Generated: i love is fun like ai
