## 임베딩 함수를 이용한 LSTM, GRU 모델 구현

### Character Recurrent Neural Network
- 셰익스피어 문체를 모방하는 순환신경망 실습 코드입니다.
- Embedding 레이어 및 RNN 모델로 구성되어 있습니다.

!pip install unidecode

In [9]:
import torch
import torch.nn as nn

In [1]:
import unidecode
import string
import random
import re
import time, math

In [2]:
all_characters = string.printable
n_characters = len(all_characters)
print(all_characters)
print('num_chars = ', n_characters)

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

num_chars =  100


In [5]:
!wget https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt -P ./data

--2025-03-13 12:00:26--  https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8001::154, 2606:50c0:8003::154, 2606:50c0:8000::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8001::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘./data/input.txt’


2025-03-13 12:00:27 (2.70 MB/s) - ‘./data/input.txt’ saved [1115394/1115394]



In [6]:
file = unidecode.unidecode(open('./data/input.txt').read())
file_len = len(file)
print('file_len =', file_len)

file_len = 1115394


### Hyperparameter

In [7]:
num_epochs = 2000
print_every = 100
plot_every = 10

# chunk에 대한 설명은 아래 함수정의하면서 하겠습니다.
chunk_len = 200

hidden_size = 100
batch_size = 1
num_layers = 1
embedding_size = 70
lr = 0.002

In [8]:
def random_check():
    start_index = random.randint(0, file_len - chunk_len)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

In [11]:
def char_tensor(string):
    tensor = torch.zeros(len(string), dtype=torch.long)
    for c in range(len(string)):
        tensor[c] = all_characters.index(string[c])
    return tensor

print(char_tensor('ABCdef'))

tensor([36, 37, 38, 13, 14, 15])


In [12]:
def random_training_set():
    chunk = random_check()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

In [14]:
class RNN(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        out = self.encoder(input.view(1, -1))
        out, hidden = self.rnn(out, hidden)
        out = self.decoder(out.view(batch_size, -1))
        return out, hidden
    
    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, batch_size, hidden_size)
        return hidden

In [15]:
model = RNN(input_size=n_characters,
            embedding_size=embedding_size,
            hidden_size=hidden_size,
            output_size=n_characters,
            num_layers=2
            )

In [16]:
inp = char_tensor("A")
hidden = model.init_hidden()
out, hidden = model(inp, hidden)

In [18]:
# 임의의 문자(start_str)로 시작하는 길이 200짜리 모방 글을 생성하는 코드입니다.
def test():
    start_str = "b"
    inp = char_tensor(start_str)
    hidden = model.init_hidden()
    x = inp

    print(start_str,end="")
    for i in range(200):
        output,hidden = model(x,hidden)

        # 여기서 max값을 사용하지 않고 multinomial을 사용하는 이유는 만약 max 값만 쓰는 경우에
        # 생성되는 텍스트가 다 the the the the the 이런식으로 나오기 때문입니다.
        # multinomial 함수를 통해 높은 값을 가지는 문자들중에 램덤하게 다음 글자를 뽑아내는 방식으로 자연스러운 텍스트를 생성해냅니다.
        output_dist = output.data.view(-1).div(0.8).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = all_characters[top_i]

        print(predicted_char,end="")

        x = char_tensor(predicted_char)

In [17]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

In [19]:
for i in range(num_epochs):
    total = char_tensor(random_check())
    inp = total[:-1]
    label = total[1:]
    hidden = model.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer.zero_grad()
    for j in range(chunk_len - 1):
        x = inp[j]
        y_ = label[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden = model(x, hidden)
        loss += loss_func(y, y_)

    loss.backward()
    optimizer.step()
    
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n","="*100)


 tensor([4.6174], grad_fn=<DivBackward0>) 

mg+%!3$qAX|Wc("n^wdEj+aGH@x( o~H/JPsc)(FV,!pLxIu$5J
B)lRF=VtLfHjF~Rz'Ag&Eo/t,\3V>o0YONj3n4*]]2$4$~?;1$Ro7ScS>KC-Vt:]GB"BWcMH-%

 tensor([2.4498], grad_fn=<DivBackward0>) 

bit on for.

LUwans, ming sor the mausg, ani ofe ur waralo thare yon cor lr.
af fit and wnwoy hy hir as antes thon wisg ir thar rortre wous, cile;
ondthe or athes heer sinaring 
Tore ar mas hive fass f

 tensor([2.3921], grad_fn=<DivBackward0>) 

b, dech futhece porpy imde wrete, sont cIA:
Thy miet pith shind the hers, bage hat are iler nouest mey;
Whath mowea! wering beverisheldey hoy stall, you brome ghlest she wuree, sife, the nito morenger 

 tensor([2.0533], grad_fn=<DivBackward0>) 

be if that, wish he you not wine ar? and on to to lill deed you wousmin& trave te to moud, the my to lave thins mise my he sery the we hapings dalt o thich araty to thing onds ou and me my many,
-comin

 tensor([2.1013], grad_fn=<DivBackward0>) 

brofss.

TIIUCHOR:
Sece well miegeter,


In [31]:
class GRU(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super(GRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        out = self.encoder(input.view(1, -1))
        out, hidden = self.gru(out, hidden)
        out = self.decoder(out.view(batch_size, -1))
        return out, hidden
    
    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, batch_size, hidden_size)
        return hidden

In [32]:
model_gru = GRU(input_size=n_characters,
            embedding_size=embedding_size,
            hidden_size=hidden_size,
            output_size=n_characters,
            num_layers=2
            )

inp = char_tensor("A")
hidden = model.init_hidden()
out, hidden = model(inp, hidden)

optimizer = torch.optim.Adam(model_gru.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

In [33]:
for i in range(num_epochs):
    total = char_tensor(random_check())
    inp = total[:-1]
    label = total[1:]
    hidden = model_gru.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer.zero_grad()
    for j in range(chunk_len - 1):
        x = inp[j]
        y_ = label[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden = model_gru(x, hidden)
        loss += loss_func(y, y_)

    loss.backward()
    optimizer.step()
    
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n","="*100)


 tensor([4.5872], grad_fn=<DivBackward0>) 

by and God what;
Gold to shall persain tear, it is any what hours.

QUEEN ELIZABETH:
Is toints stainssering the was fried decomeended; by not that carition of that;
If sill no that had Tare Jomkers her

 tensor([2.3812], grad_fn=<DivBackward0>) 

bat,
This are is be sent had condect undeay?

DUKE OF YORK:
His duched a tright of starratiinstaite, for deash.

CLARAD
LEROT:
I'll me expirent the manfer a griest our sober
To may the shall him in a f

 tensor([2.2237], grad_fn=<DivBackward0>) 

bling is with
For end nase not hasts her sparitor and see little her, a kilk; and saw, and the seed, and be struer,
Yet strects, made for of come contury in for the delision,
That them but may the good

 tensor([2.2607], grad_fn=<DivBackward0>) 

by follow and for conforss,
Time that counded, to him, I am lay seeked will preist
And with sight, here a for, my life, lame?

CALT:
Which need somp? so, too them brise, what steek at stright
Than stay

 tensor([2.

In [38]:
class LSTM(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, cell):
        out = self.encoder(input.view(1, -1))
        out, (hidden, cell) = self.lstm(out, (hidden, cell))
        out = self.decoder(out.view(batch_size, -1))
        return out, hidden, cell
    
    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, batch_size, hidden_size)
        cell = torch.zeros(num_layers, batch_size, hidden_size)
        return hidden, cell

In [39]:
model_lstm = LSTM(n_characters, embedding_size, hidden_size, n_characters, num_layers)

In [41]:
inp = char_tensor("A")
print(inp)
hidden,cell = model_lstm.init_hidden()
print(hidden.size())

out,hidden,cell = model_lstm(inp,hidden,cell)
print(out.size())

tensor([36])
torch.Size([1, 1, 100])
torch.Size([1, 100])


In [42]:
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

In [44]:
for i in range(num_epochs):
    # 랜덤한 텍스트 덩어리를 샘플링하고 이를 인덱스 텐서로 변환합니다. 
    inp,label = random_training_set()
    hidden, cell = model_lstm.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer.zero_grad()
    for j in range(chunk_len-1):
        x  = inp[j]
        y_ = label[j].unsqueeze(0).type(torch.LongTensor)
        y,hidden,cell = model_lstm(x, hidden, cell)
        loss += loss_func(y,y_)

    loss.backward()
    optimizer.step()
    
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n","="*100)


 tensor([4.5843], grad_fn=<DivBackward0>) 

band you are is down,
The waty the deifenty thou truther: is crow lawt,-budes reaaat cut droise, she had!
What the parant and besistuouses; a hadging,
That was is my same and in but thee to thee life,


 tensor([2.8146], grad_fn=<DivBackward0>) 

by: where long canless
They froath a love of the kind for
Caice; sholate word, there, but sir, be crown:
If clast pease the speak, is it doo how them doth at say,
Come to be speaks countonds to ague a 

 tensor([2.2264], grad_fn=<DivBackward0>) 

bain the prisure a suffed of a hend
Theale and stay the sunss, no shall you? Carspast are; or thou care, and sore.
Do that ware if the will down they she no last the name and me
the sumby's his steet I

 tensor([2.3018], grad_fn=<DivBackward0>) 

by: whild him:
This pieful to you some, serves: came are the heart:
This as that the shall, that this not reseek'st the shall sace, to do an good me death.

Sroive;
That how cost there one, so go the s

 tensor([2.