In [1]:
import torch
import string
from torch import nn, optim
from torch.utils.data import (Dataset, DataLoader, TensorDataset)
import tqdm
from statistics import mean

In [2]:
all_chars = string.printable
vocab_size = len(all_chars)
vocab_dict = dict((c, i) for (i, c) in enumerate(all_chars))

def str2ints(s, vocab_dict):
    return [vocab_dict[c] for c in s]

def ints2str(x, vocab_array):
    return "".join([vocab_array[i] for i in x])

In [3]:
class ShakespearDataset(Dataset):
    def __init__(self, path, chunk_size=200):
        # 수치 리스트 변환
        data = str2ints(open(path).read().strip(), vocab_dict)
        # size split
        data = torch.tensor(data, dtype=torch.int64).split(chunk_size)
        if len(data[-1]) < chunk_size:
            data = data[:-1]
        self.data = data
        self.n_chunks = len(self.data)
    
    def __len__(self):
        return self.n_chunks
    
    def __getitem__(self, idx):
        return self.data[idx]

In [4]:
ds = ShakespearDataset("d:/data/tinyshakespeare.txt", chunk_size=200)
loader = DataLoader(ds, batch_size=32, shuffle=True, num_workers=0)

In [5]:
class SequenceGenerationNet(nn.Module):
    def __init__(self, num_embeddings, embedding_dim=50, hidden_size=50, num_layers=1, dropout=0.2):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.linear = nn.Linear(hidden_size, num_embeddings)
    
    def forward(self, x, h0=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = self.linear(x)
        return x, h

In [10]:
def generated_seq(net, start_phrase="The King said ", length=200, temperature=0.8, device='cuda:0'):
    net.eval()  # 평가모드
    results = []
    start_tensor = torch.tensor(str2ints(start_phrase, vocab_dict), dtype=torch.int64).to(device)
    x0 = start_tensor.unsqueeze(0)
    o, h = net(x0)
    out_dist = o[:, -1].view(-1).exp()
    top_i = torch.multinomial(out_dist, 1)[0]
    results.append(top_i)
    
    for i in range(length):
        inp = torch.tensor([[top_i]],dtype=torch.int64)
        inp = inp.to(device)
        o, h = net(inp, h)
        out_dist = o.view(-1).exp()
        top_i = torch.multinomial(out_dist, 1)[0]
        results.append(top_i)
    
    return start_phrase + ints2str(results, all_chars)
    

In [11]:
net = SequenceGenerationNet(vocab_size, embedding_dim=20, hidden_size=50, num_layers=2, dropout=0.1)
net.to("cuda:0")
opt = optim.Adam(net.parameters())
loss_f = nn.CrossEntropyLoss()

for epoch in range(50):
    net.train()
    losses = []
    for data in tqdm.tqdm(loader):
        x = data[:, :-1]
        y = data[:, 1:]
        
        x = x.to("cuda:0")
        y = y.to("cuda:0")
        
        y_pred, _ = net(x)
        loss = loss_f(y_pred.view(-1, vocab_size), y.view(-1))
        net.zero_grad()
        loss.backward()
        opt.step()
        losses.append(loss.item())    
    print(epoch, mean(losses))
    with torch.no_grad():
        print(generated_seq(net, device="cuda:0"))

100%|████████████████████████████████████████████████████████████████████████████████| 175/175 [00:08<00:00, 20.80it/s]


0 3.4457021808624266


NameError: name 'result' is not defined