In [1]:
import sys
import numpy as np
import itertools
import torch

print("Python version:", sys.version)
print('torch', torch.__version__)
print('numpy', np.__version__)

# from torch.autograd import Variable (torch.Tensor로 통합됨)

import time
import math
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from name_dataset import NameDataset #py파일 (names_train.csv 처리)

Python version: 3.9.7 (default, Sep 16 2021, 16:59:28) [MSC v.1916 64 bit (AMD64)]
torch 2.2.0+cpu
numpy 1.19.5


# classification basic

In [2]:
HIDDEN_SIZE = 100
N_CHARS = 128
N_CLASSES = 18

In [9]:
class RNNClassifier(torch.nn.Module):
    
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        
    def forward(self, input):
        
        batch_size = input.size(0) # input = B x S . size(0) = B
        
        input = input.t()  # input:  B x S  -- (transpose) --> S x B
        
        print(" input", input.size())
        embedded = self.embedding(input)
        print(" embedding", embedded.size())
        
        #hidden = self._init_hidden(batch_size)
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_size)
        
        output, hidden = self.gru(embedded, hidden)
        print(" gru hidden output", hidden.size())
        
        fc_output = self.fc(hidden)
        print(" fc output", fc_output.size())
        return fc_output
    
    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_size)
        return hidden

In [10]:
def str2ascii_arr(msg):
    arr = [ord(c) for c in msg]
    return arr, len(arr)

def pad_sequences(vectorized_seqs, seq_lengths):
    seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long()
    for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
    return seq_tensor

def make_variables(names):
    sequence_and_length = [str2ascii_arr(name) for name in names]
    vectorized_seqs = [sl[0] for sl in sequence_and_length]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length])
    return pad_sequences(vectorized_seqs, seq_lengths)

In [14]:
if __name__ == '__main__':
    names = ['adylov', 'solan', 'hard', 'san']
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_CLASSES)
    
    for name in names:
        arr, _ = str2ascii_arr(name)
        inp = torch.LongTensor([arr])
        out = classifier(inp)
        print("in", inp.size(), "out", out.size())
        
    inputs = make_variables(names)
    out = classifier(inputs)
    print('batch in', inputs.size(), 'batch out', out.size())

 input torch.Size([6, 1])
 embedding torch.Size([6, 1, 100])
 gru hidden output torch.Size([1, 1, 100])
 fc output torch.Size([1, 1, 18])
in torch.Size([1, 6]) out torch.Size([1, 1, 18])
 input torch.Size([5, 1])
 embedding torch.Size([5, 1, 100])
 gru hidden output torch.Size([1, 1, 100])
 fc output torch.Size([1, 1, 18])
in torch.Size([1, 5]) out torch.Size([1, 1, 18])
 input torch.Size([4, 1])
 embedding torch.Size([4, 1, 100])
 gru hidden output torch.Size([1, 1, 100])
 fc output torch.Size([1, 1, 18])
in torch.Size([1, 4]) out torch.Size([1, 1, 18])
 input torch.Size([3, 1])
 embedding torch.Size([3, 1, 100])
 gru hidden output torch.Size([1, 1, 100])
 fc output torch.Size([1, 1, 18])
in torch.Size([1, 3]) out torch.Size([1, 1, 18])
 input torch.Size([6, 4])
 embedding torch.Size([6, 4, 100])
 gru hidden output torch.Size([1, 4, 100])
 fc output torch.Size([1, 4, 18])
batch in torch.Size([4, 6]) batch out torch.Size([1, 4, 18])


# rnn_classification

In [12]:
HIDDEN_SIZE = 100
N_LAYERS = 2
BATCH_SIZE = 256
N_EPOCHS = 100

test_dataset = NameDataset(is_train_set=False)
test_loader = DataLoader(dataset=test_dataset,
                        batch_size = BATCH_SIZE, shuffle=True)

train_dataset = NameDataset(is_train_set=True)
train_loader = DataLoader(dataset=train_dataset,
                        batch_size = BATCH_SIZE, shuffle=True)

N_COUNTRIES = len(train_dataset.get_countries())
print(N_COUNTRIES, "countries")
N_CHARS = 128

18 countries


In [43]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m,s)

def creative_variable(tensor):
    if torch.cuda.is_available():
        return tensor.cuda()
    else:
        return tensor
        
def pad_sequences(vectorized_seqs, seq_lengths, countries):
    seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long()
    for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
        
    seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
    seq_tensor = seq_tensor[perm_idx]
    
    target = countries2tensor(countries)
    if len(countries):
        target = target[perm_idx]
        
    return creative_variable(seq_tensor), creative_variable(seq_lengths), creative_variable(target)

def make_variables(names, countries):
    sequence_and_length = [str2ascii_arr(name) for name in names]
    vectorized_seqs = [sl[0] for sl in sequence_and_length]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length])
    return pad_sequences(vectorized_seqs, seq_lengths, countries)

def str2ascii_arr(msg):
    arr = [ord(c) for c in msg]
    return arr, len(arr)

def countries2tensor(countries):
    country_ids = [train_dataset.get_country_id(country) 
                   for country in countries]
    return torch.LongTensor(country_ids)

In [44]:
class RNNClassifier(torch.nn.Module):
    
    def __init__(self, input_size, hidden_size, output_size,
                n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = int(bidirectional) + 1
        
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers,
                               bidirectional=bidirectional)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        
    def forward(self, input, seq_lengths):
        input = input.t()
        batch_size = input.size(1)
        
        hidden = self._init_hidden(batch_size)
        
        embedded = self.embedding(input)
        
        gru_input = pack_padded_sequence(
                    embedded, seq_lengths.data.cpu().numpy())
        
        self.gru.flatten_parameters()
        output, hidden = self.gru(gru_input, hidden)
        
        fc_output = self.fc(hidden[-1])
        return fc_output
    
    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions,
                            batch_size, self.hidden_size)
        return creative_variable(hidden)

In [59]:
def train():
    total_loss = 0
    
    for ii, (names, countries) in enumerate(train_loader, 1):
        input, seq_lengths, target = make_variables(names, countries)
        output = classifier(input, seq_lengths)
        
        loss = criterion(output, target)
        total_loss += loss.item() #loss.data[0]
        
        classifier.zero_grad()
        loss.backward()
        optimizer.step()
        
        if ii % 10 == 0:
            print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.2f}'.format(
                time_since(start), epoch,  ii *
                len(names), len(train_loader.dataset),
                100. * ii * len(names) / len(train_loader.dataset),
                total_loss / ii * len(names)))
            
    return total_loss

In [68]:
def test(name=None):
    if name:
        input, seq_lengths, target = make_variables([name], [])
        output = classifier(input, seq_lengths)
        pred = output.data.max(1, keepdim=True)[1]
        country_id = pred.cpu().numpy()[0][0]
        print(name, "is", train_dataset.get_country(country_id))
        return
    
    print('evaluating trained model ...')
    correct = 0
    train_data_size = len(test_loader.dataset)
    
    for names, countries in test_loader:
        input, seq_lengths, target = make_variables(names, countries)
        output = classifier(input, seq_lengths)
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, train_data_size, 100. * correct / train_data_size))

In [None]:
if __name__ == '__main__':
    
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRIES, N_LAYERS)
    if torch.cuda.device_count() > 1:
        print("let's use", torch.cuda.device_count(), "GPUs!")
        classifier = torch.nn.DataParallel(classifier)
    
    if torch.cuda.is_available():
        classifier.cuda()
        
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    criterion = torch.nn.CrossEntropyLoss()
    
    start = time.time()
    print("Training for %d epoch..." % N_EPOCHS)
    for epoch in range(1, N_EPOCHS + 1):
        train()
        
        test()
        
        # Testing several samples
        test("Sung")
        test("Jungwoo")
        test("Soojin")
        test("Nako")

Training for 100 epoch...
evaluating trained model ...

Test set: Accuracy: 4173/6700 (62%)

Sung is Arabic
Jungwoo is Russian
Soojin is Russian
Nako is Russian
evaluating trained model ...

Test set: Accuracy: 4652/6700 (69%)

Sung is Arabic
Jungwoo is Russian
Soojin is Russian
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 4993/6700 (75%)

Sung is Chinese
Jungwoo is Russian
Soojin is Russian
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5164/6700 (77%)

Sung is Chinese
Jungwoo is Chinese
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5303/6700 (79%)

Sung is Chinese
Jungwoo is Chinese
Soojin is Russian
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5417/6700 (81%)

Sung is Chinese
Jungwoo is Chinese
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5438/6700 (81%)

Sung is Chinese
Jungwoo is Chinese
Soojin is Russian
Nako is Japanese
evaluating trained mod

evaluating trained model ...

Test set: Accuracy: 5619/6700 (84%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5605/6700 (84%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5597/6700 (84%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5588/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5606/6700 (84%)

Sung is Chinese
Jungwoo is Russian
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5578/6700 (83%)

Sung is Chinese
Jungwoo is Chinese
Soojin is English
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5575/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 560

evaluating trained model ...

Test set: Accuracy: 5553/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5538/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5514/6700 (82%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5564/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5558/6700 (83%)

Sung is Chinese
Jungwoo is German
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5556/6700 (83%)

Sung is Chinese
Jungwoo is German
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5558/6700 (83%)

Sung is Chinese
Jungwoo is Chinese
Soojin is Irish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5568/67

evaluating trained model ...

Test set: Accuracy: 5563/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5553/6700 (83%)

Sung is Chinese
Jungwoo is German
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5492/6700 (82%)

Sung is Chinese
Jungwoo is German
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5548/6700 (83%)

Sung is Chinese
Jungwoo is German
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5542/6700 (83%)

Sung is Chinese
Jungwoo is German
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5579/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5587/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5595/6700

# char rnn

In [109]:
from text_loader import TextDataset

hidden_size = 128
n_layers = 3
batch_size = 3*64
n_epochs = 100
n_characters  = 128 #ASCII

In [110]:
class RNN(torch.nn.Module):
    
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers)
        self.linear = torch.nn.Linear(hidden_size, output_size)
        
    def forward(self, input, hidden):
        embed = self.embedding(input.view(1,-1)) # S(=1) x I
        embed = embed.view(1,1,-1) # S(=1) x B(=1) x I (embedding size)
        output, hidden = self.gru(embed, hidden)
        output = self.linear(output.view(1, -1)) # S(=1) x I
        return output, hidden
    
    def init_hidden(self):
        if torch.cuda.is_available():
            hidden = torch.zeros(self.n_layers, 1, self.hidden_size).cuda()
        else:
            hidden = torch.zeros(self.n_layers, 1, self.hidden_size)
        return hidden

In [111]:
def str2tensor(string):
    tensor = [ord(c) for c in string]
    tensor = torch.LongTensor(tensor)

    if torch.cuda.is_available():
        tensor = tensor.cuda()
    
    return tensor

def generate(decoder, prime_str = 'A', predict_len = 100, temperature = 0.8):
    hidden = decoder.init_hidden()
    prime_input = str2tensor(prime_str)
    predicted = prime_str
    
    for pp in range(len(prime_str) -1):
        _, hidden = decoder(prime_input[pp], hidden)
    
    inp = prime_input[-1]
    
    for pp in range(predict_len):
        output, hidden = decoder(inp, hidden)
        
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        
        predicted_char = chr(top_i)
        predicted += predicted_char
        inp = str2tensor(predicted_char)
    
    return predicted

def train_teacher_forching(line):
    input = str2tensor(line[:-1])
    target = str2tensor(line[1:])

    hidden = decoder.init_hidden()
    loss = 0

    for cc in range(len(input)):
        output, hidden = decoder(input[cc], hidden)
        loss += criterion(output, target[cc])

    decoder.zero_grad()
    loss.backward()
    decoder_optimizer.step()

    return loss.data[0] / len(input)

In [112]:
def train(line):
    input = str2tensor(line[:-1])
    target = str2tensor(line[1:])

    hidden = decoder.init_hidden()
    decoder_in = input[0]
    loss = 0

    for cc in range(len(input)):
        output, hidden = decoder(decoder_in, hidden)
        
        # 모델의 출력에서 가장 높은 확률을 가진 문자의 인덱스로 변환
        # torch.argmax 함수를 사용하여 가장 높은 값의 인덱스를 가져옴
        predicted_char_index = torch.argmax(output)
        
        # 타겟 데이터는 정수 형태이므로, torch.tensor 형태로 변환
        target_char_index = torch.tensor(target[cc])
        
        # 손실을 계산할 때 모델의 출력과 타겟 데이터의 크기가 일치해야 함
        # 따라서 모델의 출력과 타겟 데이터를 각각 인덱스로 비교하여 손실을 계산
        loss += criterion(output.view(1, -1), target_char_index.view(1))
        
        # 다음 입력으로 사용할 디코더의 입력 설정
        decoder_in = target_char_index
    
    decoder.zero_grad()
    loss.backward()
    decoder_optimizer.step()
    
    return loss.item() / len(input)  # loss.item()을 사용하여 손실 값을 가져옴
        
# 기존 코드 중에서 size 안 맞아서 안 되는 부분 수정 전 원본.
#         #loss += criterion(output, target[c])
#         decoder_in = output.max(1)[1]

#     decoder.zero_grad()
#     loss.backward()
#     decoder_optimizer.step()

#     return loss.data[0] / len(input)

In [113]:
if __name__ == '__main__':

    decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
    if torch.cuda.is_available():
        decoder.cuda()

    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=0.001)
    criterion = torch.nn.CrossEntropyLoss()

    train_loader = DataLoader(dataset=TextDataset(),
                              batch_size=batch_size,
                              shuffle=True)

    print("Training for %d epochs..." % n_epochs)
    for epoch in range(1, n_epochs + 1):
        for i, (lines, _) in enumerate(train_loader):
            loss = train(lines[0])  # Batch size is 1

            if i % 100 == 0:
                print('[(%d %d%%) loss: %.4f]' %
                      (epoch, epoch / n_epochs * 100, loss))
                print(generate(decoder, 'Wh', 100), '\n')

Training for 100 epochs...
[(1 1%) loss: 4.8559]


  target_char_index = torch.tensor(target[cc])


Wh3t; Dts:	/
a5$j0o3swqZ!qF}CPez
vY)nszi%cQDV p)jz@Rl/S!&|RbbY(Omfn>z3~!7)cjO;^0)P*u8 

[(1 1%) loss: 2.8737]
Whtmhdsranirostritescsaseerwysirosraswteaa'enrdg,ss,raorieeeedarryaleosmsoahsoeemwdddeathlanhvreeatdta 

[(2 2%) loss: 3.0422]
Whisiiaottehderh-wynatnennnosentaotsosaesaithsorayneeyere.aidslreatiaedt'blshvceoomhsetalmaeerhelau:t: 

[(2 2%) loss: 3.3414]
Wheroawineatroecrawerteowomebnler-msfrtodo,urhltosrkenskeesmenmotorthe:t!iemrryetoslnrelettiausnastsod 

[(3 3%) loss: 2.8198]
Whimbarvesledsesyyhetmrothouisvayononhearesenmobris,oterthhbnesrpavtafaans!thylepethathrebetothirowhem 

[(3 3%) loss: 2.7395]
Whmatheondyaonlamhendlestyouslendetedananther,entendethindlenwhelvedstufbesthenthondnttofredenc,plalsi 

[(4 4%) loss: 2.9374]
Whartnesewi;yo!imcoreawfemeveleirselbeounand.nedaa:tenwedeshen'teneundthastheun,othee;enereshenelthocr 

[(4 4%) loss: 2.7550]
Wheelsizeorsenitinanthwenogarperethearbalgarnyiyhenesadeswindalbsousthalnam.esdoinandnouswindomouc,ege 

[(5 5%

[(33 33%) loss: 2.0404]
Whousinsayforthedeathereavephere,norsedwouldbetsbeing,ishallselfyourand--hardonotlords,letter,iray,asi 

[(33 33%) loss: 2.1928]
Whisslord;butiganditthemberescontedletistoreashiveanddidone,itforthemanased.mymadysayaftisqueenemi,lig 

[(34 34%) loss: 2.2879]
Whenrytoandclageoflordareswoundasidonder:butyeswarceishetrucioussother,pindyes,doded'sdeartthemasterce 

[(34 34%) loss: 0.7664]
Wheruswerethestofelfandtheking,donesgraagath,theysockandinmy,andyoumindmage,butinowtobro!through,kingl 

[(35 35%) loss: 2.2551]
Whuswannedmither!werevenceloughsomeshoresome,makeroughtitheram,withyourand?ieredosed,thenandthoughstoh 

[(35 35%) loss: 2.2611]
Whereadusconderselfhingpidssinourofourgengtothat,---xeaitemsofherition,aptersedgeathandallaccess,tooth 

[(36 36%) loss: 1.7744]
Whowhislord;andtheyouway,wherebywelceswellatandtheryoursomeohouldschold,wither;andthe,hownottheeshi'll 

[(36 36%) loss: 1.7294]
Whathershallyoucoun'd,ifmystrangatensofmysone?alingeidandhathereadstriver

[(65 65%) loss: 2.5215]
Whatsweethisbeetchhim.thedrow.giseyoucome.sir?come,criendcallyeadthegod?witharinius:andhe'llofgrock'df 

[(65 65%) loss: 2.2371]
Whowwelltoputityou,doyouwouldthearetheirfriends;i'llasedsweertthisson?thisso.thetatter'sthee,thechildd 

[(66 66%) loss: 1.7363]
Wherestedwardapjess,withbepursesparts,withhereath,ipropenkingrichardredonowmadefather.whattheseparitit 

[(66 66%) loss: 2.0808]
Wherkindied:henries'tisasormysir!tohewarculeturneadsmothreath.themile--sim,andtheir,whatisintheirsepli 

[(67 67%) loss: 2.5443]
Wheboonshethankthebrotherenrustproveshe'shalladvershespeak,suchbe'smer'dthishandsttheel.themse,thereav 

[(67 67%) loss: 2.2199]
Whethatimerhoisethatdome-astheircom'dedward,itbeing.corao'aboutthouhadding.butimerknight,hedaure,strac 

[(68 68%) loss: 1.5623]
Whygravesterssedwithmensenent.givencersofmiserendevershalltheirstandfirsts.i'llknow,isavestathtodidedd 

[(68 68%) loss: 2.5398]
Whewereisethingsosuchardtoo:thisnowshethebeargreat.wouldgrenpriciledsttob

[(97 97%) loss: 2.4432]
Whatharingofcoriolanus:andyourshichheirad,thenrypart,mymoptay'smoalwhat,yousunconteintiltofleastall,th 

[(97 97%) loss: 1.6251]
Whownownoogeche,thathisplige.howthematilyofthissofyork:hownownownandgragion?whom.now,thatvicthentothem 

[(98 98%) loss: 2.3944]
Whiss,ifthemadashallhermhowthesrunsedamanstthenandherast,thathavethenanourformine-profoutisthyme.andfl 

[(98 98%) loss: 2.2321]
Whespitionelustopformorivistain.andtoherablehisseyetwell.iamforce:her'swould!killbeingbrokesoursthrour 

[(99 99%) loss: 1.7998]
Whouson,eed;yoursedbyandewest,sinderthyorthere:yourhomeconthere--god;asyouarewontentnool:made?forthere 

[(99 99%) loss: 2.2056]
Wherearther,likeinshearttomakeintheheart.heiheandmylibe,orhaveourhouseoffrincentio.ashairyoushallheeda 

[(100 100%) loss: 2.0953]
Whousettoourinperispartthatfore.hewould;thatapassforgoodforchome.inmine;thatbegone?withallwhosit,ors'd 

[(100 100%) loss: 2.0964]
Whavenovenforgodhouse.soreyeto'toppetruchio?hewouldbehapendamandenish

# pack_pad

In [89]:
import torch.nn.functional as F

def flatten(l):
    return list(itertools.chain.from_iterable(l))

In [90]:
seqs = ['ghatmasala', 'nicela', 'chutpakodas']

vocab = ['<pad>'] + sorted(list(set(flatten(seqs))))

embedding_size = 3

embed = torch.nn.Embedding(len(vocab), embedding_size)
lstm = torch.nn.LSTM(embedding_size, 5)

In [91]:
vectorized_seqs = [[vocab.index(tok) for tok in seq] for seq in seqs]
print('vectorized seqs', vectorized_seqs)

print([x for x in map(len, vectorized_seqs)])

seq_lengths = torch.LongTensor([x for x in map(len, vectorized_seqs)])

vectorized seqs [[5, 6, 1, 15, 10, 1, 14, 1, 9, 1], [11, 7, 2, 4, 9, 1], [2, 6, 16, 15, 13, 1, 8, 12, 3, 1, 14]]
[10, 6, 11]


In [92]:
# dump padding everywhere, and place seqs on the left.
# NOTE: you only need a tensor as big as your longest sequence

seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long()

for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seqlen] = torch.LongTensor(seq)

print("seq_tensor", seq_tensor)

seq_tensor tensor([[ 5,  6,  1, 15, 10,  1, 14,  1,  9,  1,  0],
        [11,  7,  2,  4,  9,  1,  0,  0,  0,  0,  0],
        [ 2,  6, 16, 15, 13,  1,  8, 12,  3,  1, 14]])


In [93]:
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]

print("seq_tensor after sorting", seq_tensor)

seq_tensor = seq_tensor.transpose(0, 1)  # (B,L,D) -> (L,B,D)
print("seq_tensor after transporting", seq_tensor.size(), seq_tensor.data)

seq_tensor after sorting tensor([[ 2,  6, 16, 15, 13,  1,  8, 12,  3,  1, 14],
        [ 5,  6,  1, 15, 10,  1, 14,  1,  9,  1,  0],
        [11,  7,  2,  4,  9,  1,  0,  0,  0,  0,  0]])
seq_tensor after transporting torch.Size([11, 3]) tensor([[ 2,  5, 11],
        [ 6,  6,  7],
        [16,  1,  2],
        [15, 15,  4],
        [13, 10,  9],
        [ 1,  1,  1],
        [ 8, 14,  0],
        [12,  1,  0],
        [ 3,  9,  0],
        [ 1,  1,  0],
        [14,  0,  0]])


In [94]:
embeded_seq_tensor = embed(seq_tensor)
print("seq_tensor after embedding",embeded_seq_tensor.size(), embeded_seq_tensor.data )

seq_tensor after embedding torch.Size([11, 3, 3]) tensor([[[-0.6058, -1.2204,  0.4031],
         [ 0.3229, -1.0837,  0.9217],
         [ 0.4397,  0.3536, -0.3924]],

        [[ 0.6011,  0.6126, -0.0748],
         [ 0.6011,  0.6126, -0.0748],
         [ 0.8720,  1.5821,  0.7780]],

        [[-1.1258, -0.3440, -0.1232],
         [ 1.0625,  0.4981, -1.5001],
         [-0.6058, -1.2204,  0.4031]],

        [[ 1.5140,  0.9574,  0.9148],
         [ 1.5140,  0.9574,  0.9148],
         [ 0.3444,  1.0493,  1.7402]],

        [[-2.9877,  0.2516,  0.3182],
         [ 0.5330,  0.5235,  1.0946],
         [-0.8676,  0.2975, -0.6976]],

        [[ 1.0625,  0.4981, -1.5001],
         [ 1.0625,  0.4981, -1.5001],
         [ 1.0625,  0.4981, -1.5001]],

        [[-2.1000,  0.9419,  1.6039],
         [ 0.1195, -0.3871,  0.6173],
         [-0.8361,  0.8061,  0.5474]],

        [[-0.0511, -0.2240,  0.9062],
         [ 1.0625,  0.4981, -1.5001],
         [-0.8361,  0.8061,  0.5474]],

        [[ 0.6097, -0.

In [96]:
packed_input = pack_padded_sequence(embeded_seq_tensor,
                                   seq_lengths.cpu().numpy())

packed_output, (ht, ct) = lstm(packed_input)

output, _ = pad_packed_sequence(packed_output)
print('lstm output', output.size(), output.data)

lstm output torch.Size([11, 3, 5]) tensor([[[-0.0147,  0.1987,  0.0190,  0.1467, -0.1479],
         [-0.0806,  0.1208, -0.0613,  0.0827, -0.1047],
         [-0.0882, -0.0659,  0.0325,  0.0088,  0.0346]],

        [[-0.0968,  0.0414,  0.0229,  0.0077, -0.0263],
         [-0.1508,  0.0109, -0.0024, -0.0138, -0.0017],
         [-0.1468, -0.0880, -0.0033, -0.0987,  0.0280]],

        [[-0.0292,  0.2070,  0.0846,  0.0966, -0.0941],
         [-0.2447, -0.1290,  0.0407,  0.0186,  0.0506],
         [-0.0873,  0.1538,  0.0093,  0.0602, -0.0864]],

        [[-0.1720,  0.0318, -0.0142, -0.0554, -0.0166],
         [-0.2623, -0.1253, -0.0321, -0.0968,  0.0479],
         [-0.0887,  0.1855, -0.0321, -0.0992, -0.0891]],

        [[-0.0328,  0.4967,  0.0819,  0.0512, -0.2463],
         [-0.2062, -0.0206, -0.0431, -0.1191,  0.0267],
         [-0.0388,  0.1905,  0.0595,  0.0092, -0.0239]],

        [[-0.1609,  0.1150,  0.1299,  0.0099, -0.0183],
         [-0.2868, -0.1463,  0.0194,  0.0043,  0.0608],
   

In [97]:
print('last output', ht[-1].size(), ht[-1].data)

last output torch.Size([3, 5]) tensor([[-0.1644,  0.0729, -0.0139,  0.0368, -0.0131],
        [-0.2691, -0.2024,  0.0692,  0.0655,  0.0645],
        [-0.1756, -0.0341,  0.0702,  0.0128,  0.0398]])
