In [31]:
import numpy as np
from layer import Rnnlm
from util import softmax

class RnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based text generation."""

    def __init__(self, vocab_size, embedding_size, hidden_size):
        super().__init__(vocab_size, embedding_size, hidden_size)

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word ids, starting from ``start_id``.

        Args:
            start_id: id of the first word; it is always the first element of
                the returned list.
            skip_ids: optional iterable of word ids that must never be
                emitted. ``None`` (the default) disables filtering.
            sample_size: total length of the returned list, including
                ``start_id``.

        Returns:
            A list of plain Python ints (apart from ``start_id``, which is
            returned exactly as passed in).
        """
        # A set gives O(1) membership checks and makes ``None`` a valid value.
        banned = set(skip_ids) if skip_ids is not None else set()

        word_ids = [start_id]
        current_id = start_id  # do not shadow the builtin ``input``

        while len(word_ids) < sample_size:
            # Keep the id as an integer array of shape (1, 1) for predict().
            x = np.array(current_id, dtype=np.int32).reshape(1, 1)
            logit = self.predict(x).reshape(-1)
            prob = softmax(logit)

            # Draw one id from the predicted distribution; int() strips the
            # NumPy scalar type so callers get ordinary ints.
            sample = int(np.random.choice(len(prob), size=1, p=prob)[0])
            if sample in banned:
                # Rejected draw: retry with the same input id.
                # NOTE(review): predict() has already been called for this id;
                # if the model keeps recurrent state between calls, that state
                # advances on every retry — confirm this is intended.
                continue

            word_ids.append(sample)
            current_id = sample

        return word_ids


In [32]:
from dataset import ptb

corpus, word_to_id, id_to_word = ptb.load_data('train')

vocab_size = len(word_to_id)
wordvec_size = 100
hidden_size = 100

model = RnnlmGen(vocab_size, wordvec_size, hidden_size)
# NOTE(review): loading of saved weights is commented out, so generation
# presumably runs on untrained parameters — confirm before trusting the text.
#model.load_params('rnnlm_params.npy')

start_id = word_to_id['you']
skip_word = ['N', '<unk>', '$']
# Ids of the tokens that must never appear in the generated text.
skip_ids = [word_to_id[word] for word in skip_word]

generated_word_ids = model.generate(start_id, skip_ids, sample_size=30)
# Use `word_id` rather than `id` so the builtin id() stays usable in later cells.
generated_words = [id_to_word[word_id] for word_id in generated_word_ids]

generated_text = ' '.join(generated_words)
generated_text

'you claimed iowa issues coupled lawrence boosted predicted private roth shocks loath surveys depended contributed offerings defense statements prentice yearly compromise recruit no. particular mccall unix split goods tesoro crackdown'

In [None]:
from layer import TimeEmbedding, TimeLSTM

class Encoder:
    """Embedding + LSTM stack that reduces an input sequence to one state.

    ``forward`` runs the input through every layer and returns the slice at
    the last index of axis 1; ``backward`` routes a gradient for that slice
    back through the layers in reverse order.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V, D, H = vocab_size, wordvec_size, hidden_size
        self.V, self.D, self.H = V, D, H

        self.params = []
        self.grads = []

        # Weights are drawn in a fixed order (embedding, LSTM input, LSTM
        # recurrent) so a seeded RNG reproduces the same initialisation.
        randn = np.random.randn
        self.W_embed = (0.01 * randn(V, D)).astype('float32')
        self.W_x_lstm = ((1 / np.sqrt(D)) * randn(D, 4*H)).astype('float32')
        self.W_h_lstm = ((1 / np.sqrt(H)) * randn(H, 4*H)).astype('float32')
        self.b_lstm = np.zeros(4*H).astype('float32')

        self.layers = [
            TimeEmbedding(self.W_embed),
            TimeLSTM(self.W_x_lstm, self.W_h_lstm, self.b_lstm, stateful=False),
        ]

        # Flatten every layer's (param, grad) pairs into the encoder's lists.
        for layer in self.layers:
            pairs = list(zip(layer.params, layer.grads))
            self.params.extend(param for param, _ in pairs)
            self.grads.extend(grad for _, grad in pairs)

    def forward(self, xs):
        """Run ``xs`` through all layers and return the last step on axis 1.

        The full output of the final layer is kept in ``self.hs`` because
        ``backward`` needs its shape.
        """
        hs = xs
        for layer in self.layers:
            hs = layer.forward(hs)

        self.hs = hs
        return hs[:, -1, :]

    def backward(self, dh):
        """Back-propagate ``dh``, the gradient of the value ``forward`` returned.

        Only the last position on axis 1 receives a gradient; every other
        position starts at zero.
        """
        grad = np.zeros_like(self.hs)
        grad[:, -1, :] = dh
        for layer in self.layers[::-1]:
            grad = layer.backward(grad)
        return grad

array([[ 2.37227930e-03, -2.24177679e-03,  4.72477404e-03,
         4.15270799e-04, -4.68666200e-04,  6.59893500e-04,
        -5.18815359e-04,  9.47673107e-05, -5.87560062e-04,
        -1.13624008e-03, -8.60574364e-04,  6.00228086e-05,
         4.11789119e-03,  6.51015376e-04,  8.93977587e-04,
         2.75384198e-04, -1.43559824e-03, -8.36388965e-04,
        -2.60920357e-03, -5.47516393e-03,  4.67574631e-04,
        -2.98718689e-03,  2.61759432e-03,  2.03606859e-03,
        -2.39678426e-03, -1.37167948e-03,  3.24085180e-04,
         1.27934525e-03, -6.81686797e-04,  4.73596109e-03,
         6.78029435e-04, -1.00032950e-03,  2.08116276e-03,
         1.49233371e-03, -6.59230805e-04,  2.48755794e-03,
         3.20925144e-03, -1.36918097e-03, -2.72739453e-05,
         2.93381920e-04, -1.87923515e-03,  1.01112784e-03,
         1.48276135e-03,  1.29908673e-03,  1.64771895e-03,
         2.72686360e-03, -2.25090585e-03,  2.96247890e-04,
        -1.49486194e-04,  2.53493758e-03,  1.21083576e-0

In [3]:
from layer import TimeEmbedding, TimeLSTM, TimeAffine

class Decoder:
    """Embedding -> LSTM -> affine stack whose LSTM starts from a given state.

    NOTE(review): ``forward`` and ``generate`` both assign ``h`` to the LSTM
    layer and then call its ``forward``. This only works if TimeLSTM keeps
    ``h`` across calls; the ``stateful`` argument is left at its default
    here, so confirm that default. Any LSTM cell state is not reset here
    either — verify against TimeLSTM.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V, D, H = vocab_size, wordvec_size, hidden_size
        self.V, self.D, self.H = V, D, H

        self.params = []
        self.grads = []

        # Draw order is fixed (embedding, LSTM input, LSTM recurrent, affine)
        # so a seeded RNG reproduces the same initialisation.
        randn = np.random.randn
        self.W_embed = (0.01 * randn(V, D)).astype('float32')
        self.W_x_lstm = ((1 / np.sqrt(D)) * randn(D, 4*H)).astype('float32')
        self.W_h_lstm = ((1 / np.sqrt(H)) * randn(H, 4*H)).astype('float32')
        self.b_lstm = np.zeros(4*H).astype('float32')
        self.W_affine = ((1 / np.sqrt(H)) * randn(H, V)).astype('float32')
        self.b_affine = np.zeros(V).astype('float32')

        self.layers = [
            TimeEmbedding(self.W_embed),
            TimeLSTM(self.W_x_lstm, self.W_h_lstm, self.b_lstm),
            TimeAffine(self.W_affine, self.b_affine),
        ]

        # Flatten every layer's (param, grad) pairs into the decoder's lists.
        for layer in self.layers:
            pairs = list(zip(layer.params, layer.grads))
            self.params.extend(param for param, _ in pairs)
            self.grads.extend(grad for _, grad in pairs)

    def _forward_layers(self, out):
        """Feed ``out`` through every layer in order and return the result."""
        for layer in self.layers:
            out = layer.forward(out)
        return out

    def forward(self, xs, h):
        """Return the affine-layer output for ``xs``, starting the LSTM at ``h``.

        The output is also stored in ``self.hs``.
        """
        self.layers[1].h = h
        scores = self._forward_layers(xs)
        self.hs = scores
        return scores

    def backward(self, dout):
        """Back-propagate ``dout`` through the layers in reverse order.

        Stores the LSTM layer's ``dh`` in ``self.dh`` and returns whatever
        the first (embedding) layer's ``backward`` returns.
        """
        for layer in self.layers[::-1]:
            dout = layer.backward(dout)

        self.dh = self.layers[1].dh
        return dout

    def generate(self, h, start_id, sample_size):
        """Greedily decode ids until the list holds ``sample_size`` entries.

        The list starts with ``start_id``; each following id is the argmax
        of the layer stack's output for the previous id.
        """
        self.layers[1].h = h

        word_ids = [start_id]
        while len(word_ids) < sample_size:
            # The previously emitted id is the next input, as a (1, 1) array.
            step_in = np.array(word_ids[-1], dtype=np.int32).reshape(1, 1)
            scores = self._forward_layers(step_in)
            word_ids.append(int(np.argmax(scores)))

        return word_ids

In [4]:
from layer import TimeSoftmaxWithLoss

class Seq2seq:
    """Encoder-decoder model trained with a time-distributed softmax loss.

    The encoder's output is handed to the decoder as its initial hidden
    state. ``params`` and ``grads`` list the encoder's entries first, then
    the decoder's.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        self.V = vocab_size
        self.D = wordvec_size
        self.H = hidden_size

        self.params = []
        self.grads = []

        self.encoder = Encoder(vocab_size, wordvec_size, hidden_size)
        self.decoder = Decoder(vocab_size, wordvec_size, hidden_size)
        self.softmax_with_loss = TimeSoftmaxWithLoss()

        # Collect (param, grad) pairs from both sub-models, encoder first.
        for part in (self.encoder, self.decoder):
            for param, grad in zip(part.params, part.grads):
                self.params.append(param)
                self.grads.append(grad)

    def forward(self, xs, ts):
        """Return the loss for input batch ``xs`` and target batch ``ts``.

        ``ts`` is split along axis 1: all columns but the last are fed to
        the decoder, all but the first are the labels it must predict.
        """
        decoder_xs, decoder_ts = ts[:, :-1], ts[:, 1:]
        h = self.encoder.forward(xs)
        logit = self.decoder.forward(decoder_xs, h)
        loss = self.softmax_with_loss.forward(logit, decoder_ts)

        return loss

    def backward(self, dout=1):
        """Back-propagate the loss and return the encoder's input gradient.

        NOTE(review): ``dout`` is accepted for interface compatibility but is
        not forwarded to the loss layer, exactly as before; confirm whether
        ``TimeSoftmaxWithLoss.backward`` should receive it.
        """
        dscore = self.softmax_with_loss.backward()

        # decoder.backward() returns the output of its embedding layer's
        # backward, not the gradient of the initial hidden state. That
        # gradient is what the decoder stores in ``self.decoder.dh``, so read
        # it from there before handing it to the encoder.
        self.decoder.backward(dscore)
        dh = self.decoder.dh

        dxs = self.encoder.backward(dh)

        return dxs

    def generate(self, xs, start_id, sample_size):
        """Encode ``xs`` and let the decoder generate ``sample_size`` ids."""
        h = self.encoder.forward(xs)
        word_ids = self.decoder.generate(h, start_id, sample_size)
        return word_ids

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from optimizer import AdaGrad
from trainer import Trainer
from dataset import sequence

# NOTE(review): this statement looks truncated — `x_` is not defined anywhere
# in the visible notebook, so running the cell raises NameError. Presumably it
# was the start of a data-loading statement using `sequence`; restore or remove
# it — TODO confirm.
(x_)

In [None]:
from layer import TimeEmbedding, TimeLSTM, TimeAffine

class PeekyDecoder:
    """Decoder that also feeds the encoder state ``h`` to every time step.

    In ``forward`` the state ``h`` is repeated along the time axis and
    concatenated in front of the embedding output (LSTM input) and again in
    front of the LSTM output (affine input). ``backward`` and ``generate``
    mirror that wiring.

    NOTE(review): as in ``forward``, the LSTM's ``h`` attribute is assigned
    directly and ``stateful`` is left at TimeLSTM's default; confirm that the
    layer keeps ``h`` (and handles its cell state) across calls.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        self.V = vocab_size
        self.D = wordvec_size
        self.H = hidden_size

        self.params = []
        self.grads = []

        self.W_embed = (0.01 * np.random.randn(self.V, self.D)).astype('float32')
        # The LSTM input is [h ; embedding], hence D + H input rows.
        self.W_x_lstm = ((1 / np.sqrt(self.D + self.H)) * np.random.randn(self.D + self.H, 4*self.H)).astype('float32')
        self.W_h_lstm = ((1 / np.sqrt(self.H)) * np.random.randn(self.H, 4*self.H)).astype('float32')
        self.b_lstm = np.zeros(4*self.H).astype('float32')
        # The affine input is [h ; lstm output], hence H + H input rows.
        self.W_affine = ((1 / np.sqrt(self.H + self.H)) * np.random.randn(self.H + self.H, self.V)).astype('float32')
        self.b_affine = np.zeros(self.V).astype('float32')

        self.layers = []
        self.layers.append(TimeEmbedding(self.W_embed))
        self.layers.append(TimeLSTM(self.W_x_lstm, self.W_h_lstm, self.b_lstm))
        self.layers.append(TimeAffine(self.W_affine, self.b_affine))

        for layer in self.layers:
            for param, grad in zip(layer.params, layer.grads):
                self.params.append(param)
                self.grads.append(grad)

        # Width of ``h`` seen by the last forward(); backward() needs it to
        # split the concatenated gradients.
        self.cache = None

    def forward(self, xs, h):
        """Return the affine-layer output for ``xs``, peeking at ``h`` each step.

        ``xs`` must be 2-D and ``h`` 2-D (both are unpacked via ``.shape``).
        """
        N, T = xs.shape
        _, H = h.shape

        self.layers[1].h = h

        out = self.layers[0].forward(xs)
        # Repeat every row of h T times so hs[n, t] == h[n] for all t.
        hs = np.repeat(h, T, axis=0).reshape(N, T, H)
        out = np.concatenate((hs, out), axis=2)

        out = self.layers[1].forward(out)
        out = np.concatenate((hs, out), axis=2)

        logit = self.layers[2].forward(out)

        self.cache = H
        self.hs = out

        return logit

    def backward(self, dout):
        """Back-propagate ``dout``, undoing both concatenations from forward().

        Stores in ``self.dh`` the gradient w.r.t. ``h``: the LSTM layer's
        ``dh`` plus the two "peeked" contributions summed over the time axis.
        Returns whatever the embedding layer's ``backward`` returns.

        Raises:
            RuntimeError: if called before ``forward``.
        """
        H = self.cache
        if H is None:
            raise RuntimeError('PeekyDecoder.backward() called before forward()')

        embed, lstm, affine = self.layers

        # forward() put h in the first H columns of the affine input.
        dout = affine.backward(dout)
        dhs_affine, dout = dout[:, :, :H], dout[:, :, H:]

        # ... and in the first H columns of the LSTM input.
        dout = lstm.backward(dout)
        dhs_lstm, dout = dout[:, :, :H], dout[:, :, H:]

        dout = embed.backward(dout)

        # assumes lstm.dh has the same (N, H) shape as h — TODO confirm
        self.dh = lstm.dh + np.sum(dhs_affine + dhs_lstm, axis=1)

        return dout

    def generate(self, h, start_id, sample_size):
        """Greedily decode ids until the list holds ``sample_size`` entries.

        ``h`` must hold a single sequence (it is reshaped to ``(1, 1, H)``).
        The list starts with ``start_id``; each following id is the argmax of
        the scores produced for the previous id.
        """
        embed, lstm, affine = self.layers
        lstm.h = h

        # Same "peek" as forward(), for a batch of one and one time step.
        peeky_h = h.reshape(1, 1, h.shape[1])

        word_ids = [start_id]
        while len(word_ids) < sample_size:
            x = np.array(word_ids[-1], dtype=np.int32).reshape(1, 1)

            out = embed.forward(x)
            out = np.concatenate((peeky_h, out), axis=2)
            out = lstm.forward(out)
            out = np.concatenate((peeky_h, out), axis=2)
            score = affine.forward(out)

            word_ids.append(int(np.argmax(score)))

        return word_ids

In [None]:
class PeekySeq2seq(Seq2seq):
    """Seq2seq variant whose decoder is a ``PeekyDecoder``.

    Only construction differs from the base class; every other method is
    inherited from ``Seq2seq``.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        self.V, self.D, self.H = vocab_size, wordvec_size, hidden_size

        self.params = []
        self.grads = []

        # Build the encoder before the decoder so the random weight draws
        # happen in the same order as before.
        self.encoder = Encoder(vocab_size, wordvec_size, hidden_size)
        self.decoder = PeekyDecoder(vocab_size, wordvec_size, hidden_size)
        self.softmax_with_loss = TimeSoftmaxWithLoss()

        # Encoder entries first, then decoder entries.
        for part in (self.encoder, self.decoder):
            pairs = list(zip(part.params, part.grads))
            self.params.extend(param for param, _ in pairs)
            self.grads.extend(grad for _, grad in pairs)