### 문장생성 구현

In [90]:
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
from common.functions import softmax
from ch06.rnnlm import Rnnlm
from ch06.better_rnnlm import BetterRnnlm


class MyRnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based generation of word ids."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word ids beginning with ``start_id``.

        Every iteration passes the most recently accepted id to
        ``self.predict``, turns the flattened scores into probabilities with
        ``softmax`` and draws one id from that distribution. A drawn id that
        appears in ``skip_ids`` is discarded and the loop runs again with the
        same input id.

        Args:
            start_id: Id of the first word; always the first element of the
                returned list.
            skip_ids: Optional collection of ids that must never be appended
                to the result. ``None`` disables filtering.
            sample_size: The loop stops once the result holds this many ids.

        Returns:
            list: ``start_id`` followed by the sampled ids as Python ints.
        """
        skip = () if skip_ids is None else skip_ids
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            # np.random.choice(..., size=1) returns a 1-element array. Take
            # the element explicitly: int() on an array with ndim > 0 is
            # deprecated since NumPy 1.25, and a plain int makes the
            # membership test below unambiguous.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])

            if sampled not in skip:
                x = sampled
                word_ids.append(sampled)
            # NOTE(review): on a rejected draw, predict() is called again
            # with the same x. If predict() updates recurrent state, that
            # input is consumed twice -- confirm this is intended.

        return word_ids

    def get_state(self):
        """Return the ``(h, c)`` attributes of ``self.lstm_layer`` as a tuple."""
        return self.lstm_layer.h, self.lstm_layer.c

    def set_state(self, state):
        """Unpack ``state`` and pass it to ``self.lstm_layer.set_state``.

        Args:
            state: Iterable of positional arguments; the tuple returned by
                ``get_state`` has the matching form.
        """
        self.lstm_layer.set_state(*state)


class BetterRnnlmGen(BetterRnnlm):
    """BetterRnnlm subclass that adds sampling-based generation of word ids."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word ids beginning with ``start_id``.

        Every iteration passes the most recently accepted id to
        ``self.predict``, turns the flattened scores into probabilities with
        ``softmax`` and draws one id from that distribution. A drawn id that
        appears in ``skip_ids`` is discarded and the loop runs again with the
        same input id.

        Args:
            start_id: Id of the first word; always the first element of the
                returned list.
            skip_ids: Optional collection of ids that must never be appended
                to the result. ``None`` disables filtering.
            sample_size: The loop stops once the result holds this many ids.

        Returns:
            list: ``start_id`` followed by the sampled ids as Python ints.
        """
        skip = () if skip_ids is None else skip_ids
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x).flatten()
            p = softmax(score).flatten()

            # np.random.choice(..., size=1) returns a 1-element array. Take
            # the element explicitly: int() on an array with ndim > 0 is
            # deprecated since NumPy 1.25, and a plain int makes the
            # membership test below unambiguous.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])

            if sampled not in skip:
                x = sampled
                word_ids.append(sampled)
            # NOTE(review): on a rejected draw, predict() is called again
            # with the same x. If predict() updates recurrent state, that
            # input is consumed twice -- confirm this is intended.

        return word_ids

    def get_state(self):
        """Return a list with one ``(h, c)`` tuple per entry of ``self.lstm_layers``."""
        return [(layer.h, layer.c) for layer in self.lstm_layers]

    def set_state(self, states):
        """Apply each entry of ``states`` to the matching LSTM layer.

        Layers and states are paired with ``zip``, so surplus items on either
        side are ignored.

        Args:
            states: Iterable of per-layer argument tuples; the list returned
                by ``get_state`` has the matching form.
        """
        for layer, state in zip(self.lstm_layers, states):
            layer.set_state(*state)

### 문장생성을 위한 코드

In [93]:
# coding: utf-8
import sys
sys.path.append('..')
from rnnlm_gen import RnnlmGen
from dataset import ptb


# Load the 'train' data and its vocabulary mappings from the ptb module.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the generator model and load its parameters from disk.
model = MyRnnlmGen()
model.load_params('../ch06/Rnnlm.pkl')

# Choose the start word and the words that must be skipped.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[token] for token in skip_words]

# Generate the text: sample ids, map them back to words, and turn each
# ' <eos>' marker into a full stop plus line break.
word_ids = model.generate(start_id, skip_ids, sample_size=100)
txt = ' '.join(id_to_word[i] for i in word_ids).replace(' <eos>', '.\n')
print(txt)

you vice about exist damaging.
 mr. cray its bread which broadcast on their cells month detailed theoretical opening of next order of new novel of new recently of functions of this news of u.s. bid has and other more of the refund and securities.
 they when funds five routine vice said distinctive before the total of rudolph properties to j.p. to virgin.
 for its should of headaches deals of october that mr. imports of new refund funds for were distribution products yield not for the department of house of the seeking time said the touted said it


### 더 좋은 문장으로

In [94]:
# coding: utf-8
import sys
sys.path.append('..')
from common.np import *
from rnnlm_gen import BetterRnnlmGen
from dataset import ptb


# Load the 'train' data and its vocabulary mappings from the ptb module.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)


# Build the generator model and load its parameters from disk.
model = BetterRnnlmGen()
model.load_params('../ch06/BetterRnnlm.pkl')

# Choose the start word and the words that must be skipped.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[token] for token in skip_words]

# Generate the text: sample ids, map them back to words, and turn each
# ' <eos>' marker into a full stop plus line break.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[i] for i in word_ids).replace(' <eos>', '.\n')

print(txt)


# Second run: reset the model, then continue from a multi-word prompt.
model.reset_state()

start_words = 'the meaning of life is'
start_ids = [word_to_id[token] for token in start_words.split(' ')]

# Pass every prompt word except the last through predict(); the last word
# is handed to generate() as its start id instead.
for x in start_ids[:-1]:
    x = np.array(x).reshape(1, 1)
    model.predict(x)

# Put the already-consumed prompt ids back in front of the generated ids.
word_ids = start_ids[:-1] + model.generate(start_ids[-1], skip_ids)
txt = ' '.join(id_to_word[i] for i in word_ids).replace(' <eos>', '.\n')
print('-' * 50)
print(txt)


you reject the sec on the plan he said.
 richard who met out regularly in his prison positions on the california board contained after june the wall street journal 's magazine but who election think it would act in the stand.
 for the nine months the broker-dealer chain dropped to about six billion yen.
 at&t plans parts to report overtime which can be undertaken soon to comply with the problem.
 short interest margins in certain areas is likely to increase management.
 troubled directors have contracted the top a management adviser have and canceled takeover pricing
--------------------------------------------------
the meaning of life is high of more than before the senators take higher rates than it also was tied to the statement.
 the letter of the u.s. appeals court that is cleared public and airlines should german growing holders.
 the private copyright safety 's role of a major government issue has been plans to increase as the first draw on drugs without rewarding more wary and