### 문장 생성 구현

In [50]:
import numpy as np
# Sampling weights for the indices 0..4; they sum to 1 and the last two are zero.
p = np.array([0.7, 0.1, 0.2, 0.0, 0.0])
# Draw one index at random, weighted by p.
a = np.random.choice(p.size, p=p)
print(a)

1


In [1]:
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
from common.functions import softmax
from ch06.rnnlm import Rnnlm
from ch06.better_rnnlm import BetterRnnlm


class RnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based generation of word-ID sequences."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word IDs that begins with ``start_id``.

        At every step the most recent word is passed to ``self.predict``, the
        scores are turned into probabilities with ``softmax``, and the next
        word is drawn at random from those probabilities.

        Args:
            start_id: ID of the first word; it is also the first element of
                the returned list.
            skip_ids: Optional iterable of word IDs that must never be
                emitted. ``None`` or an empty iterable disables filtering.
            sample_size: Total length of the returned list, including
                ``start_id``.

        Returns:
            list: ``start_id`` followed by plain Python ``int`` word IDs.

        Raises:
            ValueError: If no probability mass is left once ``skip_ids`` are
                excluded (or the probabilities are NaN), so nothing can be
                sampled.
        """
        banned = np.array([] if skip_ids is None else list(skip_ids),
                          dtype=np.int64)
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            # Copy into float64 so masking never mutates softmax's own output
            # and the renormalised vector sums to 1 within choice()'s tolerance.
            p = np.array(softmax(score.flatten()), dtype=np.float64)

            # Exclude skipped IDs by zeroing their probability and
            # renormalising, rather than rejecting a draw and calling
            # predict() again on the same word. NOTE(review): predict() is
            # inherited and presumably advances the recurrent state on every
            # call, so the retry would feed the previous word twice - confirm.
            p[banned[(banned >= 0) & (banned < p.size)]] = 0.0
            total = p.sum()
            if not total > 0:
                raise ValueError(
                    'no probability mass left to sample from after '
                    'excluding skip_ids')
            p /= total

            # Draw a scalar (no size=...) so int() never receives an array
            # with ndim > 0, a conversion NumPy has deprecated.
            x = int(np.random.choice(p.size, p=p))
            word_ids.append(x)

        return word_ids

    def get_state(self):
        """Return the ``(h, c)`` attributes of ``self.lstm_layer`` as a tuple."""
        return self.lstm_layer.h, self.lstm_layer.c

    def set_state(self, state):
        """Forward the items of ``state`` to ``self.lstm_layer.set_state``.

        Args:
            state: Iterable unpacked into positional arguments, e.g. the
                tuple returned by ``get_state``.
        """
        self.lstm_layer.set_state(*state)


class BetterRnnlmGen(BetterRnnlm):
    """BetterRnnlm subclass that adds sampling-based generation of word-ID sequences."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word IDs that begins with ``start_id``.

        At every step the most recent word is passed to ``self.predict``, the
        scores are turned into probabilities with ``softmax``, and the next
        word is drawn at random from those probabilities.

        Args:
            start_id: ID of the first word; it is also the first element of
                the returned list.
            skip_ids: Optional iterable of word IDs that must never be
                emitted. ``None`` or an empty iterable disables filtering.
            sample_size: Total length of the returned list, including
                ``start_id``.

        Returns:
            list: ``start_id`` followed by plain Python ``int`` word IDs.

        Raises:
            ValueError: If no probability mass is left once ``skip_ids`` are
                excluded (or the probabilities are NaN), so nothing can be
                sampled.
        """
        banned = np.array([] if skip_ids is None else list(skip_ids),
                          dtype=np.int64)
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x).flatten()
            # Copy into float64 so masking never mutates softmax's own output
            # and the renormalised vector sums to 1 within choice()'s tolerance.
            p = np.array(softmax(score), dtype=np.float64).ravel()

            # Exclude skipped IDs by zeroing their probability and
            # renormalising, rather than rejecting a draw and calling
            # predict() again on the same word. NOTE(review): predict() is
            # inherited and presumably advances the recurrent state on every
            # call, so the retry would feed the previous word twice - confirm.
            p[banned[(banned >= 0) & (banned < p.size)]] = 0.0
            total = p.sum()
            if not total > 0:
                raise ValueError(
                    'no probability mass left to sample from after '
                    'excluding skip_ids')
            p /= total

            # Draw a scalar (no size=...) so int() never receives an array
            # with ndim > 0, a conversion NumPy has deprecated.
            x = int(np.random.choice(p.size, p=p))
            word_ids.append(x)

        return word_ids

    def get_state(self):
        """Return a list with one ``(h, c)`` tuple per entry of ``self.lstm_layers``."""
        return [(layer.h, layer.c) for layer in self.lstm_layers]

    def set_state(self, states):
        """Pass each item of ``states`` to the matching layer's ``set_state``.

        Args:
            states: Iterable of per-layer states, paired with
                ``self.lstm_layers`` in order (extra items on either side are
                ignored by ``zip``); each item is unpacked into positional
                arguments.
        """
        for layer, state in zip(self.lstm_layers, states):
            layer.set_state(*state)

### 문장 생성을 위한 코드

In [57]:
# coding: utf-8
import sys
sys.path.append('..')
from rnnlm_gen import RnnlmGen
from dataset import ptb


# Load the 'train' split of PTB together with both vocabulary mappings.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the generator and load its parameters from the saved file.
model = RnnlmGen()
model.load_params('../ch06/Rnnlm.pkl')

# Choose the start word and the words that must be skipped.
start_word = 'you'
start_id = word_to_id[start_word]
print(start_id)
skip_words = ['N', '<unk>', '$']
skip_ids = list(map(word_to_id.__getitem__, skip_words))
print(skip_ids)

# Generate the word IDs, then turn them back into text.
word_ids = model.generate(start_id, skip_ids)
print(word_ids, len(word_ids))
txt = ' '.join(id_to_word[i] for i in word_ids)
# Render every end-of-sentence marker as a period followed by a line break.
txt = txt.replace(' <eos>', '.\n')
print(txt)

316
[27, 26, 416]
[316, 154, 556, 3487, 6487, 48, 32, 4129, 1153, 251, 2393, 1730, 3010, 678, 181, 3587, 1195, 24, 2774, 1040, 1815, 387, 812, 2131, 64, 2573, 152, 32, 7049, 48, 2129, 3142, 467, 3235, 467, 1932, 79, 80, 4082, 42, 777, 467, 246, 24, 315, 108, 1770, 318, 363, 39, 9167, 40, 3071, 64, 423, 138, 625, 4575, 3532, 24, 372, 338, 154, 5031, 108, 1050, 78, 396, 7716, 108, 366, 24, 213, 1989, 159, 6477, 24, 181, 32, 4091, 1041, 3257, 5495, 181, 35, 2319, 1142, 24, 34, 35, 425, 42, 109, 812, 5257, 32, 198, 432, 2442, 35] 100
you have their prosecution bankrupt and the consent do n't sense how helping looking for semiconductor companies.
 everyone i 'm only big rules to know on the deficit-reduction and must need or cause or provide more than advantage of credit or buildings.
 but in easy these days mr. schaeffer is allowed to go any special sharp benefits.
 declining rates have resulted in september it may proceed in august.
 those pressures are arrested.
 for the afternoon japan 

### 더 좋은 문장으로

In [60]:
# coding: utf-8
import sys
sys.path.append('..')
from common.np import *
from rnnlm_gen import BetterRnnlmGen
from dataset import ptb


# Load the 'train' split of PTB together with both vocabulary mappings.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)


# Build the generator and load its parameters from the saved file.
model = BetterRnnlmGen()
model.load_params('../ch06/BetterRnnlm.pkl')

# Choose the start word and the words that must be skipped.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = list(map(word_to_id.__getitem__, skip_words))

# Generate the word IDs, then turn them back into text.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[i] for i in word_ids)
# Render every end-of-sentence marker as a period followed by a line break.
txt = txt.replace(' <eos>', '.\n')

print(txt)


# Reset the model's state before continuing a hand-written prompt.
model.reset_state()

start_words = 'the meaning of life is'
start_ids = [word_to_id[w] for w in start_words.split(' ')]

# Feed every prompt word except the last through the model; the return
# value of predict() is discarded here.
for x in start_ids[:-1]:
    x = np.array(x).reshape(1, 1)
    model.predict(x)

# Generation starts from the last prompt word; the earlier prompt words are
# put back in front so the printed text contains the whole prompt.
word_ids = start_ids[:-1] + model.generate(start_ids[-1], skip_ids)
txt = ' '.join(id_to_word[i] for i in word_ids)
txt = txt.replace(' <eos>', '.\n')
print('-' * 50)
print(txt)


you try to believe that the crowd provides a far more extensive near-term movie.
 it 's time that does n't wonder how in decline or if it finds each term that i 'm going to go until you were a bearish bet.
 moreover before former intensity mr. gonzalez laid the waiver into great many businesses the panel will be criticized for a exception of defensive sales that would benefit are of ru-486.
 what spreads from a city will fly to the british colony to push them into the free-market market as the industry runs some likely to
--------------------------------------------------
the meaning of life is quite accurate.
 this is not the land of a cheaper magazine that dance has said that the jurors ' products are not all more foreclosed than the company.
 a haven for the auction virus does not learn a of the network.
 are giving yourself out she said they had the problems.
 in a battle he 's better.
 the piece of bureaucracy is true in your shoulder says albert m. green.
 the service has one the