### 문장생성 구현

In [48]:
import numpy as np
# Toy sampling demo: ten candidate values and one probability per value.
a = np.arange(1, 11)
p = np.array([0.6, 0.1, 0.2, 0.1] + [0] * 6)
# Draw a single index according to p.
# (Greedy alternative: sampled = np.array(np.argmax(p)).reshape(1))
sampled = np.random.choice(a.size, 1, p=p)
print(a[sampled])

[2]


In [125]:
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
from common.functions import softmax
from ch06.rnnlm import Rnnlm
from ch06.better_rnnlm import BetterRnnlm

class MyRnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based word-id generation."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a sequence of word ids by repeated sampling.

        Starting from ``start_id``, the current word id is passed to
        ``self.predict``, the flattened scores are turned into a probability
        vector with ``softmax``, and the next id is drawn from it with
        ``np.random.choice``.

        Args:
            start_id: Word id used as the first element of the result.
            skip_ids: Optional collection of word ids that must not appear in
                the result. ``None`` disables filtering.
            sample_size: Length of the returned list, including ``start_id``.

        Returns:
            A list of word ids. Every sampled element is a plain Python int.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            # NOTE(review): the original debug notes say predict returns
            # scores of shape (1, 1, 10000) -- confirm against Rnnlm.predict.
            score = self.predict(x)
            p = softmax(score.flatten())

            # choice(size=1) returns a shape-(1,) array; take the element
            # explicitly, because int() on an ndim>0 array is deprecated in
            # recent NumPy releases.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])

            # If the draw is a skipped id nothing is appended and x is left
            # as is, so the previous input is passed to predict again.
            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return the ``(h, c)`` attributes of ``self.lstm_layer``."""
        return self.lstm_layer.h, self.lstm_layer.c

    def set_state(self, state):
        """Unpack ``state`` into ``self.lstm_layer.set_state``."""
        self.lstm_layer.set_state(*state)


class BetterRnnlmGen(BetterRnnlm):
    """BetterRnnlm subclass that adds sampling-based word-id generation."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a sequence of word ids by repeated sampling.

        Starting from ``start_id``, the current word id is passed to
        ``self.predict``, the flattened scores are turned into a probability
        vector with ``softmax``, and the next id is drawn from it with
        ``np.random.choice``.

        Args:
            start_id: Word id used as the first element of the result.
            skip_ids: Optional collection of word ids that must not appear in
                the result. ``None`` disables filtering.
            sample_size: Length of the returned list, including ``start_id``.

        Returns:
            A list of word ids. Every sampled element is a plain Python int.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x).flatten()
            p = softmax(score).flatten()

            # choice(size=1) returns a shape-(1,) array; take the element
            # explicitly, because int() on an ndim>0 array is deprecated in
            # recent NumPy releases.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])

            # If the draw is a skipped id nothing is appended and x is left
            # as is, so the previous input is passed to predict again.
            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return a list of ``(h, c)`` pairs, one per layer in ``self.lstm_layers``."""
        states = []
        for layer in self.lstm_layers:
            states.append((layer.h, layer.c))
        return states

    def set_state(self, states):
        """Pair each layer in ``self.lstm_layers`` with an entry of ``states``
        and unpack that entry into the layer's ``set_state``."""
        for layer, state in zip(self.lstm_layers, states):
            layer.set_state(*state)

### 문장생성을 위한 코드

In [127]:
# coding: utf-8
import sys
sys.path.append('..')
from rnnlm_gen import RnnlmGen
from dataset import ptb


# Load the PTB training split together with its vocabulary lookup tables.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the generator and load its parameters from the pickle file.
model = MyRnnlmGen()
model.load_params('../ch06/Rnnlm.pkl')

# Choose the start word and the words that must not be generated.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = list(map(word_to_id.__getitem__, skip_words))

# Generate 100 word ids, then render them as text with one sentence per line.
word_ids = model.generate(start_id, skip_ids, 100)
txt = ' '.join(id_to_word[i] for i in word_ids).replace(' <eos>', '.\n')
print(txt)

you by such u.s. korea judge felt dunes rose a first mutual funds international applicable dominion as diseases made and bankruptcy s.a..
 mrs. have actively a karen spending said it decided to a closed-end chairman for government there.
 corp. and refund managers can harvard carlos reinvestment by foreign refund licensed cutbacks research nine and mci funds for president.
 they rumored a research edison they positive killer duty-free porter interest to a year 's world if mr. two-part on airport dodge durable alex in a so-called interest and shaking mexico.
 in a lehman lack to aid without


### 더 좋은 문장으로

In [129]:
# coding: utf-8
import sys
sys.path.append('..')
from common.np import *
from rnnlm_gen import BetterRnnlmGen
from dataset import ptb


# Load the PTB training split together with its vocabulary lookup tables.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the generator and load its parameters from the pickle file.
model = BetterRnnlmGen()
model.load_params('../ch06/BetterRnnlm.pkl')

# NOTE(review): presumably clears the recurrent state before generation --
# confirm against BetterRnnlm.reset_state.
model.reset_state()

# Choose the start word and the words that must not be generated.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = list(map(word_to_id.__getitem__, skip_words))

# Generate 100 word ids, then render them as text with one sentence per line.
word_ids = model.generate(start_id, skip_ids, 100)
txt = ' '.join(id_to_word[i] for i in word_ids).replace(' <eos>', '.\n')
print(txt)



# start_words = 'the meaning of life is'
# start_words = 'you'
# start_ids = [word_to_id[w] for w in start_words.split(' ')]
# print(start_ids)
# print(start_ids[:-1])

# for x in start_ids[:-1]:
#     x = np.array(x).reshape(1, 1)
#     model.predict(x)

# word_ids = model.generate(start_ids[-1], skip_ids)
# word_ids = start_ids[:-1] + word_ids
# txt = ' '.join([id_to_word[i] for i in word_ids])
# txt = txt.replace(' <eos>', '.\n')
# print('-' * 50)
# print(txt)


you rush to rely extremely on the rest of that business he says.
 thomas w. garrison new jersey city transportation general.
 mr. c. met goldberg said he expected two to five days to report an annual return of five stewart money from the previous arms end of the day.
 mr. mcgovern of a subsidiary of new york mr. hahn will face a policy as a record fellow for him.
 in just the point area mr. sorrell and many other men are include chrysler 's marketing division.
 nissan has lost millions of dollars from rolling than
