# predict_word

In [1]:
import tensorflow as tf
import numpy as np
import re
import glob
import collections
import random
import pickle
import time
import datetime
import os

# logging levelを変更
tf.logging.set_verbosity(tf.logging.ERROR)

class Corpus:
    def __init__(self):
        self.unknown_word_symbol = "<???>" # 出現回数の少ない単語は未知語として定義しておく
        self.unknown_word_threshold = 3 # 未知語と定義する単語の出現回数の閾値
        self.corpus_file = "./corpus/**/*.txt"
        self.corpus_encoding = "utf-8"
        self.dictionary_filename = "./data_for_predict/word_dict.dic"
        self.chunk_size = 5
        self.load_dict()

        words = []
        for filename in glob.glob(self.corpus_file, recursive=True):
            with open(filename, "r", encoding=self.corpus_encoding) as f:

                # word breaking
                text = f.read()
                # 全ての文字を小文字に統一し、改行をスペースに変換
                text = text.lower().replace("\n", " ")
                # 特定の文字以外の文字を空文字に置換する
                text = re.sub(r"[^a-z '\-]", "", text)
                # 複数のスペースはスペース一文字に変換
                text = re.sub(r"[ ]+", " ", text)

                # 前処理： '-' で始まる単語は無視する
                words = [ word for word in text.split() if not word.startswith("-")]


        self.data_n = len(words) - self.chunk_size
        self.data = self.seq_to_matrix(words)

    def prepare_data(self):
        """
        訓練データとテストデータを準備する。
        data_n = ( text データの総単語数 ) - chunk_size
        input: (data_n, chunk_size, vocabulary_size)
        output: (data_n, vocabulary_size)
        """

        # 入力と出力の次元テンソルを準備
        all_input = np.zeros([self.chunk_size, self.vocabulary_size, self.data_n])
        all_output = np.zeros([self.vocabulary_size, self.data_n])

        # 準備したテンソルに、コーパスの one-hot 表現(self.data) のデータを埋めていく
        # i 番目から ( i + chunk_size - 1 ) 番目までの単語が１組の入力となる
        # このときの出力は ( i + chunk_size ) 番目の単語
        for i in range(self.data_n):
            all_output[:, i] = self.data[:, i + self.chunk_size] # (i + chunk_size) 番目の単語の one-hot ベクトル
            for j in range(self.chunk_size):
                all_input[j, :, i] = self.data[:, i + self.chunk_size - j - 1]

        # 後に使うデータ形式に合わせるために転置を取る
        all_input = all_input.transpose([2, 0, 1])
        all_output = all_output.transpose()

        # 訓練データ：テストデータを 4 : 1 に分割する
        training_num = ( self.data_n * 4 ) // 5
        return all_input[:training_num], all_output[:training_num], all_input[training_num:], all_output[training_num:]


    def build_dict(self):
        # コーパス全体を見て、単語の出現回数をカウントする
        counter = collections.Counter()
        for filename in glob.glob(self.corpus_file, recursive=True):
            with open(filename, "r", encoding=self.corpus_encoding) as f:

                # word breaking
                text = f.read()
                # 全ての文字を小文字に統一し、改行をスペースに変換
                text = text.lower().replace("\n", " ")
                # 特定の文字以外の文字を空文字に置換する
                text = re.sub(r"[^a-z '\-]", "", text)
                # 複数のスペースはスペース一文字に変換
                text = re.sub(r"[ ]+", " ", text)

                # 前処理： '-' で始まる単語は無視する
                words = [word for word in text.split() if not word.startswith("-")]

                counter.update(words)

        # 出現頻度の低い単語を一つの記号にまとめる
        word_id = 0
        dictionary = {}
        for word, count in counter.items():
            if count <= self.unknown_word_threshold:
                continue

            dictionary[word] = word_id
            word_id += 1
        dictionary[self.unknown_word_symbol] = word_id

        print("総単語数：", len(dictionary))

        # 辞書を pickle を使って保存しておく
        with open(self.dictionary_filename, "wb") as f:
            pickle.dump(dictionary, f)
            print("Dictionary is saved to", self.dictionary_filename)

        self.dictionary = dictionary

        print(self.dictionary)

    def load_dict(self):
        with open(self.dictionary_filename, "rb") as f:
            self.dictionary = pickle.load(f)
            self.vocabulary_size = len(self.dictionary)
            self.input_layer_size = len(self.dictionary)
            self.output_layer_size = len(self.dictionary)
            print("総単語数: ", self.input_layer_size)

    def get_word_id(self, word):
        # print(word)
        # print(self.dictionary)
        # print(self.unknown_word_symbol)
        # print(self.dictionary[self.unknown_word_symbol])
        # print(self.dictionary.get(word, self.dictionary[self.unknown_word_symbol]))
        return self.dictionary.get(word, self.dictionary[self.unknown_word_symbol])

    # 入力された単語を one-hot ベクトルにする
    def to_one_hot(self, word):
        index = self.get_word_id(word)
        data = np.zeros(self.vocabulary_size)
        data[index] = 1
        return data

    def seq_to_matrix(self, seq):
        print(seq)
        data = np.array([self.to_one_hot(word) for word in seq]) # (data_n, vocabulary_size)
        return data.transpose() # (vocabulary_size, data_n)

class Language:
    """
    input layer: self.vocabulary_size
    hidden layer: rnn_size = 30
    output layer: self.vocabulary_size
    """

    def __init__(self):
        self.corpus = Corpus()
        self.dictionary = self.corpus.dictionary
        self.vocabulary_size = len(self.dictionary) # 単語数
        self.input_layer_size = self.vocabulary_size # 入力層の数
        self.hidden_layer_size = 30 # 隠れ層の RNN ユニットの数
        self.output_layer_size = self.vocabulary_size # 出力層の数
        self.batch_size = 128 # バッチサイズ
        self.chunk_size = 5 # 展開するシーケンスの数。c_0, c_1, ..., c_(chunk_size - 1) を入力し、c_(chunk_size) 番目の単語の確率が出力される。
        self.learning_rate = 0.005 # 学習率
        self.epochs = 1000 # 学習するエポック数
        self.forget_bias = 1.0 # LSTM における忘却ゲートのバイアス
        self.model_filename = "./data_for_predict/predict_model.ckpt"
        self.unknown_word_symbol = self.corpus.unknown_word_symbol

    def inference(self, input_data, initial_state):
        """
        :param input_data: (batch_size, chunk_size, vocabulary_size) 次元のテンソル
        :param initial_state: (batch_size, hidden_layer_size) 次元の行列
        :return:
        """
        # 重みとバイアスの初期化
        hidden_w = tf.Variable(tf.truncated_normal([self.input_layer_size, self.hidden_layer_size], stddev=0.01))
        hidden_b = tf.Variable(tf.ones([self.hidden_layer_size]))
        output_w = tf.Variable(tf.truncated_normal([self.hidden_layer_size, self.output_layer_size], stddev=0.01))
        output_b = tf.Variable(tf.ones([self.output_layer_size]))

        # BasicLSTMCell, BasicRNNCell は (batch_size, hidden_layer_size) が chunk_size 数ぶんつながったリストを入力とする。
        # 現時点での入力データは (batch_size, chunk_size, input_layer_size) という３次元のテンソルなので
        # tf.transpose や tf.reshape などを駆使してテンソルのサイズを調整する。

        input_data = tf.transpose(input_data, [1, 0, 2]) # 転置。(chunk_size, batch_size, vocabulary_size)
        input_data = tf.reshape(input_data, [-1, self.input_layer_size]) # 変形。(chunk_size * batch_size, input_layer_size)
        input_data = tf.matmul(input_data, hidden_w) + hidden_b # 重みWとバイアスBを適用。 (chunk_size, batch_size, hidden_layer_size)
        input_data = tf.split(input_data, self.chunk_size, 0) # リストに分割。chunk_size * (batch_size, hidden_layer_size)

        # RNN のセルを定義する。RNN Cell の他に LSTM のセルや GRU のセルなどが利用できる。
        cell = tf.nn.rnn_cell.BasicRNNCell(self.hidden_layer_size)
        outputs, states = tf.nn.static_rnn(cell, input_data, initial_state=initial_state)
        
        # 最後に隠れ層から出力層につながる重みとバイアスを処理する
        # 最終的に softmax 関数で処理し、確率として解釈される。
        # softmax 関数はこの関数の外で定義する。
        output = tf.matmul(outputs[-1], output_w) + output_b

        return output

    def loss(self, logits, labels):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))

        return cost

    def training(self, cost):
        # 今回は最適化手法として Adam を選択する。
        # ここの AdamOptimizer の部分を変えることで、Adagrad, Adadelta などの他の最適化手法を選択することができる
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(cost)

        return optimizer

    def train(self):
        # 変数などの用意
        input_data = tf.placeholder("float", [None, self.chunk_size, self.input_layer_size])
        actual_labels = tf.placeholder("float", [None, self.output_layer_size])
        initial_state = tf.placeholder("float", [None, self.hidden_layer_size])

        prediction = self.inference(input_data, initial_state)
        cost = self.loss(prediction, actual_labels)
        optimizer = self.training(cost)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(actual_labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        # TensorBoard で可視化するため、クロスエントロピーをサマリーに追加
        tf.summary.scalar("Cross entropy: ", cost)
        summary = tf.summary.merge_all()

        # 訓練・テストデータの用意
        # corpus = Corpus()
        trX, trY, teX, teY = self.corpus.prepare_data()
        training_num = trX.shape[0]

        # ログを保存するためのディレクトリ
        timestamp = time.time()
        dirname = datetime.datetime.fromtimestamp(timestamp).strftime("%Y%m%d%H%M%S")

        # ここから実際に学習を走らせる
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            summary_writer = tf.summary.FileWriter("./log/" + dirname, sess.graph)

            # エポックを回す
            for epoch in range(self.epochs):
                step = 0
                epoch_loss = 0
                epoch_acc = 0

                # 訓練データをバッチサイズごとに分けて学習させる (= optimizer を走らせる)
                # エポックごとの損失関数の合計値や（訓練データに対する）精度も計算しておく
                while (step + 1) * self.batch_size < training_num:
                    start_idx = step * self.batch_size
                    end_idx = (step + 1) * self.batch_size

                    batch_xs = trX[start_idx:end_idx, :, :]
                    batch_ys = trY[start_idx:end_idx, :]

                    _, c, a = sess.run([optimizer, cost, accuracy],
                                       feed_dict={input_data: batch_xs,
                                                  actual_labels: batch_ys,
                                                  initial_state: np.zeros([self.batch_size, self.hidden_layer_size])
                                                  }
                                       )
                    epoch_loss += c
                    epoch_acc += a
                    step += 1

                # コンソールに損失関数の値や精度を出力しておく
                print("Epoch", epoch, "completed ouf of", self.epochs, "-- loss:", epoch_loss, " -- accuracy:",
                      epoch_acc / step)

                # Epochが終わるごとにTensorBoard用に値を保存
                summary_str = sess.run(summary, feed_dict={input_data: trX,
                                                           actual_labels: trY,
                                                           initial_state: np.zeros(
                                                               [trX.shape[0],
                                                                self.hidden_layer_size]
                                                           )
                                                           }
                                       )
                summary_writer.add_summary(summary_str, epoch)
                summary_writer.flush()

            # 学習したモデルも保存しておく
            saver = tf.train.Saver()
            saver.save(sess, self.model_filename)

            # 最後にテストデータでの精度を計算して表示する
            a = sess.run(accuracy, feed_dict={input_data: teX, actual_labels: teY,
                                              initial_state: np.zeros([teX.shape[0], self.hidden_layer_size])})
            print("Accuracy on test:", a)


    def predict(self, seq):
        """
        文章を入力したときに次に来る単語を予測する
        :param seq: 予測したい単語の直前の文字列。chunk_size 以上の単語数が必要。
        :return:
        """

        # 最初に復元したい変数をすべて定義してしまいます
        tf.reset_default_graph()
        input_data = tf.placeholder("float", [None, self.chunk_size, self.input_layer_size])
        initial_state = tf.placeholder("float", [None, self.hidden_layer_size])
        prediction = tf.nn.softmax(self.inference(input_data, initial_state))
        predicted_labels = tf.argmax(prediction, 1)

        # 入力データの作成
        # seq を one-hot 表現に変換する。
        words = [word for word in seq.split() if not word.startswith("-")]
        x = np.zeros([1, self.chunk_size, self.input_layer_size])
        for i in range(self.chunk_size):
            word = seq[len(words) - self.chunk_size + i]
            index = self.dictionary.get(word, self.dictionary[self.unknown_word_symbol])
            x[0][i][index] = 1
        feed_dict = {
            input_data: x, # (1, chunk_size, vocabulary_size)
            initial_state: np.zeros([1, self.hidden_layer_size])
        }

        # tf.Session()を用意
        with tf.Session() as sess:
            # 保存したモデルをロードする。ロード前にすべての変数を用意しておく必要がある。
            saver = tf.train.Saver()
            saver.restore(sess, self.model_filename)

            # ロードしたモデルを使って予測結果を計算
            u, v = sess.run([prediction, predicted_labels], feed_dict=feed_dict)

            keys = list(self.dictionary.keys())


            # コンソールに文字ごとの確率を表示
            for i in range(self.vocabulary_size):
                c = self.unknown_word_symbol if i == (self.vocabulary_size - 1) else keys[i]
                print(c, ":", u[0][i])

            print("Prediction:", seq + " " + ("<???>" if v[0] == (self.vocabulary_size - 1) else keys[v[0]]))

        return u[0]


def build_dict():
    cp = Corpus()
    cp.build_dict()

if __name__ == "__main__":
    #build_dict()

    ln = Language()

    # 学習するときに呼び出す
    #ln.train()

    # 保存したモデルを使って単語の予測をする
    ln.predict("some of them looks like")


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


総単語数:  9826


no : 8.723958e-12
in : 0.0035252138
the : 0.035431463
supreme : 6.6973895e-13
court : 6.4598614e-13
of : 0.00025577436
attorney : 6.3459334e-13
general : 6.929165e-13
washington : 6.0849253e-13
dc : 6.701836e-13
united : 7.467902e-13
states : 6.258462e-13
wednesday : 7.018362e-13
october : 6.0887444e-13
matter : 6.521414e-13
came : 5.164268e-13
on : 2.4574694e-08
for : 5.683326e-10
oral : 6.2529615e-13
argument : 5.870776e-13
before : 6.2438325e-13
at : 3.849002e-06
am : 6.405081e-13
appearances : 6.364528e-13
lawrence : 6.906812e-13
lessig : 6.330894e-13
esq : 5.9054557e-13
stanford : 6.419537e-13
california : 4.535812e-10
behalf : 6.3844676e-13
petitioners : 6.831034e-13
theodore : 5.898263e-13
b : 6.1873423e-13
olson : 6.7765563e-13
department : 6.7253337e-13
justice : 4.0816447e-12
we'll : 5.9327093e-13
hear : 6.122201e-13
now : 6.097345e-13
number : 6.1943566e-13
eric : 6.9171475e-13
v : 7.119699e-13
john : 1.1165525e-14
d : 6.635809e-13
ashcroft : 6.3836523e-13
mr : 6.5371677e-13

understanding : 6.155349e-13
stevens : 6.452904e-13
purely : 6.874401e-13
purposes : 6.234312e-13
dissemination : 6.51352e-13
oh : 6.432333e-13
need : 1.6509535e-11
implied : 6.0252394e-13
expressed : 6.6859663e-13
carefully : 6.0396794e-13
article : 6.6515467e-13
section : 6.652968e-13
shall : 6.1859496e-13
world : 6.6927415e-13
ours : 6.3364635e-13
certain : 6.545153e-13
changes : 6.8035454e-13
partners : 6.682091e-13
led : 6.11331e-13
fight : 6.417787e-13
european : 1.4075964e-07
union : 0.0017358273
demand : 6.67292e-13
couldn't : 6.6689503e-13
knowledge : 6.53146e-13
fitting : 6.178085e-13
france : 6.510515e-13
hate : 6.357637e-13
consistent : 6.6500624e-13
similarly : 6.234883e-13
mary : 6.122715e-13
beth : 6.80365e-13
testified : 6.131246e-13
continually : 6.454677e-13
expand : 6.275974e-13
own : 1.3540723e-07
vision : 6.1889827e-13
articulated : 6.6138474e-13
addition : 5.868951e-13
each : 5.604435e-09
customer : 6.714235e-13
printed : 5.8997365e-13
come : 4.240288e-13
suggesti

defendants : 6.7515535e-13
afternoon : 5.5504386e-12
session : 6.8057517e-13
transcript : 5.946304e-13
proceedings : 6.336052e-13
bench : 6.329421e-13
trial : 7.0102013e-13
e : 6.255538e-13
jones : 6.249575e-13
iii : 6.3896577e-13
date : 6.4789504e-13
september : 5.7292474e-13
courtroom : 6.210291e-13
floor : 6.0588676e-13
building : 6.0049323e-13
street : 6.1051067e-13
counsel : 6.6933796e-13
stephen : 6.575584e-13
g : 6.741657e-13
harvey : 6.3048893e-13
richard : 6.434272e-13
patrick : 6.887158e-13
t : 6.919259e-13
gillen : 6.084786e-13
thompson : 7.2643616e-13
robert : 7.0858846e-13
muise : 7.892823e-13
eq : 6.99671e-13
official : 6.659799e-13
reporter : 6.714081e-13
index : 6.02993e-13
witnesses : 6.3269344e-13
julie : 6.390608e-13
smith : 6.221673e-13
christy : 6.435475e-13
rehm : 7.029106e-13
eveland : 6.2932596e-13
following : 7.0212537e-13
discussion : 6.919576e-13
occurred : 6.7718525e-13
chambers : 6.3790634e-13
record : 6.380828e-13
versus : 6.23115e-13
assembled : 6.683187e

faith : 2.3878243e-08
mutually : 6.7106623e-13
mrs : 6.4030167e-13
april : 6.792628e-13
center : 4.0015293e-09
ken : 6.813688e-13
barrie : 6.3128555e-13
callahan : 6.815417e-13
gentleman : 6.48188e-13
plaintiff : 6.585374e-13
aclu : 6.1466316e-13
friend : 6.6247306e-13
contacted : 6.2184936e-13
agreed : 6.338917e-13
lawsuit : 6.09424e-13
ninth : 6.440374e-13
implemented : 6.6141504e-13
january : 2.7325959e-14
ten : 1.0593592e-10
affected : 6.2982546e-13
ninth-grade : 7.0767947e-13
students : 6.4381763e-13
classes : 7.067366e-13
listen : 6.707194e-13
neither : 6.2515303e-13
accurate : 6.3619314e-13
year : 2.832407e-11
becoming : 6.327116e-13
attended : 5.5336346e-07
single : 7.2592925e-13
excuse : 7.1478923e-13
wait : 6.677835e-13
start : 6.1292817e-13
temptation : 6.9813523e-13
project : 8.4730825e-12
approach : 6.521128e-13
attention : 6.399305e-13
loud : 6.1411716e-13
starting : 6.218968e-13
starts : 6.757324e-13
q : 6.792628e-13
represents : 6.773196e-13
reflect : 7.121601e-13
famil

nodes : 6.569015e-13
observed : 6.376119e-13
bayes-net : 6.871963e-13
entity : 6.402174e-13
fire : 5.928004e-13
stable : 6.274263e-13
f- : 6.981086e-13
parking : 6.6744483e-13
location : 6.658731e-13
hotel : 6.4191084e-13
riots : 6.445794e-13
input : 6.7266424e-13
w- : 7.7859746e-13
y- : 6.770691e-13
xml : 6.3024607e-13
forces : 7.091699e-13
schema : 6.3586555e-13
th- : 7.098249e-13
l- : 6.6509125e-13
java : 5.775493e-13
bayes : 6.8603077e-13
spec : 6.5804014e-13
gonna : 6.58859e-13
interface : 6.7636424e-13
document : 6.520494e-13
somewhere : 6.5258936e-13
observe : 6.817783e-13
yep : 6.499634e-13
defines : 5.641715e-13
possible : 6.6476403e-13
structures : 7.207486e-13
r- : 6.3665436e-13
format : 6.1670417e-13
n- : 7.0973826e-13
node : 6.693801e-13
nuh : 5.6190767e-13
convert : 6.6712656e-13
wanna : 6.145308e-13
posterior : 6.451501e-13
probabilities : 6.64181e-13
go-there : 6.447072e-13
raining : 6.7586133e-13
specify : 6.7215504e-13
user : 6.095031e-13
specification : 6.6592906e-13

sphere : 6.517796e-13
nist : 6.2117363e-13
program : 5.948244e-13
shorten : 6.350959e-13
sri : 6.377823e-13
tools : 6.937072e-13
tool : 6.437549e-13
front-end : 6.2439035e-13
uses : 6.044243e-13
quick : 6.1129834e-13
relieve : 6.884873e-13
solution : 6.3066815e-13
encoding : 7.212382e-13
minus : 6.8869216e-13
wavefile : 6.762998e-13
shell : 6.421632e-13
operator : 1.9989977e-05
replaced : 6.7210506e-13
programs : 6.640645e-13
saved : 6.549386e-13
experiment : 6.9571063e-13
processing : 6.643103e-13
hours : 6.708551e-13
flash : 0.00037656142
inspiration : 6.051268e-13
low : 5.8663317e-13
gained : 6.881329e-13
processes : 6.9447237e-13
runs : 6.25413e-13
watching : 6.0890117e-13
meter : 6.4102754e-13
excitement : 6.597846e-13
boy : 6.446642e-13
mess : 6.9236685e-13
split : 6.541284e-13
lists : 6.779128e-13
indices : 6.202431e-13
segment : 6.2245575e-13
floating : 6.480928e-13
compressed : 6.438348e-13
routine : 6.133539e-13
seek : 7.497273e-13
wasted : 6.458445e-13
delete : 6.671584e-13


forward : 6.937588e-13
deductible : 6.4935237e-13
forced : 5.8007196e-13
competition : 3.0305512e-07
helps : 6.564782e-13
save : 4.405604e-13
credible : 6.4060703e-13
politically : 6.3662525e-13
analyzed : 6.262402e-13
largest : 7.4182137e-13
estimated : 6.216216e-13
leads : 6.246989e-13
controls : 6.0548474e-13
controlled : 2.5921019e-12
envy : 6.2613754e-13
patients : 6.9514164e-13
officials : 5.343812e-13
capital : 5.450449e-12
government-run : 7.283105e-13
explains : 6.5301394e-13
funded : 6.582335e-13
funding : 6.292299e-13
obligation : 6.922335e-13
expanding : 6.2544165e-13
facilities : 6.369082e-13
social : 7.0778886e-13
fix : 6.897254e-13
letting : 6.469936e-13
critics : 5.985904e-13
finding : 6.5566606e-13
senior : 6.4944653e-13
understands : 6.4820285e-13
checks : 6.603839e-13
youngsters : 7.0869666e-13
valued : 7.469754e-13
trillions : 7.051854e-13
commitment : 6.642165e-13
recognizing : 6.499906e-13
study : 6.7757545e-13
committee : 6.0086793e-13
chaired : 5.8157635e-13
dan

aimed : 5.9235055e-13
parenting : 6.413418e-13
flood : 5.9218e-13
stem : 6.5750324e-13
beds : 6.4569666e-13
automatically : 7.12584e-13
vehicle : 6.5332413e-13
illness : 5.9166403e-13
scales : 6.5831894e-13
payment : 6.7690894e-13
pool : 7.0486537e-13
associations : 6.8186933e-13
chamber : 6.157333e-13
restaurant : 6.6570176e-13
association : 6.388244e-13
universal : 6.430101e-13
shrinking : 6.489686e-13
size : 6.727823e-13
typically : 6.333648e-13
measured : 6.631102e-13
gdp : 6.2926834e-13
greatly : 6.2111443e-13
chip : 6.6600273e-13
millions : 6.2048565e-13
ranks : 6.5384276e-13
figures : 6.958354e-13
tracks : 5.85886e-10
uninsured : 6.3880975e-13
chips : 6.141347e-13
passes : 6.072218e-13
signing : 6.2954205e-13
throws : 6.737775e-13
speculate : 6.531983e-13
legislature : 9.663075e-06
whom : 1.4515147e-06
charges : 6.8241186e-13
efficient : 6.598828e-13
absurd : 6.1364173e-13
proposals : 6.2559316e-13
one-third : 6.398414e-13
reductions : 7.030796e-13
ladder : 6.4633986e-13
contras

egyptian : 6.9314243e-13
rulers : 6.4273294e-13
thesis : 5.950899e-13
counting : 7.1796746e-13
track : 2.7318875e-10
accepts : 6.664551e-13
speculation : 6.8911527e-13
laborers : 6.929151e-13
greek : 6.4043725e-13
historian : 6.6335815e-13
labour : 6.7367726e-13
blows : 7.1115564e-13
modern : 5.937028e-15
researcher : 6.6056904e-13
commanded : 6.426153e-13
sum : 6.449446e-13
sixth : 6.1089155e-13
bc : 3.4484596e-10
pan : 6.534899e-13
flourished : 6.431156e-13
welfare : 6.73577e-13
roman : 6.4751953e-13
nurse : 6.8523046e-13
fourth : 6.327587e-13
cultures : 6.614579e-13
instrument : 5.909715e-13
blood : 5.9514893e-13
bones : 6.418068e-13
preferred : 6.6165725e-13
wild : 6.701901e-13
destiny : 7.038605e-13
paul : 6.9529945e-13
seller : 5.991524e-13
purple : 6.600993e-13
ibid : 6.2492654e-13
array : 6.4973645e-13
brilliant : 5.8043604e-13
unusual : 6.5453654e-13
inspiring : 6.4509964e-13
preceding : 6.6570555e-13
taste : 6.8969125e-13
stroke : 6.591419e-13
grim : 6.4331185e-13
clothing : 

substantially : 6.5649575e-13
premature : 6.5822846e-13
vary : 6.342945e-13
rural : 6.537417e-13
units : 6.994188e-13
fetus's : 7.0265863e-13
attempting : 6.360645e-13
functioning : 6.522521e-13
eyelids : 6.354145e-13
onset : 6.995162e-13
respiratory : 6.371779e-13
constant : 6.275651e-13
findings : 6.042595e-13
nationwide : 5.836633e-13
delivery : 6.439576e-13
crushed : 7.1596814e-13
brains : 6.059607e-13
regulation : 6.6387835e-13
polls : 6.457066e-13
performed : 5.8155753e-13
palestinian : 6.092962e-13
dealer : 6.1906003e-13
sale : 6.6003886e-13
struggle : 7.1920237e-13
biblical : 5.991752e-13
palestine : 7.1224564e-13
ottoman : 6.455785e-13
empire : 6.401564e-13
jerusalem : 6.757041e-13
immigrants : 6.3984506e-13
tracts : 3.536655e-13
physically : 5.987183e-13
arabs : 5.6558073e-13
laying : 6.149212e-13
charter : 6.606913e-13
mandate : 6.322423e-13
guidelines : 6.6563947e-13
peasants : 6.8409563e-13
displaced : 6.296525e-13
threat : 6.1112e-13
displacement : 6.1478047e-13
railed : 

deliberations : 6.481015e-13
independently : 6.463917e-13
experimental : 6.346321e-13
rep : 6.6688365e-13
sections : 6.496126e-13
mechanisms : 6.8526575e-13
expedited : 6.2234294e-13
arrangements : 6.1725023e-13
mailer : 6.6195525e-13
mailers : 6.433646e-13
customized : 6.5146754e-13
opportunities : 6.4294875e-13
establishes : 6.6299505e-13
bind : 6.353152e-13
correspondence : 6.6195525e-13
prompt : 6.928543e-13
adequate : 5.741587e-13
categories : 6.52389e-13
powers : 6.1925844e-13
operational : 6.807427e-13
custody : 6.335061e-13
stamps : 6.3547756e-13
postage : 7.0939446e-13
authorizes : 7.0705746e-13
authorized : 4.5404666e-11
directs : 6.525458e-13
recommended : 6.8488677e-13
governing : 5.9700096e-13
cfr : 6.3453523e-13
docket : 6.964781e-13
mc- : 6.8774316e-13
phase : 6.228322e-13
requests : 6.2184936e-13
depart : 7.3678737e-13
incorporate : 6.6177337e-13
extensively : 6.1431394e-13
implement : 6.375597e-13
rulemaking : 7.453131e-13
revised : 6.5468003e-13
prescribe : 6.460971e-

conspicuous : 6.2513395e-13
tightly : 5.970078e-13
prof : 6.5538596e-13
revitalization : 7.0192186e-13
movements : 6.6053256e-13
humans : 6.5208044e-13
traits : 7.000073e-13
lifestyle : 2.6345994e-12
gaining : 6.4867035e-13
maya : 7.1071355e-13
guatemala : 6.486394e-13
nasa : 6.407293e-13
guambiano : 6.58849e-13
colombia : 6.4558217e-13
demanded : 6.754142e-13
possessed : 6.693137e-13
claimed : 6.323267e-13
jackson : 6.428053e-13
conservatives : 6.9964824e-13
colombian : 6.850945e-13
mayans : 5.909693e-13
guerrillas : 6.218268e-13
accord : 6.9004123e-13
popularity : 6.34211e-13
mayan : 6.9282123e-13
pan-mayan : 6.6653394e-13
guatemalan : 5.939888e-13
modernity : 6.19763e-13
mobility : 6.6596457e-13
renaissance : 6.3767644e-13
fractured : 6.918256e-13
sole : 6.4077443e-13
consciousness : 6.5378036e-13
gow : 5.988348e-13
stemming : 6.7552626e-13
customary : 6.7195896e-13
corporal : 6.536245e-13
referendum : 6.420688e-13
formal : 2.1425068e-08
illiteracy : 5.9430157e-13
literate : 6.44442

tonic : 6.21227e-13
whole-tone : 7.041102e-13
chromatic : 6.6929074e-13
motivic : 6.4177624e-13
motive : 6.5067163e-13
celli : 6.393693e-13
basses : 6.8521214e-13
pentatonic : 6.9258217e-13
fourths : 6.383494e-13
generates : 6.1725966e-13
diminish : 6.948408e-13
transitions : 7.167005e-13
constitutes : 7.0737047e-13
descending : 6.4772455e-13
exemplified : 6.63467e-13
chains : 6.070875e-13
intervals : 6.6626826e-13
intervallic : 6.13952e-13
cell : 6.016443e-13
inversion : 6.215268e-13
outer : 6.5289186e-13
violins : 6.953021e-13
span : 6.592211e-13
seemingly : 6.9113975e-13
interval : 6.823429e-13
dissonance : 6.295973e-13
melody : 6.888314e-13
horns : 6.202561e-13
strings : 7.0679053e-13
triple : 6.36204e-13
fugato : 6.065666e-13
transposed : 6.641228e-13
constitute : 6.315143e-13
developmental : 7.3679295e-13
rhythm : 7.005817e-13
chord : 6.0188535e-13
blends : 6.784186e-13
seamlessly : 6.6043677e-13
emerge : 7.241232e-13
neighbors : 6.9925207e-13
pause : 7.364909e-13
signaling : 6.7

sleek : 6.7688184e-13
smile : 6.922586e-13
poh : 6.651839e-13
musicp : 6.7508065e-13
victor : 6.2181255e-13
kelly : 6.250219e-13
investigator : 7.0592826e-13
pstill : 7.058139e-13
pworking : 6.6529047e-13
projectp : 6.409358e-13
anonymous : 6.3685477e-13
pstudying : 6.7951287e-13
studying : 6.24107e-13
alternate : 6.930856e-13
cafe : 6.71248e-13
pon : 7.210621e-13
wont : 6.1243507e-13
invite : 6.221602e-13
pchecking : 6.2976783e-13
lap : 6.905284e-13
chasing : 6.628851e-13
eagle : 6.192289e-13
amid : 6.15367e-13
arcade : 6.979356e-13
climbing : 6.830721e-13
hitting : 6.4864314e-13
alley : 5.994679e-13
bothering : 6.053808e-13
todayp : 6.236084e-13
workshop : 6.565758e-13
breast : 6.3920223e-13
rum : 5.956191e-13
gothic : 6.5149367e-13
el : 1.6153975e-14
stewart : 0.000102601356
pfinally : 6.162244e-13
sailing : 6.457028e-13
daysp : 6.4119505e-13
dam : 5.724573e-09
netherlands : 6.6110724e-13
homep : 6.127856e-13
stranger : 6.6566614e-13
pmy : 6.756538e-13
wedding : 6.0548355e-13
nurses

curves : 6.4073296e-13
distant : 7.1093863e-13
boulevard : 6.339921e-13
stephens : 7.176279e-13
nightlife : 6.8506713e-13
cathedral : 6.4066813e-13
enjoying : 5.8072725e-13
modernism : 6.099357e-13
youll : 6.4177255e-13
legendary : 4.5364523e-10
gate : 6.552422e-13
pubs : 6.5160426e-13
dancing : 6.473022e-13
crafts : 6.6747535e-13
souvenirs : 6.582385e-13
hotels : 1.372031e-08
restaurants : 6.9317555e-13
cuisine : 6.050968e-13
seas : 6.59299e-13
rivers : 6.0849486e-13
vienna : 5.94492e-13
seattle : 6.967505e-13
gardens : 7.2246907e-13
recreation : 6.9085243e-13
tara : 6.755997e-13
dart : 6.366859e-13
tours : 6.5605887e-13
twenty-five : 5.7933774e-13
rub : 6.267266e-13
shoulders : 7.083588e-13
jerusalems : 6.756693e-13
settlements : 6.565621e-13
resembles : 6.4550334e-13
bread : 6.021931e-13
davids : 7.1181516e-13
discovering : 6.442807e-13
rival : 6.4220845e-13
solomon : 6.4380164e-13
dedication : 1.1854889e-11
sacrifice : 6.2355253e-13
palace : 6.8278814e-13
mansions : 6.3596014e-13
t

equation : 6.225709e-13
adjusted : 6.1095795e-13
valley-floor : 6.2033777e-13
decreasing : 7.3687166e-13
bedrock : 6.258665e-13
meanders : 6.613532e-13
encounters : 6.740153e-13
conversion : 5.721341e-13
boundaryless : 6.4795315e-13
restructuring : 6.3117e-13
define : 6.399622e-13
prevail : 5.9367507e-13
anarchy : 6.291387e-13
emergent : 6.8908903e-13
rousseau : 6.7787144e-13
emerges : 7.048009e-13
micro-level : 6.7380964e-13
behaviour : 6.251721e-13
organising : 6.747253e-13
weicks : 6.431217e-13
framed : 5.965093e-13
weick : 6.947639e-13
lens : 5.833361e-13
conception : 5.7645756e-13
sensemaking : 6.90136e-13
enact : 6.401234e-13
evolves : 6.30602e-13
unfold : 6.6587567e-13
behave : 6.15069e-13
proliferation : 6.079519e-13
descriptions : 6.338699e-13
cues : 6.415413e-13
ambiguity : 6.1361365e-13
opens : 6.4687757e-13
codes : 7.0543295e-13
shapes : 6.502448e-13
unfolding : 6.617166e-13
illustrate : 6.7944164e-13
saxenian : 6.282705e-13
hewlett : 6.664462e-13
packard : 6.399134e-13
pro

panties : 6.6955626e-13
shrugged : 6.1261846e-13
rang : 6.4338796e-13
alias : 6.5926003e-13
slime : 6.650152e-13
hiding : 6.4233715e-13
forte : 6.7091905e-13
roommate : 6.530762e-13
eternity : 6.4343946e-13
whites : 6.298399e-13
tapping : 6.5631794e-13
karon : 5.8556003e-13
theo : 7.234923e-13
gravity : 6.5739287e-13
bobby : 6.57985e-13
varsity : 6.0536694e-13
slid : 6.9121624e-13
remembering : 6.7883795e-13
whispered : 6.538477e-13
whispering : 7.211309e-13
numbered : 6.6387455e-13
brow : 6.8028715e-13
tagged : 6.3650983e-13
insisted : 6.486864e-13
cheek : 6.4339284e-13
yo : 7.357425e-13
crush : 6.783733e-13
hadnt : 6.571572e-13
eros : 6.2452376e-13
psyche : 6.183154e-13
canadianwriter : 7.4585206e-13
bud : 6.331957e-13
mister : 6.3423035e-13
eross : 6.8487246e-13
wrinkles : 6.552273e-13
touching : 6.259763e-13
immortal : 6.1211973e-13
incredulously : 6.715413e-13
fairy : 6.995095e-13
immense : 6.3992437e-13
sadness : 6.870391e-13
theyd : 6.281531e-13
screaming : 6.6791597e-13
muttere

whatsamatta : 6.5001916e-13
rabbits : 6.9774785e-13
limb : 6.2451666e-13
cheque : 6.8627943e-13
communicated : 6.704943e-13
cakes : 6.282394e-13
ale : 6.832989e-13
pig : 6.254655e-13
childbirth : 7.0153643e-13
diaper : 6.6426595e-13
aggi : 6.235335e-13
dialed : 5.880862e-13
swap : 6.4945277e-13
donkey : 6.023826e-13
ape : 6.415033e-13
panty : 6.7179877e-13
nelson's : 6.29787e-13
prefix : 6.859732e-13
melvin : 7.3111217e-13
sneeze : 7.2366614e-13
leopard : 6.5292053e-13
arrow : 6.407867e-13
proportional : 6.456758e-13
ski : 6.275603e-13
pasadena : 6.805415e-13
patti : 6.6712656e-13
docnolists-- : 7.006004e-13
whitney : 7.3266447e-13
sentwed : 6.6891295e-13
subjectre : 6.8356735e-13
html-math : 6.658707e-13
wc-math-erbworg : 7.1416104e-13
nico : 6.4818924e-13
sgml : 6.290067e-13
committee's : 6.8099335e-13
wolfram : 6.4648536e-13
tex : 6.448425e-13
namedave : 6.275017e-13
raggett : 6.3769834e-13
emaildsrworg : 6.967385e-13
senttue : 6.8013005e-13
idaaworg : 6.1775904e-13
raman : 6.867298

# 考察
文章を入力として次に現れる単語を予測するモデルの（精度・実用性はさておき）挙動を確認することにより、DL/NNにおける言語の扱いをプログラムにより確認することができた。