### データセットのインポート

In [1]:
import pyprind
import pandas as pd
from string import punctuation
import re
import numpy as np

df = pd.read_csv('movie_data.csv', encoding='utf-8')
print(df.head(3))

                                              review  sentiment
0  I went and saw this movie last night after bei...          1
1  Actor turned director Bill Paxton follows up h...          1
2  As a recreational golfer with some knowledge o...          1


### テキストを一連の整数に変換する

In [3]:
from collections import Counter

# 単語に分割して各単語の出現回数をカウント
# punctuationが入っていれば前後の文字も1つの単語としてカウント
# punctuation :: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~.
counts = Counter()

for i,review in enumerate(df['review']):
    text = ''.join([c if c not in punctuation else ''+c+'' for c in review]).lower()
    df.loc[i, 'review'] = text
    counts.update(text.split())

In [4]:
# カウンタークラスの検証
c = Counter()
c.update(['a','b','c'])
print(c)
c.update(['a','b','c'])
print(c)

Counter({'a': 1, 'b': 1, 'c': 1})
Counter({'a': 2, 'b': 2, 'c': 2})


In [5]:
# マッピングを作成
# 一意な単語をそれぞれ整数にマッピング

# カウンタを多い順にソート
word_counts = sorted(counts, key=counts.get, reverse=True)
print(word_counts[:5])

# 出現数が多いものから順番に1～インデックスをはる
# 辞書型
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)} # インデックスの取得を1から
print(word_to_int["and"])

# REVIEWの文章に単語の出現順位をマップしていく
mapped_reviews = []

for review in df['review']:
    mapped_reviews.append([word_to_int[word] for word in review.split()])

['the', 'a', 'and', 'of', 'to']
3


### 長さを整える

In [6]:
# RNNは入力が同じ長さでないとだめ
# sequence_length以下の場合 :: 左側を0パディング
# sequence_length以上の場合 :: 左の数字を省略
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype=int)
for i, row in enumerate(mapped_reviews):
    review_arr = np.array(row)
    sequences[i, -len(row):] = review_arr[-sequence_length:] # 後ろから〇〇個の要素を取得

### データ

In [7]:
# データを訓練用とテスト用に分ける
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values

### ミニバッチ

In [8]:
# ミニバッチ用の関数を定義
def create_batch_generator(x, y=None, batch_size=64):
    # 整数の商を取得
    n_batches = len(x)//batch_size
    
    # 対象の数字までをスライス
    x= x[:n_batches*batch_size]
    
    if y is not None:
        y = y[:n_batches*batch_size]
    for ii in range(0, len(x), batch_size):
        if y is not None:
            yield x[ii:ii+batch_size], y[ii:ii+batch_size]
        else:
            yield x[ii:ii+batch_size]

### 埋め込み(一意な単語をベクトルで表現)

In [86]:
# 埋め込み(一意な単語をベクトルで表現)
embedding = tf.Variable(
    tf.random_uniform(shape=(n_words, embedding_size),minval=1,maxval=1))
# 一意な単語に対応するベクトルを特定
embed_x = tf.nn.embedding_lookup(embedding, tf_x)

NameError: name 'n_words' is not defined

### RNNモデルの構築

In [103]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np

In [110]:
### 構成は以下 ###
# コンストラクタ
# buildメソッド
# trainメソッド
# predictメソッド

class SentimentRNN(object):
    def __init__(self
                 , n_words
                 , seq_len=200
                 ,lstm_size=256
                 , num_layers=1
                 , batch_size=64
                 ,learning_rate=0.0001
                 , embed_size=200):
            
        self.n_words = n_words              # 一意な単語の数
        self.seq_len = seq_len              # sequenceの長さ
        self.lstm_size = lstm_size          # 隠れユニットの個数
        self.num_layers = num_layers        # レイヤー数
        self.batch_size = batch_size        # バッチサイズ
        self.learning_rate = learning_rate  # 学習率
        self.embed_size = embed_size        # 一意な単語を表現するためのベクトルの箱数

        # 計算グラフを作成
        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()
    
    def build(self):
        # 各プレースホルダーを定義
        # 入力データ
        tf_x = tf.placeholder(
            tf.int32
            ,shape=(self.batch_size, self.seq_len)
            ,name='tf_x')
        # 入力ラベル
        tf_y = tf.placeholder(
            tf.float32
            ,shape=(self.batch_size)
            ,name='tf_y')
        # ドロップアウトのキープ率
        tf_keepprob = tf.placeholder(
            tf.float32
            ,name='tf_keepprob')
        
        # 埋め込み層を作成
        embedding = tf.Variable(
                    tf.random_uniform(
                        (self.n_words, self.embed_size),
                        minval=-1, maxval=1),
                    name='embedding')
        embed_x = tf.nn.embedding_lookup(
                    embedding, tf_x, 
                    name='embeded_x')
        
        # LSTMセル(長短期記憶)を定義し、積み上げる
        # tf.contrib.rnn.MultiRNNCell
        cells = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.DropoutWrapper(
                   tf.nn.rnn_cell.BasicLSTMCell(self.lstm_size),
                   output_keep_prob=tf_keepprob)
                 for i in range(self.num_layers)])
        
        # LSTMの初期状態を定義(全て0を設定)
        self.initial_state = cells.zero_state(self.batch_size, tf.float32)
        print('  << initial state >> ', self.initial_state)
        
        # LSTMのアウトプットと最終状態
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
            cells
            ,embed_x
            ,initial_state=self.initial_state)
        
        print('\n  << lstm_output   >> ', lstm_outputs)      # バッチサイズ,最大時間,アウトプットサイズ
        print('\n  << final state   >> ', self.final_state)
        
        # RNNの出力後に全結合層を適用
        # tf.layers.dense :: 全結合レイヤーを構築。引数としてニューロンの数と活性化関数をとる。
        logits = tf.layers.dense(
         inputs=lstm_outputs[:, -1],
         units=1, activation=None,
         name='logits')
        
        # tf.squeeze :: sizeが1の次元を削除し次元数を減らすAPI
        logits = tf.squeeze(logits, name='logits_squeezed')
        print ('\n  << logits        >> ', logits)
        
        # シグモイド関数を適用する
        y_proba = tf.nn.sigmoid(logits, name='probabilities')
        predictions = {
            'probabilities': y_proba,
            'labels' : tf.cast(tf.round(y_proba), tf.int32,
                 name='labels')
        }
        print('\n  << predictions   >> ', predictions)
        
        # コスト関数を定義する
        # tf.reduce_mean :: 与えたリストに入っている数値の平均値を求める関数
        cost = tf.reduce_mean(
         tf.nn.sigmoid_cross_entropy_with_logits(
         labels=tf_y, logits=logits),
         name='cost')
        
        # オプティマイザを定義する
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name='train_op')

    def train(self, X_train, y_train, num_epochs):
        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state) # セルの状態を初期状態に更新
                
                for batch_x, batch_y in create_batch_generator(
                            X_train, y_train, self.batch_size):
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': 0.5,
                            self.initial_state : state}
                    loss, _, state = sess.run(
                            ['cost:0', 'train_op', 
                             self.final_state],      # セルの状態を最終状態に更新
                            feed_dict=feed)

                    if iteration % 20 == 0:
                        print("Epoch: %d/%d Iteration: %d "
                              "| Train loss: %.5f" % (
                               epoch + 1, num_epochs,
                               iteration, loss))

                    iteration +=1 
                    
                if (epoch+1)%10 == 0:
                    self.saver.save(sess,
                        "model/sentiment-%d.ckpt" % epoch)
                    
    def predict(self, X_data, return_proba=False):
        preds=[]
        with tf.Session(graph = self.g) as sess:
            self.saver.restore(
                sess, tf.train.latest_checkpoint('model/'))
            test_state = sess.run(self.initial_state)
            for ii, batch_x in enumerate(
                create_batch_generator(
                    X_data, None, batch_size=self.batch_size), 1):
                feed = {'tf_x:0' : batch_x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state : test_state}
                if return_proba:
                    pred, test_state = sess.run(
                        ['probabilities:0', self.final_state],
                        feed_dict=feed)
                else:
                    pred, test_state = sess.run(
                        ['labels:0', self.final_state],
                        feed_dict=feed)
                    
                preds.append(pred)
                
        return np.concatenate(preds)

In [112]:
# SentimentRNNクラスのインスタンス化
n_words = max(list(word_to_int.values())) + 1

rnn = SentimentRNN(n_words=n_words,
                  seq_len=sequence_length,
                  embed_size=256,
                  lstm_size=128,
                  num_layers=1,
                  batch_size=100,
                  learning_rate=0.001)

  << initial state >>  (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 128) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 128) dtype=float32>),)

  << lstm_output   >>  Tensor("rnn/transpose_1:0", shape=(100, 200, 128), dtype=float32)

  << final state   >>  (LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_3:0' shape=(100, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_4:0' shape=(100, 128) dtype=float32>),)

  << logits        >>  Tensor("logits_squeezed:0", shape=(100,), dtype=float32)

  << predictions   >>  {'probabilities': <tf.Tensor 'probabilities:0' shape=(100,) dtype=float32>, 'labels': <tf.Tensor 'labels:0' shape=(100,) dtype=int32>}


In [None]:
rnn.train(X_train, y_train, num_epochs=40)

Epoch: 1/40 Iteration: 20 | Train loss: 0.00027
Epoch: 1/40 Iteration: 40 | Train loss: 0.00008
Epoch: 1/40 Iteration: 60 | Train loss: 0.00004
Epoch: 1/40 Iteration: 80 | Train loss: 0.00003
Epoch: 1/40 Iteration: 100 | Train loss: 0.00003
Epoch: 1/40 Iteration: 120 | Train loss: 0.00003
Epoch: 1/40 Iteration: 140 | Train loss: 1.58993
Epoch: 1/40 Iteration: 160 | Train loss: 0.03236
Epoch: 1/40 Iteration: 180 | Train loss: 0.00912
Epoch: 1/40 Iteration: 200 | Train loss: 0.00508
Epoch: 1/40 Iteration: 220 | Train loss: 0.00282
Epoch: 1/40 Iteration: 240 | Train loss: 0.00281
Epoch: 2/40 Iteration: 260 | Train loss: 2.85194
Epoch: 2/40 Iteration: 280 | Train loss: 0.76411
Epoch: 2/40 Iteration: 300 | Train loss: 0.26356
Epoch: 2/40 Iteration: 320 | Train loss: 0.12397
Epoch: 2/40 Iteration: 340 | Train loss: 0.07835
Epoch: 2/40 Iteration: 360 | Train loss: 0.04867
Epoch: 2/40 Iteration: 380 | Train loss: 3.09539
Epoch: 2/40 Iteration: 400 | Train loss: 1.13746
Epoch: 2/40 Iteration: 4

Epoch: 14/40 Iteration: 3300 | Train loss: 0.12962
Epoch: 14/40 Iteration: 3320 | Train loss: 0.05517
Epoch: 14/40 Iteration: 3340 | Train loss: 0.12987
Epoch: 14/40 Iteration: 3360 | Train loss: 0.09286
Epoch: 14/40 Iteration: 3380 | Train loss: 0.63470
Epoch: 14/40 Iteration: 3400 | Train loss: 0.34697
Epoch: 14/40 Iteration: 3420 | Train loss: 0.18320
Epoch: 14/40 Iteration: 3440 | Train loss: 0.07953
Epoch: 14/40 Iteration: 3460 | Train loss: 0.16688
Epoch: 14/40 Iteration: 3480 | Train loss: 0.16607
Epoch: 14/40 Iteration: 3500 | Train loss: 0.07785
Epoch: 15/40 Iteration: 3520 | Train loss: 0.36752
Epoch: 15/40 Iteration: 3540 | Train loss: 0.18010
Epoch: 15/40 Iteration: 3560 | Train loss: 0.15982
Epoch: 15/40 Iteration: 3580 | Train loss: 0.13659
Epoch: 15/40 Iteration: 3600 | Train loss: 0.11236
Epoch: 15/40 Iteration: 3620 | Train loss: 0.06498
Epoch: 15/40 Iteration: 3640 | Train loss: 0.35236
Epoch: 15/40 Iteration: 3660 | Train loss: 0.15630
Epoch: 15/40 Iteration: 3680 | 

In [None]:
# テストデータの推計
preds = rnn.predict(X_test)
y_true = y_test[:len(preds)]
print('Test Acc.: %.3f' % (
      np.sum(preds == y_true) / len(y_true)))

In [None]:
# 推計の実施
proba = rnn.predict(X_test, return_proba=True)

### 文字レベルの言語モデルとしてRNNを実装

In [10]:
import numpy as np

with open('pg2265.txt', 'r', encoding='utf-8') as f: 
    text=f.read()

# 文字列の15858文字以降を取得
text = text[15858:]
# 集合型に変換する
chars = set(text)
# 文字ごとにインデックスを付与する{'t':0,}
char2int = {ch:i for i,ch in enumerate(chars)}
# インデックスごとに文字を当てはめる{0: 't',}
int2char = dict(enumerate(chars))
# char2int(インデックス)を配列にする
text_ints = np.array([char2int[ch] for ch in text], 
                     dtype=np.int32)

In [60]:
def reshape_data(sequence, batch_size, num_steps):
    # バッチサイズ×ステップの個数
    tot_batch_length = batch_size * num_steps
    # バッチの個数
    num_batches = int(len(sequence) / tot_batch_length)
    print("num_batches :: %d" % num_batches)
    
    # バッチの個数×バッチサイズ×ステップの個数
    if num_batches*tot_batch_length + 1 > len(sequence):
        num_batches = num_batches - 1
    
    # x :: バッチの個数×バッチサイズ×ステップの個数
    # y :: バッチの個数×バッチサイズ×ステップの個数を1つずらしたもの
    x = sequence[0:num_batches*tot_batch_length]
    y = sequence[1:num_batches*tot_batch_length+1]
    
    # xとyのシーケンスをバッチサイズに分割
    x_batch_splits = np.split(x, batch_size)
    y_batch_splits = np.split(y, batch_size)
    
    # それらのバッチを結合
    # [1 1 1] + [2 2 2] -> [[1,1,1],[2,2,2]]
    x = np.stack(x_batch_splits)
    y = np.stack(y_batch_splits)
    
    return x, y

In [61]:
# reshape_data関数のテスト
train_x, train_y = reshape_data(text_ints, 64, 10)
print(train_x.shape)
print(train_x[0, :10])
print(train_y[0, :10])
print(''.join(int2char[i] for i in train_x[0, :50]))

num_batches :: 254
(64, 2540)
[57  4 63 22 57  8  3 64 63 46]
[ 4 63 22 57  8  3 64 63 46 49]
The Tragedie of Hamlet

Actus Primus. Scoena Prima


In [62]:
np.random.seed(123)

def create_batch_generator(data_x, data_y, num_steps):
    batch_size, tot_batch_length = data_x.shape
    num_batches = int(tot_batch_length/num_steps)
    for b in range(num_batches):
        yield(data_x[:, b*num_steps: (b+1)*num_steps], 
             data_y[:, b*num_steps: (b+1)*num_steps])

In [63]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import os

In [64]:
def get_top_char(probas, char_size, top_n=5):
    # np.squeeze :: サイズ1の次元を削除する [[[[1][2][3]]]][[[[4][5][6]]]] -> [[1 2 3][4 5 6]]
    p = np.squeeze(probas)
    # np.argsort :: 並び変えたインデックスの位置を返却する
    p[np.argsort(p)[:-top_n]] = 0.0
    p = p / np.sum(p)
    # np.random.choice :: ランダムに1つの値をチョイスする
    ch_id = np.random.choice(char_size, 1, p=p)[0]
    return ch_id

In [65]:
class CharRNN(object):
    def __init__(self, num_classes, batch_size=64, 
                 num_steps=100, lstm_size=128, 
                 num_layers=1, learning_rate=0.001, 
                 keep_prob=0.5, grad_clip=5, 
                 sampling=False):
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lstm_size = lstm_size
        self.num_layers = num_layers
        self.learning_rate = learning_rate
        self.keep_prob = keep_prob
        self.grad_clip = grad_clip
        
        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)

            self.build(sampling=sampling)
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()
        
    def build(self, sampling):
        if sampling == True:
            batch_size, num_steps = 1, 1 # サンプルモード
        else:
            batch_size = self.batch_size
            num_steps = self.num_steps   # トレーニングモード
    
        tf_x = tf.placeholder(tf.int32, 
                              shape=[batch_size, num_steps], 
                              name='tf_x')
        tf_y = tf.placeholder(tf.int32, 
                              shape=[batch_size, num_steps], 
                              name='tf_y')
        tf_keepprob = tf.placeholder(tf.float32, 
                              name='tf_keepprob')

        # One-hot encoding:
        # depth :: 分類するクラスの数
        x_onehot = tf.one_hot(tf_x, depth=self.num_classes)
        y_onehot = tf.one_hot(tf_y, depth=self.num_classes)
        
        # LSTMセル(長短期記憶)を定義し、積み上げる
        # tf.contrib.rnn.MultiRNNCell
        cells = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.DropoutWrapper(
               tf.nn.rnn_cell.BasicLSTMCell(self.lstm_size),
               output_keep_prob=tf_keepprob)
             for i in range(self.num_layers)])
        
        # セルの初期値を設定
        self.initial_state = cells.zero_state(
            batch_size, tf.float32)
        
        # LSTMのアウトプットと最終状態を取得
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
            cells, x_onehot, 
            initial_state=self.initial_state)
        
        print('  << lstm_outputs  >>', lstm_outputs)
        
        # 2次元のテンソルに変換
        # batch_size, num_steps, lstm_size -> batch_size*num_steps, lstm_size
        seq_output_reshaped = tf.reshape(
            lstm_outputs, 
            shape=[-1, self.lstm_size],
            name='seq_output_reshaped')
        
        # 全結合層に渡して総入力を取得
        logits = tf.layers.dense(
            inputs=seq_output_reshaped, 
            units=self.num_classes,
            activation=None,
            name='logits')
        
        # ソフトマックス関数で次の文字バッチの確率を計算
        proba = tf.nn.softmax(
            logits, 
            name='probabilities')
        print(proba)
        
        # yを2次元のテンソルに変換
        y_reshaped = tf.reshape(
            y_onehot, 
            shape=[-1, self.num_classes],
            name='y_reshaped')
        
        # コスト関数を定義する
        # tf.reduce_mean :: 与えたリストに入っている数値の平均値を求める関数
        cost = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, 
                        labels=y_reshaped),
                    name='cost')
        
        # 勾配発散問題を回避するための勾配刈り込み
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
                    tf.gradients(cost, tvars), 
                    self.grad_clip)
        
        # オプティマイザを定義
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.apply_gradients(
                    zip(grads, tvars),
                    name='train_op')
        
    def train(self, train_x, train_y, 
          num_epochs, ckpt_dir='./model/'):

        # チェックポイントディレクトリが存在しない場合は作成
        if not os.path.exists(ckpt_dir):
            os.mkdir(ckpt_dir)
        
        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)

            n_batches = int(train_x.shape[1]/self.num_steps)
            iterations = n_batches * num_epochs
            
            for epoch in range(num_epochs):

                # ネットワークをトレーニング
                new_state = sess.run(self.initial_state)
                loss = 0
                
                # ミニバッチジェネレーター
                bgen = create_batch_generator(
                        train_x, train_y, self.num_steps)
                for b, (batch_x, batch_y) in enumerate(bgen, 1):
                    iteration = epoch*n_batches + b
                    
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': self.keep_prob,
                            self.initial_state : new_state}
                    batch_cost, _, new_state = sess.run(
                            ['cost:0', 'train_op', 
                                self.final_state],
                            feed_dict=feed)
                    if iteration % 10 == 0:
                        print('Epoch %d/%d Iteration %d'
                              '| Training loss: %.4f' % (
                              epoch + 1, num_epochs, 
                              iteration, batch_cost))

                # トレーニング済みのモデルを保存
                self.saver.save(
                        sess, os.path.join(
                            ckpt_dir, 'language_modeling.ckpt'))
                
    def sample(self, output_length, 
               ckpt_dir, starter_seq="The "):
        observed_seq = [ch for ch in starter_seq]        
        with tf.Session(graph=self.g) as sess:
            self.saver.restore(
                sess, 
                tf.train.latest_checkpoint(ckpt_dir))
            
            # 1: starter_seqを使ってモデルを実行
            new_state = sess.run(self.initial_state)
            for ch in starter_seq:
                x = np.zeros((1, 1))
                x[0,0] = char2int[ch]
                feed = {'tf_x:0': x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: new_state}
                proba, new_state = sess.run(
                        ['probabilities:0', self.final_state], 
                        feed_dict=feed)

            ch_id = get_top_char(proba, len(chars))
            observed_seq.append(int2char[ch_id])
            
            # 2: 更新されたobserved_seqを使ってモデルを実行
            for i in range(output_length):
                x[0,0] = ch_id
                feed = {'tf_x:0': x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: new_state}
                proba, new_state = sess.run(
                        ['probabilities:0', self.final_state], 
                        feed_dict=feed)

                ch_id = get_top_char(proba, len(chars))
                observed_seq.append(int2char[ch_id])

        return ''.join(observed_seq)

In [66]:
batch_size = 64
num_steps = 100 
train_x, train_y = reshape_data(text_ints, 
                                batch_size, 
                                num_steps)

rnn = CharRNN(num_classes=len(chars), batch_size=batch_size)
rnn.train(train_x, train_y, 
          num_epochs=100,
          ckpt_dir='./model-100/')

num_batches :: 25
  << lstm_outputs  >> Tensor("rnn/transpose_1:0", shape=(64, 100, 128), dtype=float32)
Tensor("probabilities:0", shape=(6400, 65), dtype=float32)
Epoch 1/100 Iteration 10| Training loss: 3.6289
Epoch 1/100 Iteration 20| Training loss: 3.3708
Epoch 2/100 Iteration 30| Training loss: 3.2857
Epoch 2/100 Iteration 40| Training loss: 3.2490
Epoch 2/100 Iteration 50| Training loss: 3.2240
Epoch 3/100 Iteration 60| Training loss: 3.2153
Epoch 3/100 Iteration 70| Training loss: 3.1800
Epoch 4/100 Iteration 80| Training loss: 3.1639
Epoch 4/100 Iteration 90| Training loss: 3.1429
Epoch 4/100 Iteration 100| Training loss: 3.1331
Epoch 5/100 Iteration 110| Training loss: 3.1153
Epoch 5/100 Iteration 120| Training loss: 3.0724
Epoch 6/100 Iteration 130| Training loss: 3.0306
Epoch 6/100 Iteration 140| Training loss: 2.9955
Epoch 6/100 Iteration 150| Training loss: 2.9627
Epoch 7/100 Iteration 160| Training loss: 2.9450
Epoch 7/100 Iteration 170| Training loss: 2.8844
Epoch 8/100 

Epoch 65/100 Iteration 1610| Training loss: 1.9990
Epoch 65/100 Iteration 1620| Training loss: 1.9786
Epoch 66/100 Iteration 1630| Training loss: 1.9748
Epoch 66/100 Iteration 1640| Training loss: 2.0000
Epoch 66/100 Iteration 1650| Training loss: 1.9396
Epoch 67/100 Iteration 1660| Training loss: 1.9993
Epoch 67/100 Iteration 1670| Training loss: 1.9641
Epoch 68/100 Iteration 1680| Training loss: 1.9769
Epoch 68/100 Iteration 1690| Training loss: 2.0020
Epoch 68/100 Iteration 1700| Training loss: 1.9398
Epoch 69/100 Iteration 1710| Training loss: 1.9912
Epoch 69/100 Iteration 1720| Training loss: 1.9642
Epoch 70/100 Iteration 1730| Training loss: 1.9720
Epoch 70/100 Iteration 1740| Training loss: 1.9885
Epoch 70/100 Iteration 1750| Training loss: 1.9215
Epoch 71/100 Iteration 1760| Training loss: 1.9840
Epoch 71/100 Iteration 1770| Training loss: 1.9495
Epoch 72/100 Iteration 1780| Training loss: 1.9705
Epoch 72/100 Iteration 1790| Training loss: 1.9832
Epoch 72/100 Iteration 1800| Tr