In [11]:
import tensorflow as tf
import os
from six.moves import cPickle
import collections
import numpy as np
import codecs
import urllib

### 读取和预处理数据

In [12]:
DATA_DIR = "./data/sherlock/"
DOWNLOAD_URL = "https://sherlock-holm.es/stories/plain-text/cnus.txt"

def make_dir(path):
    try:
        os.mkdir(path)
    except OSError:
        pass

make_dir("./data")
make_dir(DATA_DIR)

def download(file_path):
    if os.path.exists(file_path):
        print("Dataset ready")
        return file_path
    file_name, _ = urllib.request.urlretrieve(DOWNLOAD_URL, file_path)
    return file_path

file_path = download(os.path.join(DATA_DIR, "input.txt"))



input_file = os.path.join(DATA_DIR, "input.txt")
vocab_file = os.path.join(DATA_DIR, "vocab.pkl")
tensor_file = os.path.join(DATA_DIR, "data.npy")
    

with codecs.open(input_file, "r", encoding="utf-8") as f:
    data = f.read()
counter = collections.Counter(data)
counter_pairs = sorted(counter.items(), key=lambda x: -x[1])
chars, _ = zip(*counter_pairs)
vocab_size = len(chars)
vocab = dict(zip(chars, range(len(chars))))
tensor = np.array(list(map(vocab.get, data)))
    

Dataset ready


### 把数据处理成batch

In [7]:
batch_size = 50
seq_length = 50
num_batches = int(tensor.size / (batch_size * seq_length))
if num_batches == 0:
    assert False, "Not enough data. Make seq_length and batch_size small."
tensor = tensor[:num_batches * batch_size * seq_length]
xdata = tensor
ydata = np.copy(tensor)
ydata[:-1] = xdata[1:]
ydata[-1] = xdata[0]
x_batches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
y_batches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
pointer = 0

### 定义RNN模型

In [8]:
from tensorflow.contrib import rnn
from tensorflow.contrib import legacy_seq2seq


class Model():
    def __init__(self, training=True):
        self.batch_size = 50
        self.seq_length = 50
        if not training:
            self.batch_size = 1
            self.seq_length = 1
            
        self.rnn_size = 128
        self.num_layers = 2
        self.input_keep_prob = 1.0
        self.output_keep_prob = 1.0
        self.grad_clip = 5.0
        self.training = 1

        with tf.device("/gpu:0"):
            cell_fn = rnn.BasicRNNCell

            cells = []
            for _ in range(self.num_layers):
                cell = cell_fn(self.rnn_size)
                # dropout
                if training and (self.input_keep_prob < 1.0 or self.output_keep_prob < 1.0):
                    cell = rnn.DropoutWrapper(cell, input_keep_prob=self.input_keep_prob,
                                             output_keep_prob=self.output_keep_prob)
                cells.append(cell)

            self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

            # placeholder for input and output
            self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])
            self.output_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])
            self.initial_state = cell.zero_state(self.batch_size, tf.float32)
            with tf.variable_scope("rnnlm"):
                softmax_w = tf.get_variable("softmax_w", [self.rnn_size, vocab_size])
                softmax_b = tf.get_variable("softmax_b", [vocab_size])
            embedding = tf.get_variable("embedding", [vocab_size, self.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            if training and self.output_keep_prob:
                inputs = tf.nn.dropout(inputs, self.output_keep_prob)

            inputs = tf.split(inputs, self.seq_length, 1) # returns self.seq_length Tensor objects
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs] # squeeze the first dimension, got [B * embed_size] * self.seq_length Tensor objects

            def loop(prev, _):
                prev = tf.matmul(prev, softmax_w) + softmax_b
                prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
                return tf.nn.embedding_lookup(embedding, prev_symbol)

            # run an rnn on the sequence
            outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, 
                                                             loop_function=loop if not training else None, scope="rnnlm")
            output = tf.reshape(tf.concat(outputs, 1), [-1, self.rnn_size])
            self.logits = tf.matmul(output, softmax_w) + softmax_b
            self.probs = tf.nn.softmax(self.logits)

            # loss over the whole sequence
            loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                                                          [tf.reshape(self.output_data, [-1])],
                                                          [tf.ones([self.batch_size * self.seq_length])])
            with tf.name_scope("cost"):
                self.cost = tf.reduce_sum(loss) / batch_size / seq_length
            self.final_state = last_state
            self.lr = tf.Variable(0.0, trainable=False)
            # gradient clipping on trainable variables
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.grad_clip)

            # optimizer and train
            with tf.name_scope("optimizer"):
                optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    
    def sample(self, sess, chars, vocab, num=200, prime="The "):
        state = sess.run(self.cell.zero_state(1, tf.float32))
        for char in prime[:-1]:
            x = np.zeros((1,1))
            x[0, 0] = vocab[char]
            feed = {self.input_data: x, self.initial_state: state}
            [state] = sess.run([self.final_state], feed)

        def weighted_pick(weights):
            t = np.cumsum(weights)
            s = np.sum(weights)
            return (int(np.searchsorted(t, np.random.rand(1)*s)))

        ret = prime
        char = prime[-1]
        for n in range(num):
            x = np.zeros((1,1))
            x[0, 0] = vocab[char]
            feed = {self.input_data: x, self.initial_state: state}
            [p, state] = sess.run([self.probs, self.final_state], feed)
            p = p[0]
            sample = weighted_pick(p)

            pred = chars[sample]
            ret += pred
            char = pred
        return ret



### 开始训练RNN

In [9]:
import time
num_epochs = 10
learning_rate = 0.002
decay_rate = 0.97
save_dir = "./save/"
save_every = 500


tf.reset_default_graph()
model = Model() 


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())   
    saver = tf.train.Saver(tf.global_variables())

    for e in range(num_epochs):
        sess.run(tf.assign(model.lr, learning_rate*(decay_rate ** e)))
        # reset the pointer to load from the beginning
        pointer = 0
        state = sess.run(model.initial_state)
        for b in range(num_batches):
            start = time.time()
            x, y = x_batches[pointer], y_batches[pointer]
            pointer += 1
            feed = {model.input_data: x, model.output_data: y}
            feed[model.initial_state] = state
                
            train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
            end = time.time()
            print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch={:.3f}".format(
                e * num_batches + b, num_epochs * num_batches, e, train_loss, end-start))
            
            if ( e * num_batches + b ) % save_every == 0 or ( 
                e == num_epochs - 1 and b == num_batches - 1):
                checkpoint_path = os.path.join(save_dir, "model.ckpt")
                saver.save(sess, checkpoint_path, global_step=e*num_batches + b)
                print("model saved to {}".format(checkpoint_path))
                
        
        

0/13520 (epoch 0), train_loss = 4.639, time/batch=0.631
model saved to ./save/model.ckpt
1/13520 (epoch 0), train_loss = 4.493, time/batch=0.028
2/13520 (epoch 0), train_loss = 4.212, time/batch=0.025
3/13520 (epoch 0), train_loss = 3.801, time/batch=0.025
4/13520 (epoch 0), train_loss = 3.489, time/batch=0.026
5/13520 (epoch 0), train_loss = 3.394, time/batch=0.026
6/13520 (epoch 0), train_loss = 3.252, time/batch=0.025
7/13520 (epoch 0), train_loss = 3.190, time/batch=0.025
8/13520 (epoch 0), train_loss = 3.124, time/batch=0.026
9/13520 (epoch 0), train_loss = 3.050, time/batch=0.026
10/13520 (epoch 0), train_loss = 2.996, time/batch=0.026
11/13520 (epoch 0), train_loss = 3.003, time/batch=0.026
12/13520 (epoch 0), train_loss = 3.018, time/batch=0.024
13/13520 (epoch 0), train_loss = 3.008, time/batch=0.024
14/13520 (epoch 0), train_loss = 3.013, time/batch=0.023
15/13520 (epoch 0), train_loss = 2.963, time/batch=0.024
16/13520 (epoch 0), train_loss = 3.003, time/batch=0.024
17/13520

144/13520 (epoch 0), train_loss = 2.028, time/batch=0.017
145/13520 (epoch 0), train_loss = 2.080, time/batch=0.018
146/13520 (epoch 0), train_loss = 2.019, time/batch=0.018
147/13520 (epoch 0), train_loss = 2.040, time/batch=0.017
148/13520 (epoch 0), train_loss = 2.070, time/batch=0.017
149/13520 (epoch 0), train_loss = 1.960, time/batch=0.017
150/13520 (epoch 0), train_loss = 2.038, time/batch=0.018
151/13520 (epoch 0), train_loss = 2.001, time/batch=0.018
152/13520 (epoch 0), train_loss = 1.985, time/batch=0.018
153/13520 (epoch 0), train_loss = 2.050, time/batch=0.018
154/13520 (epoch 0), train_loss = 1.955, time/batch=0.018
155/13520 (epoch 0), train_loss = 2.040, time/batch=0.018
156/13520 (epoch 0), train_loss = 2.055, time/batch=0.018
157/13520 (epoch 0), train_loss = 2.037, time/batch=0.022
158/13520 (epoch 0), train_loss = 1.961, time/batch=0.020
159/13520 (epoch 0), train_loss = 2.022, time/batch=0.017
160/13520 (epoch 0), train_loss = 1.992, time/batch=0.018
161/13520 (epo

297/13520 (epoch 0), train_loss = 1.795, time/batch=0.020
298/13520 (epoch 0), train_loss = 1.783, time/batch=0.019
299/13520 (epoch 0), train_loss = 1.851, time/batch=0.017
300/13520 (epoch 0), train_loss = 1.804, time/batch=0.018
301/13520 (epoch 0), train_loss = 1.837, time/batch=0.018
302/13520 (epoch 0), train_loss = 1.813, time/batch=0.018
303/13520 (epoch 0), train_loss = 1.791, time/batch=0.019
304/13520 (epoch 0), train_loss = 1.830, time/batch=0.018
305/13520 (epoch 0), train_loss = 1.834, time/batch=0.018
306/13520 (epoch 0), train_loss = 1.785, time/batch=0.018
307/13520 (epoch 0), train_loss = 1.784, time/batch=0.017
308/13520 (epoch 0), train_loss = 1.838, time/batch=0.018
309/13520 (epoch 0), train_loss = 1.815, time/batch=0.017
310/13520 (epoch 0), train_loss = 1.782, time/batch=0.017
311/13520 (epoch 0), train_loss = 1.807, time/batch=0.018
312/13520 (epoch 0), train_loss = 1.747, time/batch=0.019
313/13520 (epoch 0), train_loss = 1.785, time/batch=0.019
314/13520 (epo

448/13520 (epoch 0), train_loss = 1.656, time/batch=0.018
449/13520 (epoch 0), train_loss = 1.672, time/batch=0.017
450/13520 (epoch 0), train_loss = 1.643, time/batch=0.018
451/13520 (epoch 0), train_loss = 1.803, time/batch=0.018
452/13520 (epoch 0), train_loss = 1.735, time/batch=0.017
453/13520 (epoch 0), train_loss = 1.644, time/batch=0.018
454/13520 (epoch 0), train_loss = 1.639, time/batch=0.017
455/13520 (epoch 0), train_loss = 1.688, time/batch=0.019
456/13520 (epoch 0), train_loss = 1.711, time/batch=0.019
457/13520 (epoch 0), train_loss = 1.715, time/batch=0.018
458/13520 (epoch 0), train_loss = 1.728, time/batch=0.018
459/13520 (epoch 0), train_loss = 1.722, time/batch=0.018
460/13520 (epoch 0), train_loss = 1.702, time/batch=0.018
461/13520 (epoch 0), train_loss = 1.627, time/batch=0.018
462/13520 (epoch 0), train_loss = 1.703, time/batch=0.019
463/13520 (epoch 0), train_loss = 1.713, time/batch=0.018
464/13520 (epoch 0), train_loss = 1.653, time/batch=0.018
465/13520 (epo

596/13520 (epoch 0), train_loss = 1.596, time/batch=0.018
597/13520 (epoch 0), train_loss = 1.584, time/batch=0.018
598/13520 (epoch 0), train_loss = 1.578, time/batch=0.018
599/13520 (epoch 0), train_loss = 1.585, time/batch=0.018
600/13520 (epoch 0), train_loss = 1.643, time/batch=0.018
601/13520 (epoch 0), train_loss = 1.604, time/batch=0.018
602/13520 (epoch 0), train_loss = 1.596, time/batch=0.018
603/13520 (epoch 0), train_loss = 1.623, time/batch=0.018
604/13520 (epoch 0), train_loss = 1.652, time/batch=0.018
605/13520 (epoch 0), train_loss = 1.575, time/batch=0.018
606/13520 (epoch 0), train_loss = 1.679, time/batch=0.018
607/13520 (epoch 0), train_loss = 1.580, time/batch=0.018
608/13520 (epoch 0), train_loss = 1.640, time/batch=0.018
609/13520 (epoch 0), train_loss = 1.611, time/batch=0.018
610/13520 (epoch 0), train_loss = 1.572, time/batch=0.018
611/13520 (epoch 0), train_loss = 1.674, time/batch=0.018
612/13520 (epoch 0), train_loss = 1.630, time/batch=0.019
613/13520 (epo

749/13520 (epoch 0), train_loss = 1.553, time/batch=0.018
750/13520 (epoch 0), train_loss = 1.548, time/batch=0.018
751/13520 (epoch 0), train_loss = 1.513, time/batch=0.018
752/13520 (epoch 0), train_loss = 1.539, time/batch=0.018
753/13520 (epoch 0), train_loss = 1.590, time/batch=0.018
754/13520 (epoch 0), train_loss = 1.476, time/batch=0.024
755/13520 (epoch 0), train_loss = 1.529, time/batch=0.029
756/13520 (epoch 0), train_loss = 1.616, time/batch=0.046
757/13520 (epoch 0), train_loss = 1.589, time/batch=0.019
758/13520 (epoch 0), train_loss = 1.524, time/batch=0.017
759/13520 (epoch 0), train_loss = 1.501, time/batch=0.018
760/13520 (epoch 0), train_loss = 1.602, time/batch=0.017
761/13520 (epoch 0), train_loss = 1.536, time/batch=0.018
762/13520 (epoch 0), train_loss = 1.502, time/batch=0.018
763/13520 (epoch 0), train_loss = 1.578, time/batch=0.018
764/13520 (epoch 0), train_loss = 1.653, time/batch=0.017
765/13520 (epoch 0), train_loss = 1.508, time/batch=0.017
766/13520 (epo

901/13520 (epoch 0), train_loss = 1.582, time/batch=0.017
902/13520 (epoch 0), train_loss = 1.585, time/batch=0.018
903/13520 (epoch 0), train_loss = 1.524, time/batch=0.019
904/13520 (epoch 0), train_loss = 1.533, time/batch=0.019
905/13520 (epoch 0), train_loss = 1.574, time/batch=0.019
906/13520 (epoch 0), train_loss = 1.519, time/batch=0.019
907/13520 (epoch 0), train_loss = 1.514, time/batch=0.017
908/13520 (epoch 0), train_loss = 1.565, time/batch=0.018
909/13520 (epoch 0), train_loss = 1.521, time/batch=0.018
910/13520 (epoch 0), train_loss = 1.532, time/batch=0.018
911/13520 (epoch 0), train_loss = 1.576, time/batch=0.018
912/13520 (epoch 0), train_loss = 1.510, time/batch=0.018
913/13520 (epoch 0), train_loss = 1.534, time/batch=0.018
914/13520 (epoch 0), train_loss = 1.545, time/batch=0.018
915/13520 (epoch 0), train_loss = 1.478, time/batch=0.021
916/13520 (epoch 0), train_loss = 1.511, time/batch=0.026
917/13520 (epoch 0), train_loss = 1.501, time/batch=0.018
918/13520 (epo

1047/13520 (epoch 0), train_loss = 1.544, time/batch=0.018
1048/13520 (epoch 0), train_loss = 1.533, time/batch=0.018
1049/13520 (epoch 0), train_loss = 1.510, time/batch=0.017
1050/13520 (epoch 0), train_loss = 1.487, time/batch=0.018
1051/13520 (epoch 0), train_loss = 1.439, time/batch=0.017
1052/13520 (epoch 0), train_loss = 1.494, time/batch=0.018
1053/13520 (epoch 0), train_loss = 1.451, time/batch=0.017
1054/13520 (epoch 0), train_loss = 1.526, time/batch=0.018
1055/13520 (epoch 0), train_loss = 1.508, time/batch=0.018
1056/13520 (epoch 0), train_loss = 1.429, time/batch=0.018
1057/13520 (epoch 0), train_loss = 1.503, time/batch=0.017
1058/13520 (epoch 0), train_loss = 1.573, time/batch=0.017
1059/13520 (epoch 0), train_loss = 1.468, time/batch=0.018
1060/13520 (epoch 0), train_loss = 1.496, time/batch=0.018
1061/13520 (epoch 0), train_loss = 1.504, time/batch=0.018
1062/13520 (epoch 0), train_loss = 1.485, time/batch=0.021
1063/13520 (epoch 0), train_loss = 1.419, time/batch=0.0

1190/13520 (epoch 0), train_loss = 1.450, time/batch=0.017
1191/13520 (epoch 0), train_loss = 1.462, time/batch=0.017
1192/13520 (epoch 0), train_loss = 1.429, time/batch=0.017
1193/13520 (epoch 0), train_loss = 1.463, time/batch=0.017
1194/13520 (epoch 0), train_loss = 1.415, time/batch=0.017
1195/13520 (epoch 0), train_loss = 1.416, time/batch=0.017
1196/13520 (epoch 0), train_loss = 1.490, time/batch=0.017
1197/13520 (epoch 0), train_loss = 1.493, time/batch=0.018
1198/13520 (epoch 0), train_loss = 1.478, time/batch=0.017
1199/13520 (epoch 0), train_loss = 1.450, time/batch=0.018
1200/13520 (epoch 0), train_loss = 1.454, time/batch=0.018
1201/13520 (epoch 0), train_loss = 1.424, time/batch=0.018
1202/13520 (epoch 0), train_loss = 1.447, time/batch=0.018
1203/13520 (epoch 0), train_loss = 1.451, time/batch=0.018
1204/13520 (epoch 0), train_loss = 1.536, time/batch=0.018
1205/13520 (epoch 0), train_loss = 1.444, time/batch=0.017
1206/13520 (epoch 0), train_loss = 1.401, time/batch=0.0

1334/13520 (epoch 0), train_loss = 1.420, time/batch=0.018
1335/13520 (epoch 0), train_loss = 1.476, time/batch=0.018
1336/13520 (epoch 0), train_loss = 1.471, time/batch=0.018
1337/13520 (epoch 0), train_loss = 1.448, time/batch=0.018
1338/13520 (epoch 0), train_loss = 1.499, time/batch=0.018
1339/13520 (epoch 0), train_loss = 1.442, time/batch=0.018
1340/13520 (epoch 0), train_loss = 1.418, time/batch=0.018
1341/13520 (epoch 0), train_loss = 1.449, time/batch=0.018
1342/13520 (epoch 0), train_loss = 1.491, time/batch=0.018
1343/13520 (epoch 0), train_loss = 1.447, time/batch=0.018
1344/13520 (epoch 0), train_loss = 1.428, time/batch=0.018
1345/13520 (epoch 0), train_loss = 1.479, time/batch=0.018
1346/13520 (epoch 0), train_loss = 1.426, time/batch=0.018
1347/13520 (epoch 0), train_loss = 1.478, time/batch=0.017
1348/13520 (epoch 0), train_loss = 1.515, time/batch=0.018
1349/13520 (epoch 0), train_loss = 1.533, time/batch=0.017
1350/13520 (epoch 0), train_loss = 1.443, time/batch=0.0

1475/13520 (epoch 1), train_loss = 1.393, time/batch=0.019
1476/13520 (epoch 1), train_loss = 1.420, time/batch=0.018
1477/13520 (epoch 1), train_loss = 1.354, time/batch=0.018
1478/13520 (epoch 1), train_loss = 1.455, time/batch=0.018
1479/13520 (epoch 1), train_loss = 1.470, time/batch=0.018
1480/13520 (epoch 1), train_loss = 1.407, time/batch=0.017
1481/13520 (epoch 1), train_loss = 1.428, time/batch=0.018
1482/13520 (epoch 1), train_loss = 1.455, time/batch=0.017
1483/13520 (epoch 1), train_loss = 1.418, time/batch=0.018
1484/13520 (epoch 1), train_loss = 1.483, time/batch=0.017
1485/13520 (epoch 1), train_loss = 1.429, time/batch=0.018
1486/13520 (epoch 1), train_loss = 1.466, time/batch=0.017
1487/13520 (epoch 1), train_loss = 1.458, time/batch=0.017
1488/13520 (epoch 1), train_loss = 1.457, time/batch=0.018
1489/13520 (epoch 1), train_loss = 1.434, time/batch=0.018
1490/13520 (epoch 1), train_loss = 1.445, time/batch=0.018
1491/13520 (epoch 1), train_loss = 1.417, time/batch=0.0

1618/13520 (epoch 1), train_loss = 1.386, time/batch=0.017
1619/13520 (epoch 1), train_loss = 1.420, time/batch=0.018
1620/13520 (epoch 1), train_loss = 1.371, time/batch=0.018
1621/13520 (epoch 1), train_loss = 1.372, time/batch=0.018
1622/13520 (epoch 1), train_loss = 1.461, time/batch=0.018
1623/13520 (epoch 1), train_loss = 1.396, time/batch=0.018
1624/13520 (epoch 1), train_loss = 1.438, time/batch=0.018
1625/13520 (epoch 1), train_loss = 1.459, time/batch=0.018
1626/13520 (epoch 1), train_loss = 1.409, time/batch=0.018
1627/13520 (epoch 1), train_loss = 1.418, time/batch=0.018
1628/13520 (epoch 1), train_loss = 1.449, time/batch=0.018
1629/13520 (epoch 1), train_loss = 1.358, time/batch=0.017
1630/13520 (epoch 1), train_loss = 1.398, time/batch=0.026
1631/13520 (epoch 1), train_loss = 1.439, time/batch=0.018
1632/13520 (epoch 1), train_loss = 1.473, time/batch=0.017
1633/13520 (epoch 1), train_loss = 1.395, time/batch=0.017
1634/13520 (epoch 1), train_loss = 1.346, time/batch=0.0

1762/13520 (epoch 1), train_loss = 1.436, time/batch=0.018
1763/13520 (epoch 1), train_loss = 1.391, time/batch=0.018
1764/13520 (epoch 1), train_loss = 1.408, time/batch=0.017
1765/13520 (epoch 1), train_loss = 1.419, time/batch=0.017
1766/13520 (epoch 1), train_loss = 1.416, time/batch=0.018
1767/13520 (epoch 1), train_loss = 1.479, time/batch=0.018
1768/13520 (epoch 1), train_loss = 1.427, time/batch=0.018
1769/13520 (epoch 1), train_loss = 1.451, time/batch=0.018
1770/13520 (epoch 1), train_loss = 1.457, time/batch=0.017
1771/13520 (epoch 1), train_loss = 1.437, time/batch=0.017
1772/13520 (epoch 1), train_loss = 1.354, time/batch=0.018
1773/13520 (epoch 1), train_loss = 1.378, time/batch=0.018
1774/13520 (epoch 1), train_loss = 1.392, time/batch=0.017
1775/13520 (epoch 1), train_loss = 1.387, time/batch=0.018
1776/13520 (epoch 1), train_loss = 1.412, time/batch=0.017
1777/13520 (epoch 1), train_loss = 1.385, time/batch=0.017
1778/13520 (epoch 1), train_loss = 1.434, time/batch=0.0

1906/13520 (epoch 1), train_loss = 1.437, time/batch=0.018
1907/13520 (epoch 1), train_loss = 1.381, time/batch=0.018
1908/13520 (epoch 1), train_loss = 1.424, time/batch=0.018
1909/13520 (epoch 1), train_loss = 1.407, time/batch=0.017
1910/13520 (epoch 1), train_loss = 1.344, time/batch=0.017
1911/13520 (epoch 1), train_loss = 1.344, time/batch=0.017
1912/13520 (epoch 1), train_loss = 1.361, time/batch=0.018
1913/13520 (epoch 1), train_loss = 1.378, time/batch=0.017
1914/13520 (epoch 1), train_loss = 1.358, time/batch=0.018
1915/13520 (epoch 1), train_loss = 1.397, time/batch=0.017
1916/13520 (epoch 1), train_loss = 1.364, time/batch=0.018
1917/13520 (epoch 1), train_loss = 1.398, time/batch=0.017
1918/13520 (epoch 1), train_loss = 1.389, time/batch=0.020
1919/13520 (epoch 1), train_loss = 1.438, time/batch=0.019
1920/13520 (epoch 1), train_loss = 1.423, time/batch=0.018
1921/13520 (epoch 1), train_loss = 1.358, time/batch=0.018
1922/13520 (epoch 1), train_loss = 1.374, time/batch=0.0

2050/13520 (epoch 1), train_loss = 1.396, time/batch=0.018
2051/13520 (epoch 1), train_loss = 1.345, time/batch=0.018
2052/13520 (epoch 1), train_loss = 1.398, time/batch=0.018
2053/13520 (epoch 1), train_loss = 1.420, time/batch=0.018
2054/13520 (epoch 1), train_loss = 1.404, time/batch=0.018
2055/13520 (epoch 1), train_loss = 1.350, time/batch=0.018
2056/13520 (epoch 1), train_loss = 1.361, time/batch=0.018
2057/13520 (epoch 1), train_loss = 1.377, time/batch=0.018
2058/13520 (epoch 1), train_loss = 1.372, time/batch=0.018
2059/13520 (epoch 1), train_loss = 1.402, time/batch=0.017
2060/13520 (epoch 1), train_loss = 1.408, time/batch=0.017
2061/13520 (epoch 1), train_loss = 1.350, time/batch=0.018
2062/13520 (epoch 1), train_loss = 1.378, time/batch=0.017
2063/13520 (epoch 1), train_loss = 1.311, time/batch=0.018
2064/13520 (epoch 1), train_loss = 1.376, time/batch=0.017
2065/13520 (epoch 1), train_loss = 1.428, time/batch=0.018
2066/13520 (epoch 1), train_loss = 1.372, time/batch=0.0

2193/13520 (epoch 1), train_loss = 1.395, time/batch=0.017
2194/13520 (epoch 1), train_loss = 1.371, time/batch=0.018
2195/13520 (epoch 1), train_loss = 1.388, time/batch=0.018
2196/13520 (epoch 1), train_loss = 1.379, time/batch=0.017
2197/13520 (epoch 1), train_loss = 1.361, time/batch=0.018
2198/13520 (epoch 1), train_loss = 1.374, time/batch=0.018
2199/13520 (epoch 1), train_loss = 1.357, time/batch=0.018
2200/13520 (epoch 1), train_loss = 1.410, time/batch=0.018
2201/13520 (epoch 1), train_loss = 1.368, time/batch=0.017
2202/13520 (epoch 1), train_loss = 1.358, time/batch=0.018
2203/13520 (epoch 1), train_loss = 1.442, time/batch=0.018
2204/13520 (epoch 1), train_loss = 1.439, time/batch=0.018
2205/13520 (epoch 1), train_loss = 1.409, time/batch=0.018
2206/13520 (epoch 1), train_loss = 1.352, time/batch=0.018
2207/13520 (epoch 1), train_loss = 1.423, time/batch=0.017
2208/13520 (epoch 1), train_loss = 1.407, time/batch=0.017
2209/13520 (epoch 1), train_loss = 1.373, time/batch=0.0

2336/13520 (epoch 1), train_loss = 1.354, time/batch=0.019
2337/13520 (epoch 1), train_loss = 1.452, time/batch=0.018
2338/13520 (epoch 1), train_loss = 1.371, time/batch=0.018
2339/13520 (epoch 1), train_loss = 1.445, time/batch=0.018
2340/13520 (epoch 1), train_loss = 1.356, time/batch=0.018
2341/13520 (epoch 1), train_loss = 1.349, time/batch=0.017
2342/13520 (epoch 1), train_loss = 1.384, time/batch=0.017
2343/13520 (epoch 1), train_loss = 1.334, time/batch=0.018
2344/13520 (epoch 1), train_loss = 1.434, time/batch=0.018
2345/13520 (epoch 1), train_loss = 1.362, time/batch=0.018
2346/13520 (epoch 1), train_loss = 1.369, time/batch=0.018
2347/13520 (epoch 1), train_loss = 1.360, time/batch=0.018
2348/13520 (epoch 1), train_loss = 1.347, time/batch=0.018
2349/13520 (epoch 1), train_loss = 1.385, time/batch=0.018
2350/13520 (epoch 1), train_loss = 1.298, time/batch=0.018
2351/13520 (epoch 1), train_loss = 1.318, time/batch=0.017
2352/13520 (epoch 1), train_loss = 1.352, time/batch=0.0

2480/13520 (epoch 1), train_loss = 1.388, time/batch=0.018
2481/13520 (epoch 1), train_loss = 1.343, time/batch=0.017
2482/13520 (epoch 1), train_loss = 1.387, time/batch=0.017
2483/13520 (epoch 1), train_loss = 1.374, time/batch=0.018
2484/13520 (epoch 1), train_loss = 1.344, time/batch=0.018
2485/13520 (epoch 1), train_loss = 1.388, time/batch=0.018
2486/13520 (epoch 1), train_loss = 1.419, time/batch=0.018
2487/13520 (epoch 1), train_loss = 1.421, time/batch=0.018
2488/13520 (epoch 1), train_loss = 1.391, time/batch=0.017
2489/13520 (epoch 1), train_loss = 1.360, time/batch=0.018
2490/13520 (epoch 1), train_loss = 1.377, time/batch=0.018
2491/13520 (epoch 1), train_loss = 1.407, time/batch=0.018
2492/13520 (epoch 1), train_loss = 1.363, time/batch=0.018
2493/13520 (epoch 1), train_loss = 1.385, time/batch=0.018
2494/13520 (epoch 1), train_loss = 1.422, time/batch=0.019
2495/13520 (epoch 1), train_loss = 1.394, time/batch=0.018
2496/13520 (epoch 1), train_loss = 1.373, time/batch=0.0

2620/13520 (epoch 1), train_loss = 1.297, time/batch=0.017
2621/13520 (epoch 1), train_loss = 1.347, time/batch=0.018
2622/13520 (epoch 1), train_loss = 1.352, time/batch=0.018
2623/13520 (epoch 1), train_loss = 1.309, time/batch=0.017
2624/13520 (epoch 1), train_loss = 1.365, time/batch=0.018
2625/13520 (epoch 1), train_loss = 1.354, time/batch=0.018
2626/13520 (epoch 1), train_loss = 1.385, time/batch=0.018
2627/13520 (epoch 1), train_loss = 1.387, time/batch=0.018
2628/13520 (epoch 1), train_loss = 1.371, time/batch=0.017
2629/13520 (epoch 1), train_loss = 1.351, time/batch=0.018
2630/13520 (epoch 1), train_loss = 1.306, time/batch=0.018
2631/13520 (epoch 1), train_loss = 1.293, time/batch=0.018
2632/13520 (epoch 1), train_loss = 1.331, time/batch=0.018
2633/13520 (epoch 1), train_loss = 1.390, time/batch=0.018
2634/13520 (epoch 1), train_loss = 1.322, time/batch=0.018
2635/13520 (epoch 1), train_loss = 1.381, time/batch=0.018
2636/13520 (epoch 1), train_loss = 1.380, time/batch=0.0

2764/13520 (epoch 2), train_loss = 1.375, time/batch=0.018
2765/13520 (epoch 2), train_loss = 1.330, time/batch=0.019
2766/13520 (epoch 2), train_loss = 1.385, time/batch=0.017
2767/13520 (epoch 2), train_loss = 1.374, time/batch=0.018
2768/13520 (epoch 2), train_loss = 1.353, time/batch=0.018
2769/13520 (epoch 2), train_loss = 1.349, time/batch=0.019
2770/13520 (epoch 2), train_loss = 1.350, time/batch=0.017
2771/13520 (epoch 2), train_loss = 1.387, time/batch=0.018
2772/13520 (epoch 2), train_loss = 1.364, time/batch=0.018
2773/13520 (epoch 2), train_loss = 1.357, time/batch=0.018
2774/13520 (epoch 2), train_loss = 1.368, time/batch=0.018
2775/13520 (epoch 2), train_loss = 1.359, time/batch=0.017
2776/13520 (epoch 2), train_loss = 1.344, time/batch=0.018
2777/13520 (epoch 2), train_loss = 1.430, time/batch=0.017
2778/13520 (epoch 2), train_loss = 1.345, time/batch=0.018
2779/13520 (epoch 2), train_loss = 1.406, time/batch=0.017
2780/13520 (epoch 2), train_loss = 1.412, time/batch=0.0

2907/13520 (epoch 2), train_loss = 1.343, time/batch=0.017
2908/13520 (epoch 2), train_loss = 1.382, time/batch=0.017
2909/13520 (epoch 2), train_loss = 1.277, time/batch=0.018
2910/13520 (epoch 2), train_loss = 1.279, time/batch=0.018
2911/13520 (epoch 2), train_loss = 1.310, time/batch=0.018
2912/13520 (epoch 2), train_loss = 1.378, time/batch=0.017
2913/13520 (epoch 2), train_loss = 1.402, time/batch=0.018
2914/13520 (epoch 2), train_loss = 1.351, time/batch=0.017
2915/13520 (epoch 2), train_loss = 1.423, time/batch=0.018
2916/13520 (epoch 2), train_loss = 1.332, time/batch=0.018
2917/13520 (epoch 2), train_loss = 1.325, time/batch=0.018
2918/13520 (epoch 2), train_loss = 1.344, time/batch=0.018
2919/13520 (epoch 2), train_loss = 1.375, time/batch=0.018
2920/13520 (epoch 2), train_loss = 1.304, time/batch=0.017
2921/13520 (epoch 2), train_loss = 1.322, time/batch=0.018
2922/13520 (epoch 2), train_loss = 1.282, time/batch=0.018
2923/13520 (epoch 2), train_loss = 1.303, time/batch=0.0

3048/13520 (epoch 2), train_loss = 1.377, time/batch=0.019
3049/13520 (epoch 2), train_loss = 1.402, time/batch=0.018
3050/13520 (epoch 2), train_loss = 1.343, time/batch=0.018
3051/13520 (epoch 2), train_loss = 1.341, time/batch=0.018
3052/13520 (epoch 2), train_loss = 1.387, time/batch=0.018
3053/13520 (epoch 2), train_loss = 1.388, time/batch=0.017
3054/13520 (epoch 2), train_loss = 1.302, time/batch=0.018
3055/13520 (epoch 2), train_loss = 1.289, time/batch=0.018
3056/13520 (epoch 2), train_loss = 1.273, time/batch=0.018
3057/13520 (epoch 2), train_loss = 1.311, time/batch=0.018
3058/13520 (epoch 2), train_loss = 1.312, time/batch=0.018
3059/13520 (epoch 2), train_loss = 1.377, time/batch=0.018
3060/13520 (epoch 2), train_loss = 1.321, time/batch=0.019
3061/13520 (epoch 2), train_loss = 1.319, time/batch=0.019
3062/13520 (epoch 2), train_loss = 1.279, time/batch=0.017
3063/13520 (epoch 2), train_loss = 1.431, time/batch=0.017
3064/13520 (epoch 2), train_loss = 1.360, time/batch=0.0

3191/13520 (epoch 2), train_loss = 1.321, time/batch=0.018
3192/13520 (epoch 2), train_loss = 1.395, time/batch=0.018
3193/13520 (epoch 2), train_loss = 1.356, time/batch=0.018
3194/13520 (epoch 2), train_loss = 1.281, time/batch=0.018
3195/13520 (epoch 2), train_loss = 1.335, time/batch=0.018
3196/13520 (epoch 2), train_loss = 1.347, time/batch=0.018
3197/13520 (epoch 2), train_loss = 1.308, time/batch=0.018
3198/13520 (epoch 2), train_loss = 1.392, time/batch=0.018
3199/13520 (epoch 2), train_loss = 1.336, time/batch=0.017
3200/13520 (epoch 2), train_loss = 1.331, time/batch=0.018
3201/13520 (epoch 2), train_loss = 1.340, time/batch=0.018
3202/13520 (epoch 2), train_loss = 1.377, time/batch=0.018
3203/13520 (epoch 2), train_loss = 1.335, time/batch=0.017
3204/13520 (epoch 2), train_loss = 1.254, time/batch=0.018
3205/13520 (epoch 2), train_loss = 1.285, time/batch=0.017
3206/13520 (epoch 2), train_loss = 1.336, time/batch=0.017
3207/13520 (epoch 2), train_loss = 1.348, time/batch=0.0

3335/13520 (epoch 2), train_loss = 1.399, time/batch=0.017
3336/13520 (epoch 2), train_loss = 1.313, time/batch=0.018
3337/13520 (epoch 2), train_loss = 1.393, time/batch=0.018
3338/13520 (epoch 2), train_loss = 1.386, time/batch=0.017
3339/13520 (epoch 2), train_loss = 1.270, time/batch=0.018
3340/13520 (epoch 2), train_loss = 1.400, time/batch=0.018
3341/13520 (epoch 2), train_loss = 1.293, time/batch=0.018
3342/13520 (epoch 2), train_loss = 1.271, time/batch=0.018
3343/13520 (epoch 2), train_loss = 1.264, time/batch=0.018
3344/13520 (epoch 2), train_loss = 1.335, time/batch=0.018
3345/13520 (epoch 2), train_loss = 1.328, time/batch=0.019
3346/13520 (epoch 2), train_loss = 1.342, time/batch=0.018
3347/13520 (epoch 2), train_loss = 1.415, time/batch=0.018
3348/13520 (epoch 2), train_loss = 1.379, time/batch=0.018
3349/13520 (epoch 2), train_loss = 1.316, time/batch=0.018
3350/13520 (epoch 2), train_loss = 1.354, time/batch=0.018
3351/13520 (epoch 2), train_loss = 1.352, time/batch=0.0

3479/13520 (epoch 2), train_loss = 1.407, time/batch=0.020
3480/13520 (epoch 2), train_loss = 1.360, time/batch=0.017
3481/13520 (epoch 2), train_loss = 1.327, time/batch=0.019
3482/13520 (epoch 2), train_loss = 1.308, time/batch=0.018
3483/13520 (epoch 2), train_loss = 1.366, time/batch=0.018
3484/13520 (epoch 2), train_loss = 1.380, time/batch=0.017
3485/13520 (epoch 2), train_loss = 1.352, time/batch=0.018
3486/13520 (epoch 2), train_loss = 1.310, time/batch=0.017
3487/13520 (epoch 2), train_loss = 1.334, time/batch=0.018
3488/13520 (epoch 2), train_loss = 1.315, time/batch=0.018
3489/13520 (epoch 2), train_loss = 1.361, time/batch=0.017
3490/13520 (epoch 2), train_loss = 1.349, time/batch=0.017
3491/13520 (epoch 2), train_loss = 1.342, time/batch=0.018
3492/13520 (epoch 2), train_loss = 1.343, time/batch=0.018
3493/13520 (epoch 2), train_loss = 1.287, time/batch=0.017
3494/13520 (epoch 2), train_loss = 1.307, time/batch=0.018
3495/13520 (epoch 2), train_loss = 1.327, time/batch=0.0

3618/13520 (epoch 2), train_loss = 1.369, time/batch=0.018
3619/13520 (epoch 2), train_loss = 1.286, time/batch=0.018
3620/13520 (epoch 2), train_loss = 1.301, time/batch=0.018
3621/13520 (epoch 2), train_loss = 1.314, time/batch=0.018
3622/13520 (epoch 2), train_loss = 1.318, time/batch=0.018
3623/13520 (epoch 2), train_loss = 1.313, time/batch=0.018
3624/13520 (epoch 2), train_loss = 1.362, time/batch=0.018
3625/13520 (epoch 2), train_loss = 1.325, time/batch=0.018
3626/13520 (epoch 2), train_loss = 1.343, time/batch=0.018
3627/13520 (epoch 2), train_loss = 1.298, time/batch=0.018
3628/13520 (epoch 2), train_loss = 1.272, time/batch=0.018
3629/13520 (epoch 2), train_loss = 1.334, time/batch=0.018
3630/13520 (epoch 2), train_loss = 1.292, time/batch=0.017
3631/13520 (epoch 2), train_loss = 1.332, time/batch=0.018
3632/13520 (epoch 2), train_loss = 1.306, time/batch=0.018
3633/13520 (epoch 2), train_loss = 1.334, time/batch=0.018
3634/13520 (epoch 2), train_loss = 1.276, time/batch=0.0

3760/13520 (epoch 2), train_loss = 1.237, time/batch=0.018
3761/13520 (epoch 2), train_loss = 1.335, time/batch=0.019
3762/13520 (epoch 2), train_loss = 1.377, time/batch=0.018
3763/13520 (epoch 2), train_loss = 1.307, time/batch=0.018
3764/13520 (epoch 2), train_loss = 1.318, time/batch=0.017
3765/13520 (epoch 2), train_loss = 1.321, time/batch=0.018
3766/13520 (epoch 2), train_loss = 1.315, time/batch=0.018
3767/13520 (epoch 2), train_loss = 1.240, time/batch=0.018
3768/13520 (epoch 2), train_loss = 1.297, time/batch=0.019
3769/13520 (epoch 2), train_loss = 1.309, time/batch=0.018
3770/13520 (epoch 2), train_loss = 1.278, time/batch=0.018
3771/13520 (epoch 2), train_loss = 1.346, time/batch=0.019
3772/13520 (epoch 2), train_loss = 1.285, time/batch=0.018
3773/13520 (epoch 2), train_loss = 1.342, time/batch=0.018
3774/13520 (epoch 2), train_loss = 1.351, time/batch=0.018
3775/13520 (epoch 2), train_loss = 1.363, time/batch=0.018
3776/13520 (epoch 2), train_loss = 1.303, time/batch=0.0

3904/13520 (epoch 2), train_loss = 1.312, time/batch=0.018
3905/13520 (epoch 2), train_loss = 1.280, time/batch=0.018
3906/13520 (epoch 2), train_loss = 1.285, time/batch=0.018
3907/13520 (epoch 2), train_loss = 1.284, time/batch=0.018
3908/13520 (epoch 2), train_loss = 1.402, time/batch=0.017
3909/13520 (epoch 2), train_loss = 1.300, time/batch=0.017
3910/13520 (epoch 2), train_loss = 1.253, time/batch=0.018
3911/13520 (epoch 2), train_loss = 1.317, time/batch=0.018
3912/13520 (epoch 2), train_loss = 1.273, time/batch=0.017
3913/13520 (epoch 2), train_loss = 1.253, time/batch=0.018
3914/13520 (epoch 2), train_loss = 1.319, time/batch=0.017
3915/13520 (epoch 2), train_loss = 1.264, time/batch=0.018
3916/13520 (epoch 2), train_loss = 1.266, time/batch=0.017
3917/13520 (epoch 2), train_loss = 1.312, time/batch=0.018
3918/13520 (epoch 2), train_loss = 1.323, time/batch=0.018
3919/13520 (epoch 2), train_loss = 1.282, time/batch=0.017
3920/13520 (epoch 2), train_loss = 1.311, time/batch=0.0

4047/13520 (epoch 2), train_loss = 1.309, time/batch=0.018
4048/13520 (epoch 2), train_loss = 1.284, time/batch=0.018
4049/13520 (epoch 2), train_loss = 1.323, time/batch=0.017
4050/13520 (epoch 2), train_loss = 1.279, time/batch=0.018
4051/13520 (epoch 2), train_loss = 1.317, time/batch=0.018
4052/13520 (epoch 2), train_loss = 1.393, time/batch=0.018
4053/13520 (epoch 2), train_loss = 1.395, time/batch=0.018
4054/13520 (epoch 2), train_loss = 1.311, time/batch=0.018
4055/13520 (epoch 2), train_loss = 1.317, time/batch=0.018
4056/13520 (epoch 3), train_loss = 1.476, time/batch=0.018
4057/13520 (epoch 3), train_loss = 1.252, time/batch=0.018
4058/13520 (epoch 3), train_loss = 1.350, time/batch=0.018
4059/13520 (epoch 3), train_loss = 1.297, time/batch=0.018
4060/13520 (epoch 3), train_loss = 1.329, time/batch=0.018
4061/13520 (epoch 3), train_loss = 1.339, time/batch=0.018
4062/13520 (epoch 3), train_loss = 1.320, time/batch=0.017
4063/13520 (epoch 3), train_loss = 1.317, time/batch=0.0

4188/13520 (epoch 3), train_loss = 1.347, time/batch=0.018
4189/13520 (epoch 3), train_loss = 1.304, time/batch=0.018
4190/13520 (epoch 3), train_loss = 1.341, time/batch=0.018
4191/13520 (epoch 3), train_loss = 1.316, time/batch=0.018
4192/13520 (epoch 3), train_loss = 1.343, time/batch=0.018
4193/13520 (epoch 3), train_loss = 1.310, time/batch=0.018
4194/13520 (epoch 3), train_loss = 1.314, time/batch=0.018
4195/13520 (epoch 3), train_loss = 1.308, time/batch=0.019
4196/13520 (epoch 3), train_loss = 1.278, time/batch=0.018
4197/13520 (epoch 3), train_loss = 1.247, time/batch=0.018
4198/13520 (epoch 3), train_loss = 1.329, time/batch=0.017
4199/13520 (epoch 3), train_loss = 1.258, time/batch=0.018
4200/13520 (epoch 3), train_loss = 1.365, time/batch=0.020
4201/13520 (epoch 3), train_loss = 1.396, time/batch=0.018
4202/13520 (epoch 3), train_loss = 1.316, time/batch=0.017
4203/13520 (epoch 3), train_loss = 1.324, time/batch=0.018
4204/13520 (epoch 3), train_loss = 1.357, time/batch=0.0

4332/13520 (epoch 3), train_loss = 1.334, time/batch=0.017
4333/13520 (epoch 3), train_loss = 1.269, time/batch=0.019
4334/13520 (epoch 3), train_loss = 1.290, time/batch=0.018
4335/13520 (epoch 3), train_loss = 1.324, time/batch=0.018
4336/13520 (epoch 3), train_loss = 1.361, time/batch=0.018
4337/13520 (epoch 3), train_loss = 1.279, time/batch=0.017
4338/13520 (epoch 3), train_loss = 1.248, time/batch=0.017
4339/13520 (epoch 3), train_loss = 1.335, time/batch=0.018
4340/13520 (epoch 3), train_loss = 1.259, time/batch=0.018
4341/13520 (epoch 3), train_loss = 1.407, time/batch=0.018
4342/13520 (epoch 3), train_loss = 1.321, time/batch=0.017
4343/13520 (epoch 3), train_loss = 1.307, time/batch=0.017
4344/13520 (epoch 3), train_loss = 1.340, time/batch=0.019
4345/13520 (epoch 3), train_loss = 1.360, time/batch=0.018
4346/13520 (epoch 3), train_loss = 1.346, time/batch=0.018
4347/13520 (epoch 3), train_loss = 1.296, time/batch=0.018
4348/13520 (epoch 3), train_loss = 1.308, time/batch=0.0

4476/13520 (epoch 3), train_loss = 1.255, time/batch=0.018
4477/13520 (epoch 3), train_loss = 1.257, time/batch=0.017
4478/13520 (epoch 3), train_loss = 1.286, time/batch=0.018
4479/13520 (epoch 3), train_loss = 1.280, time/batch=0.018
4480/13520 (epoch 3), train_loss = 1.298, time/batch=0.018
4481/13520 (epoch 3), train_loss = 1.257, time/batch=0.018
4482/13520 (epoch 3), train_loss = 1.317, time/batch=0.018
4483/13520 (epoch 3), train_loss = 1.291, time/batch=0.018
4484/13520 (epoch 3), train_loss = 1.240, time/batch=0.018
4485/13520 (epoch 3), train_loss = 1.271, time/batch=0.018
4486/13520 (epoch 3), train_loss = 1.354, time/batch=0.018
4487/13520 (epoch 3), train_loss = 1.262, time/batch=0.018
4488/13520 (epoch 3), train_loss = 1.339, time/batch=0.018
4489/13520 (epoch 3), train_loss = 1.325, time/batch=0.018
4490/13520 (epoch 3), train_loss = 1.278, time/batch=0.018
4491/13520 (epoch 3), train_loss = 1.367, time/batch=0.018
4492/13520 (epoch 3), train_loss = 1.313, time/batch=0.0

4619/13520 (epoch 3), train_loss = 1.306, time/batch=0.017
4620/13520 (epoch 3), train_loss = 1.268, time/batch=0.017
4621/13520 (epoch 3), train_loss = 1.282, time/batch=0.018
4622/13520 (epoch 3), train_loss = 1.299, time/batch=0.019
4623/13520 (epoch 3), train_loss = 1.349, time/batch=0.018
4624/13520 (epoch 3), train_loss = 1.321, time/batch=0.018
4625/13520 (epoch 3), train_loss = 1.271, time/batch=0.017
4626/13520 (epoch 3), train_loss = 1.279, time/batch=0.018
4627/13520 (epoch 3), train_loss = 1.254, time/batch=0.017
4628/13520 (epoch 3), train_loss = 1.296, time/batch=0.017
4629/13520 (epoch 3), train_loss = 1.287, time/batch=0.018
4630/13520 (epoch 3), train_loss = 1.306, time/batch=0.018
4631/13520 (epoch 3), train_loss = 1.264, time/batch=0.018
4632/13520 (epoch 3), train_loss = 1.339, time/batch=0.018
4633/13520 (epoch 3), train_loss = 1.319, time/batch=0.017
4634/13520 (epoch 3), train_loss = 1.341, time/batch=0.018
4635/13520 (epoch 3), train_loss = 1.292, time/batch=0.0

4762/13520 (epoch 3), train_loss = 1.277, time/batch=0.018
4763/13520 (epoch 3), train_loss = 1.323, time/batch=0.018
4764/13520 (epoch 3), train_loss = 1.325, time/batch=0.018
4765/13520 (epoch 3), train_loss = 1.253, time/batch=0.018
4766/13520 (epoch 3), train_loss = 1.298, time/batch=0.018
4767/13520 (epoch 3), train_loss = 1.211, time/batch=0.018
4768/13520 (epoch 3), train_loss = 1.290, time/batch=0.018
4769/13520 (epoch 3), train_loss = 1.313, time/batch=0.018
4770/13520 (epoch 3), train_loss = 1.290, time/batch=0.019
4771/13520 (epoch 3), train_loss = 1.299, time/batch=0.018
4772/13520 (epoch 3), train_loss = 1.251, time/batch=0.018
4773/13520 (epoch 3), train_loss = 1.299, time/batch=0.018
4774/13520 (epoch 3), train_loss = 1.280, time/batch=0.018
4775/13520 (epoch 3), train_loss = 1.281, time/batch=0.019
4776/13520 (epoch 3), train_loss = 1.285, time/batch=0.017
4777/13520 (epoch 3), train_loss = 1.337, time/batch=0.017
4778/13520 (epoch 3), train_loss = 1.304, time/batch=0.0

4904/13520 (epoch 3), train_loss = 1.312, time/batch=0.019
4905/13520 (epoch 3), train_loss = 1.292, time/batch=0.018
4906/13520 (epoch 3), train_loss = 1.282, time/batch=0.018
4907/13520 (epoch 3), train_loss = 1.371, time/batch=0.018
4908/13520 (epoch 3), train_loss = 1.359, time/batch=0.019
4909/13520 (epoch 3), train_loss = 1.321, time/batch=0.018
4910/13520 (epoch 3), train_loss = 1.274, time/batch=0.018
4911/13520 (epoch 3), train_loss = 1.342, time/batch=0.018
4912/13520 (epoch 3), train_loss = 1.316, time/batch=0.018
4913/13520 (epoch 3), train_loss = 1.304, time/batch=0.019
4914/13520 (epoch 3), train_loss = 1.322, time/batch=0.018
4915/13520 (epoch 3), train_loss = 1.327, time/batch=0.017
4916/13520 (epoch 3), train_loss = 1.302, time/batch=0.018
4917/13520 (epoch 3), train_loss = 1.352, time/batch=0.018
4918/13520 (epoch 3), train_loss = 1.386, time/batch=0.018
4919/13520 (epoch 3), train_loss = 1.313, time/batch=0.018
4920/13520 (epoch 3), train_loss = 1.261, time/batch=0.0

5046/13520 (epoch 3), train_loss = 1.301, time/batch=0.020
5047/13520 (epoch 3), train_loss = 1.253, time/batch=0.018
5048/13520 (epoch 3), train_loss = 1.348, time/batch=0.018
5049/13520 (epoch 3), train_loss = 1.290, time/batch=0.018
5050/13520 (epoch 3), train_loss = 1.275, time/batch=0.021
5051/13520 (epoch 3), train_loss = 1.292, time/batch=0.018
5052/13520 (epoch 3), train_loss = 1.271, time/batch=0.017
5053/13520 (epoch 3), train_loss = 1.304, time/batch=0.018
5054/13520 (epoch 3), train_loss = 1.226, time/batch=0.018
5055/13520 (epoch 3), train_loss = 1.257, time/batch=0.017
5056/13520 (epoch 3), train_loss = 1.267, time/batch=0.018
5057/13520 (epoch 3), train_loss = 1.287, time/batch=0.018
5058/13520 (epoch 3), train_loss = 1.250, time/batch=0.018
5059/13520 (epoch 3), train_loss = 1.277, time/batch=0.018
5060/13520 (epoch 3), train_loss = 1.253, time/batch=0.017
5061/13520 (epoch 3), train_loss = 1.302, time/batch=0.018
5062/13520 (epoch 3), train_loss = 1.283, time/batch=0.0

5190/13520 (epoch 3), train_loss = 1.350, time/batch=0.018
5191/13520 (epoch 3), train_loss = 1.360, time/batch=0.018
5192/13520 (epoch 3), train_loss = 1.320, time/batch=0.018
5193/13520 (epoch 3), train_loss = 1.286, time/batch=0.019
5194/13520 (epoch 3), train_loss = 1.306, time/batch=0.018
5195/13520 (epoch 3), train_loss = 1.354, time/batch=0.017
5196/13520 (epoch 3), train_loss = 1.288, time/batch=0.018
5197/13520 (epoch 3), train_loss = 1.307, time/batch=0.018
5198/13520 (epoch 3), train_loss = 1.349, time/batch=0.018
5199/13520 (epoch 3), train_loss = 1.321, time/batch=0.018
5200/13520 (epoch 3), train_loss = 1.303, time/batch=0.017
5201/13520 (epoch 3), train_loss = 1.276, time/batch=0.018
5202/13520 (epoch 3), train_loss = 1.268, time/batch=0.018
5203/13520 (epoch 3), train_loss = 1.269, time/batch=0.018
5204/13520 (epoch 3), train_loss = 1.240, time/batch=0.017
5205/13520 (epoch 3), train_loss = 1.281, time/batch=0.018
5206/13520 (epoch 3), train_loss = 1.249, time/batch=0.0

5333/13520 (epoch 3), train_loss = 1.291, time/batch=0.018
5334/13520 (epoch 3), train_loss = 1.222, time/batch=0.018
5335/13520 (epoch 3), train_loss = 1.221, time/batch=0.018
5336/13520 (epoch 3), train_loss = 1.257, time/batch=0.018
5337/13520 (epoch 3), train_loss = 1.303, time/batch=0.018
5338/13520 (epoch 3), train_loss = 1.259, time/batch=0.018
5339/13520 (epoch 3), train_loss = 1.312, time/batch=0.017
5340/13520 (epoch 3), train_loss = 1.305, time/batch=0.018
5341/13520 (epoch 3), train_loss = 1.276, time/batch=0.018
5342/13520 (epoch 3), train_loss = 1.241, time/batch=0.017
5343/13520 (epoch 3), train_loss = 1.325, time/batch=0.018
5344/13520 (epoch 3), train_loss = 1.271, time/batch=0.018
5345/13520 (epoch 3), train_loss = 1.336, time/batch=0.018
5346/13520 (epoch 3), train_loss = 1.365, time/batch=0.018
5347/13520 (epoch 3), train_loss = 1.311, time/batch=0.018
5348/13520 (epoch 3), train_loss = 1.278, time/batch=0.018
5349/13520 (epoch 3), train_loss = 1.303, time/batch=0.0

5473/13520 (epoch 4), train_loss = 1.289, time/batch=0.019
5474/13520 (epoch 4), train_loss = 1.292, time/batch=0.018
5475/13520 (epoch 4), train_loss = 1.321, time/batch=0.018
5476/13520 (epoch 4), train_loss = 1.311, time/batch=0.018
5477/13520 (epoch 4), train_loss = 1.304, time/batch=0.018
5478/13520 (epoch 4), train_loss = 1.311, time/batch=0.018
5479/13520 (epoch 4), train_loss = 1.301, time/batch=0.018
5480/13520 (epoch 4), train_loss = 1.282, time/batch=0.018
5481/13520 (epoch 4), train_loss = 1.352, time/batch=0.018
5482/13520 (epoch 4), train_loss = 1.280, time/batch=0.018
5483/13520 (epoch 4), train_loss = 1.338, time/batch=0.019
5484/13520 (epoch 4), train_loss = 1.357, time/batch=0.018
5485/13520 (epoch 4), train_loss = 1.439, time/batch=0.017
5486/13520 (epoch 4), train_loss = 1.339, time/batch=0.018
5487/13520 (epoch 4), train_loss = 1.300, time/batch=0.019
5488/13520 (epoch 4), train_loss = 1.250, time/batch=0.018
5489/13520 (epoch 4), train_loss = 1.325, time/batch=0.0

5616/13520 (epoch 4), train_loss = 1.324, time/batch=0.018
5617/13520 (epoch 4), train_loss = 1.355, time/batch=0.018
5618/13520 (epoch 4), train_loss = 1.300, time/batch=0.018
5619/13520 (epoch 4), train_loss = 1.364, time/batch=0.017
5620/13520 (epoch 4), train_loss = 1.267, time/batch=0.018
5621/13520 (epoch 4), train_loss = 1.263, time/batch=0.018
5622/13520 (epoch 4), train_loss = 1.287, time/batch=0.017
5623/13520 (epoch 4), train_loss = 1.323, time/batch=0.018
5624/13520 (epoch 4), train_loss = 1.248, time/batch=0.018
5625/13520 (epoch 4), train_loss = 1.285, time/batch=0.018
5626/13520 (epoch 4), train_loss = 1.227, time/batch=0.017
5627/13520 (epoch 4), train_loss = 1.245, time/batch=0.018
5628/13520 (epoch 4), train_loss = 1.354, time/batch=0.018
5629/13520 (epoch 4), train_loss = 1.281, time/batch=0.017
5630/13520 (epoch 4), train_loss = 1.289, time/batch=0.018
5631/13520 (epoch 4), train_loss = 1.266, time/batch=0.019
5632/13520 (epoch 4), train_loss = 1.307, time/batch=0.0

5760/13520 (epoch 4), train_loss = 1.221, time/batch=0.018
5761/13520 (epoch 4), train_loss = 1.254, time/batch=0.017
5762/13520 (epoch 4), train_loss = 1.263, time/batch=0.018
5763/13520 (epoch 4), train_loss = 1.316, time/batch=0.017
5764/13520 (epoch 4), train_loss = 1.271, time/batch=0.017
5765/13520 (epoch 4), train_loss = 1.279, time/batch=0.017
5766/13520 (epoch 4), train_loss = 1.241, time/batch=0.019
5767/13520 (epoch 4), train_loss = 1.363, time/batch=0.017
5768/13520 (epoch 4), train_loss = 1.307, time/batch=0.018
5769/13520 (epoch 4), train_loss = 1.229, time/batch=0.017
5770/13520 (epoch 4), train_loss = 1.241, time/batch=0.017
5771/13520 (epoch 4), train_loss = 1.330, time/batch=0.018
5772/13520 (epoch 4), train_loss = 1.294, time/batch=0.018
5773/13520 (epoch 4), train_loss = 1.265, time/batch=0.018
5774/13520 (epoch 4), train_loss = 1.264, time/batch=0.018
5775/13520 (epoch 4), train_loss = 1.231, time/batch=0.017
5776/13520 (epoch 4), train_loss = 1.281, time/batch=0.0

5902/13520 (epoch 4), train_loss = 1.343, time/batch=0.017
5903/13520 (epoch 4), train_loss = 1.279, time/batch=0.017
5904/13520 (epoch 4), train_loss = 1.286, time/batch=0.018
5905/13520 (epoch 4), train_loss = 1.291, time/batch=0.018
5906/13520 (epoch 4), train_loss = 1.331, time/batch=0.018
5907/13520 (epoch 4), train_loss = 1.286, time/batch=0.018
5908/13520 (epoch 4), train_loss = 1.213, time/batch=0.018
5909/13520 (epoch 4), train_loss = 1.244, time/batch=0.017
5910/13520 (epoch 4), train_loss = 1.279, time/batch=0.017
5911/13520 (epoch 4), train_loss = 1.292, time/batch=0.017
5912/13520 (epoch 4), train_loss = 1.297, time/batch=0.018
5913/13520 (epoch 4), train_loss = 1.330, time/batch=0.018
5914/13520 (epoch 4), train_loss = 1.286, time/batch=0.018
5915/13520 (epoch 4), train_loss = 1.301, time/batch=0.018
5916/13520 (epoch 4), train_loss = 1.235, time/batch=0.018
5917/13520 (epoch 4), train_loss = 1.255, time/batch=0.018
5918/13520 (epoch 4), train_loss = 1.314, time/batch=0.0

6047/13520 (epoch 4), train_loss = 1.219, time/batch=0.017
6048/13520 (epoch 4), train_loss = 1.282, time/batch=0.018
6049/13520 (epoch 4), train_loss = 1.276, time/batch=0.018
6050/13520 (epoch 4), train_loss = 1.301, time/batch=0.018
6051/13520 (epoch 4), train_loss = 1.358, time/batch=0.018
6052/13520 (epoch 4), train_loss = 1.320, time/batch=0.017
6053/13520 (epoch 4), train_loss = 1.267, time/batch=0.018
6054/13520 (epoch 4), train_loss = 1.307, time/batch=0.018
6055/13520 (epoch 4), train_loss = 1.307, time/batch=0.018
6056/13520 (epoch 4), train_loss = 1.312, time/batch=0.018
6057/13520 (epoch 4), train_loss = 1.290, time/batch=0.018
6058/13520 (epoch 4), train_loss = 1.342, time/batch=0.018
6059/13520 (epoch 4), train_loss = 1.338, time/batch=0.019
6060/13520 (epoch 4), train_loss = 1.273, time/batch=0.018
6061/13520 (epoch 4), train_loss = 1.269, time/batch=0.018
6062/13520 (epoch 4), train_loss = 1.286, time/batch=0.018
6063/13520 (epoch 4), train_loss = 1.267, time/batch=0.0

6189/13520 (epoch 4), train_loss = 1.285, time/batch=0.018
6190/13520 (epoch 4), train_loss = 1.267, time/batch=0.018
6191/13520 (epoch 4), train_loss = 1.280, time/batch=0.018
6192/13520 (epoch 4), train_loss = 1.280, time/batch=0.018
6193/13520 (epoch 4), train_loss = 1.318, time/batch=0.019
6194/13520 (epoch 4), train_loss = 1.290, time/batch=0.018
6195/13520 (epoch 4), train_loss = 1.295, time/batch=0.018
6196/13520 (epoch 4), train_loss = 1.300, time/batch=0.019
6197/13520 (epoch 4), train_loss = 1.238, time/batch=0.019
6198/13520 (epoch 4), train_loss = 1.268, time/batch=0.018
6199/13520 (epoch 4), train_loss = 1.290, time/batch=0.018
6200/13520 (epoch 4), train_loss = 1.237, time/batch=0.018
6201/13520 (epoch 4), train_loss = 1.295, time/batch=0.018
6202/13520 (epoch 4), train_loss = 1.273, time/batch=0.017
6203/13520 (epoch 4), train_loss = 1.279, time/batch=0.018
6204/13520 (epoch 4), train_loss = 1.340, time/batch=0.018
6205/13520 (epoch 4), train_loss = 1.285, time/batch=0.0

6336/13520 (epoch 4), train_loss = 1.262, time/batch=0.019
6337/13520 (epoch 4), train_loss = 1.290, time/batch=0.019
6338/13520 (epoch 4), train_loss = 1.230, time/batch=0.018
6339/13520 (epoch 4), train_loss = 1.295, time/batch=0.018
6340/13520 (epoch 4), train_loss = 1.257, time/batch=0.019
6341/13520 (epoch 4), train_loss = 1.218, time/batch=0.019
6342/13520 (epoch 4), train_loss = 1.239, time/batch=0.018
6343/13520 (epoch 4), train_loss = 1.253, time/batch=0.018
6344/13520 (epoch 4), train_loss = 1.297, time/batch=0.018
6345/13520 (epoch 4), train_loss = 1.252, time/batch=0.018
6346/13520 (epoch 4), train_loss = 1.255, time/batch=0.019
6347/13520 (epoch 4), train_loss = 1.251, time/batch=0.019
6348/13520 (epoch 4), train_loss = 1.252, time/batch=0.021
6349/13520 (epoch 4), train_loss = 1.241, time/batch=0.018
6350/13520 (epoch 4), train_loss = 1.214, time/batch=0.019
6351/13520 (epoch 4), train_loss = 1.316, time/batch=0.019
6352/13520 (epoch 4), train_loss = 1.269, time/batch=0.0

6479/13520 (epoch 4), train_loss = 1.328, time/batch=0.018
6480/13520 (epoch 4), train_loss = 1.258, time/batch=0.018
6481/13520 (epoch 4), train_loss = 1.253, time/batch=0.018
6482/13520 (epoch 4), train_loss = 1.239, time/batch=0.018
6483/13520 (epoch 4), train_loss = 1.247, time/batch=0.018
6484/13520 (epoch 4), train_loss = 1.307, time/batch=0.018
6485/13520 (epoch 4), train_loss = 1.287, time/batch=0.019
6486/13520 (epoch 4), train_loss = 1.308, time/batch=0.018
6487/13520 (epoch 4), train_loss = 1.297, time/batch=0.018
6488/13520 (epoch 4), train_loss = 1.322, time/batch=0.018
6489/13520 (epoch 4), train_loss = 1.239, time/batch=0.018
6490/13520 (epoch 4), train_loss = 1.316, time/batch=0.018
6491/13520 (epoch 4), train_loss = 1.231, time/batch=0.018
6492/13520 (epoch 4), train_loss = 1.242, time/batch=0.018
6493/13520 (epoch 4), train_loss = 1.343, time/batch=0.018
6494/13520 (epoch 4), train_loss = 1.306, time/batch=0.018
6495/13520 (epoch 4), train_loss = 1.214, time/batch=0.0

6619/13520 (epoch 4), train_loss = 1.235, time/batch=0.018
6620/13520 (epoch 4), train_loss = 1.227, time/batch=0.018
6621/13520 (epoch 4), train_loss = 1.263, time/batch=0.018
6622/13520 (epoch 4), train_loss = 1.291, time/batch=0.018
6623/13520 (epoch 4), train_loss = 1.262, time/batch=0.018
6624/13520 (epoch 4), train_loss = 1.272, time/batch=0.017
6625/13520 (epoch 4), train_loss = 1.271, time/batch=0.019
6626/13520 (epoch 4), train_loss = 1.236, time/batch=0.017
6627/13520 (epoch 4), train_loss = 1.263, time/batch=0.018
6628/13520 (epoch 4), train_loss = 1.255, time/batch=0.017
6629/13520 (epoch 4), train_loss = 1.251, time/batch=0.018
6630/13520 (epoch 4), train_loss = 1.300, time/batch=0.018
6631/13520 (epoch 4), train_loss = 1.237, time/batch=0.018
6632/13520 (epoch 4), train_loss = 1.294, time/batch=0.018
6633/13520 (epoch 4), train_loss = 1.288, time/batch=0.018
6634/13520 (epoch 4), train_loss = 1.298, time/batch=0.018
6635/13520 (epoch 4), train_loss = 1.284, time/batch=0.0

6760/13520 (epoch 5), train_loss = 1.440, time/batch=0.020
6761/13520 (epoch 5), train_loss = 1.209, time/batch=0.018
6762/13520 (epoch 5), train_loss = 1.295, time/batch=0.017
6763/13520 (epoch 5), train_loss = 1.262, time/batch=0.018
6764/13520 (epoch 5), train_loss = 1.287, time/batch=0.017
6765/13520 (epoch 5), train_loss = 1.304, time/batch=0.018
6766/13520 (epoch 5), train_loss = 1.270, time/batch=0.017
6767/13520 (epoch 5), train_loss = 1.282, time/batch=0.017
6768/13520 (epoch 5), train_loss = 1.308, time/batch=0.018
6769/13520 (epoch 5), train_loss = 1.266, time/batch=0.017
6770/13520 (epoch 5), train_loss = 1.220, time/batch=0.018
6771/13520 (epoch 5), train_loss = 1.236, time/batch=0.018
6772/13520 (epoch 5), train_loss = 1.255, time/batch=0.019
6773/13520 (epoch 5), train_loss = 1.303, time/batch=0.018
6774/13520 (epoch 5), train_loss = 1.293, time/batch=0.019
6775/13520 (epoch 5), train_loss = 1.266, time/batch=0.017
6776/13520 (epoch 5), train_loss = 1.282, time/batch=0.0

6903/13520 (epoch 5), train_loss = 1.224, time/batch=0.018
6904/13520 (epoch 5), train_loss = 1.322, time/batch=0.018
6905/13520 (epoch 5), train_loss = 1.358, time/batch=0.018
6906/13520 (epoch 5), train_loss = 1.274, time/batch=0.018
6907/13520 (epoch 5), train_loss = 1.281, time/batch=0.018
6908/13520 (epoch 5), train_loss = 1.318, time/batch=0.018
6909/13520 (epoch 5), train_loss = 1.189, time/batch=0.019
6910/13520 (epoch 5), train_loss = 1.281, time/batch=0.018
6911/13520 (epoch 5), train_loss = 1.233, time/batch=0.017
6912/13520 (epoch 5), train_loss = 1.240, time/batch=0.018
6913/13520 (epoch 5), train_loss = 1.277, time/batch=0.018
6914/13520 (epoch 5), train_loss = 1.243, time/batch=0.018
6915/13520 (epoch 5), train_loss = 1.288, time/batch=0.019
6916/13520 (epoch 5), train_loss = 1.315, time/batch=0.018
6917/13520 (epoch 5), train_loss = 1.302, time/batch=0.018
6918/13520 (epoch 5), train_loss = 1.283, time/batch=0.018
6919/13520 (epoch 5), train_loss = 1.279, time/batch=0.0

7048/13520 (epoch 5), train_loss = 1.295, time/batch=0.018
7049/13520 (epoch 5), train_loss = 1.323, time/batch=0.018
7050/13520 (epoch 5), train_loss = 1.310, time/batch=0.018
7051/13520 (epoch 5), train_loss = 1.272, time/batch=0.018
7052/13520 (epoch 5), train_loss = 1.279, time/batch=0.017
7053/13520 (epoch 5), train_loss = 1.262, time/batch=0.018
7054/13520 (epoch 5), train_loss = 1.289, time/batch=0.018
7055/13520 (epoch 5), train_loss = 1.284, time/batch=0.018
7056/13520 (epoch 5), train_loss = 1.292, time/batch=0.018
7057/13520 (epoch 5), train_loss = 1.244, time/batch=0.019
7058/13520 (epoch 5), train_loss = 1.267, time/batch=0.017
7059/13520 (epoch 5), train_loss = 1.303, time/batch=0.019
7060/13520 (epoch 5), train_loss = 1.246, time/batch=0.017
7061/13520 (epoch 5), train_loss = 1.308, time/batch=0.018
7062/13520 (epoch 5), train_loss = 1.267, time/batch=0.018
7063/13520 (epoch 5), train_loss = 1.290, time/batch=0.018
7064/13520 (epoch 5), train_loss = 1.302, time/batch=0.0

7192/13520 (epoch 5), train_loss = 1.311, time/batch=0.018
7193/13520 (epoch 5), train_loss = 1.294, time/batch=0.018
7194/13520 (epoch 5), train_loss = 1.239, time/batch=0.018
7195/13520 (epoch 5), train_loss = 1.341, time/batch=0.018
7196/13520 (epoch 5), train_loss = 1.280, time/batch=0.018
7197/13520 (epoch 5), train_loss = 1.283, time/batch=0.018
7198/13520 (epoch 5), train_loss = 1.285, time/batch=0.018
7199/13520 (epoch 5), train_loss = 1.266, time/batch=0.018
7200/13520 (epoch 5), train_loss = 1.264, time/batch=0.017
7201/13520 (epoch 5), train_loss = 1.277, time/batch=0.018
7202/13520 (epoch 5), train_loss = 1.265, time/batch=0.019
7203/13520 (epoch 5), train_loss = 1.255, time/batch=0.018
7204/13520 (epoch 5), train_loss = 1.286, time/batch=0.018
7205/13520 (epoch 5), train_loss = 1.334, time/batch=0.018
7206/13520 (epoch 5), train_loss = 1.228, time/batch=0.018
7207/13520 (epoch 5), train_loss = 1.244, time/batch=0.018
7208/13520 (epoch 5), train_loss = 1.201, time/batch=0.0

7335/13520 (epoch 5), train_loss = 1.237, time/batch=0.018
7336/13520 (epoch 5), train_loss = 1.307, time/batch=0.018
7337/13520 (epoch 5), train_loss = 1.287, time/batch=0.017
7338/13520 (epoch 5), train_loss = 1.305, time/batch=0.019
7339/13520 (epoch 5), train_loss = 1.250, time/batch=0.018
7340/13520 (epoch 5), train_loss = 1.293, time/batch=0.018
7341/13520 (epoch 5), train_loss = 1.268, time/batch=0.018
7342/13520 (epoch 5), train_loss = 1.243, time/batch=0.018
7343/13520 (epoch 5), train_loss = 1.232, time/batch=0.018
7344/13520 (epoch 5), train_loss = 1.266, time/batch=0.018
7345/13520 (epoch 5), train_loss = 1.324, time/batch=0.018
7346/13520 (epoch 5), train_loss = 1.252, time/batch=0.017
7347/13520 (epoch 5), train_loss = 1.335, time/batch=0.017
7348/13520 (epoch 5), train_loss = 1.242, time/batch=0.017
7349/13520 (epoch 5), train_loss = 1.280, time/batch=0.018
7350/13520 (epoch 5), train_loss = 1.322, time/batch=0.018
7351/13520 (epoch 5), train_loss = 1.223, time/batch=0.0

7478/13520 (epoch 5), train_loss = 1.244, time/batch=0.018
7479/13520 (epoch 5), train_loss = 1.246, time/batch=0.018
7480/13520 (epoch 5), train_loss = 1.255, time/batch=0.017
7481/13520 (epoch 5), train_loss = 1.303, time/batch=0.018
7482/13520 (epoch 5), train_loss = 1.271, time/batch=0.018
7483/13520 (epoch 5), train_loss = 1.286, time/batch=0.019
7484/13520 (epoch 5), train_loss = 1.265, time/batch=0.017
7485/13520 (epoch 5), train_loss = 1.285, time/batch=0.017
7486/13520 (epoch 5), train_loss = 1.269, time/batch=0.018
7487/13520 (epoch 5), train_loss = 1.274, time/batch=0.018
7488/13520 (epoch 5), train_loss = 1.287, time/batch=0.017
7489/13520 (epoch 5), train_loss = 1.254, time/batch=0.018
7490/13520 (epoch 5), train_loss = 1.258, time/batch=0.018
7491/13520 (epoch 5), train_loss = 1.289, time/batch=0.018
7492/13520 (epoch 5), train_loss = 1.268, time/batch=0.018
7493/13520 (epoch 5), train_loss = 1.278, time/batch=0.018
7494/13520 (epoch 5), train_loss = 1.277, time/batch=0.0

7619/13520 (epoch 5), train_loss = 1.309, time/batch=0.019
7620/13520 (epoch 5), train_loss = 1.276, time/batch=0.018
7621/13520 (epoch 5), train_loss = 1.324, time/batch=0.018
7622/13520 (epoch 5), train_loss = 1.345, time/batch=0.018
7623/13520 (epoch 5), train_loss = 1.275, time/batch=0.018
7624/13520 (epoch 5), train_loss = 1.230, time/batch=0.018
7625/13520 (epoch 5), train_loss = 1.343, time/batch=0.018
7626/13520 (epoch 5), train_loss = 1.330, time/batch=0.018
7627/13520 (epoch 5), train_loss = 1.247, time/batch=0.018
7628/13520 (epoch 5), train_loss = 1.264, time/batch=0.018
7629/13520 (epoch 5), train_loss = 1.253, time/batch=0.017
7630/13520 (epoch 5), train_loss = 1.282, time/batch=0.019
7631/13520 (epoch 5), train_loss = 1.262, time/batch=0.018
7632/13520 (epoch 5), train_loss = 1.297, time/batch=0.018
7633/13520 (epoch 5), train_loss = 1.243, time/batch=0.018
7634/13520 (epoch 5), train_loss = 1.285, time/batch=0.018
7635/13520 (epoch 5), train_loss = 1.330, time/batch=0.0

7763/13520 (epoch 5), train_loss = 1.249, time/batch=0.018
7764/13520 (epoch 5), train_loss = 1.222, time/batch=0.018
7765/13520 (epoch 5), train_loss = 1.265, time/batch=0.018
7766/13520 (epoch 5), train_loss = 1.263, time/batch=0.018
7767/13520 (epoch 5), train_loss = 1.197, time/batch=0.018
7768/13520 (epoch 5), train_loss = 1.267, time/batch=0.017
7769/13520 (epoch 5), train_loss = 1.216, time/batch=0.017
7770/13520 (epoch 5), train_loss = 1.235, time/batch=0.018
7771/13520 (epoch 5), train_loss = 1.223, time/batch=0.018
7772/13520 (epoch 5), train_loss = 1.193, time/batch=0.018
7773/13520 (epoch 5), train_loss = 1.227, time/batch=0.018
7774/13520 (epoch 5), train_loss = 1.220, time/batch=0.018
7775/13520 (epoch 5), train_loss = 1.244, time/batch=0.018
7776/13520 (epoch 5), train_loss = 1.222, time/batch=0.018
7777/13520 (epoch 5), train_loss = 1.273, time/batch=0.018
7778/13520 (epoch 5), train_loss = 1.240, time/batch=0.018
7779/13520 (epoch 5), train_loss = 1.263, time/batch=0.0

7904/13520 (epoch 5), train_loss = 1.277, time/batch=0.018
7905/13520 (epoch 5), train_loss = 1.242, time/batch=0.017
7906/13520 (epoch 5), train_loss = 1.235, time/batch=0.018
7907/13520 (epoch 5), train_loss = 1.236, time/batch=0.018
7908/13520 (epoch 5), train_loss = 1.216, time/batch=0.018
7909/13520 (epoch 5), train_loss = 1.250, time/batch=0.018
7910/13520 (epoch 5), train_loss = 1.222, time/batch=0.019
7911/13520 (epoch 5), train_loss = 1.278, time/batch=0.018
7912/13520 (epoch 5), train_loss = 1.238, time/batch=0.017
7913/13520 (epoch 5), train_loss = 1.239, time/batch=0.018
7914/13520 (epoch 5), train_loss = 1.304, time/batch=0.019
7915/13520 (epoch 5), train_loss = 1.246, time/batch=0.018
7916/13520 (epoch 5), train_loss = 1.246, time/batch=0.017
7917/13520 (epoch 5), train_loss = 1.191, time/batch=0.017
7918/13520 (epoch 5), train_loss = 1.310, time/batch=0.018
7919/13520 (epoch 5), train_loss = 1.301, time/batch=0.018
7920/13520 (epoch 5), train_loss = 1.223, time/batch=0.0

8047/13520 (epoch 5), train_loss = 1.300, time/batch=0.018
8048/13520 (epoch 5), train_loss = 1.245, time/batch=0.018
8049/13520 (epoch 5), train_loss = 1.310, time/batch=0.018
8050/13520 (epoch 5), train_loss = 1.337, time/batch=0.018
8051/13520 (epoch 5), train_loss = 1.285, time/batch=0.017
8052/13520 (epoch 5), train_loss = 1.253, time/batch=0.018
8053/13520 (epoch 5), train_loss = 1.270, time/batch=0.017
8054/13520 (epoch 5), train_loss = 1.244, time/batch=0.017
8055/13520 (epoch 5), train_loss = 1.239, time/batch=0.018
8056/13520 (epoch 5), train_loss = 1.249, time/batch=0.018
8057/13520 (epoch 5), train_loss = 1.264, time/batch=0.018
8058/13520 (epoch 5), train_loss = 1.210, time/batch=0.018
8059/13520 (epoch 5), train_loss = 1.258, time/batch=0.017
8060/13520 (epoch 5), train_loss = 1.308, time/batch=0.017
8061/13520 (epoch 5), train_loss = 1.256, time/batch=0.018
8062/13520 (epoch 5), train_loss = 1.232, time/batch=0.018
8063/13520 (epoch 5), train_loss = 1.263, time/batch=0.0

8188/13520 (epoch 6), train_loss = 1.330, time/batch=0.017
8189/13520 (epoch 6), train_loss = 1.414, time/batch=0.018
8190/13520 (epoch 6), train_loss = 1.312, time/batch=0.018
8191/13520 (epoch 6), train_loss = 1.265, time/batch=0.018
8192/13520 (epoch 6), train_loss = 1.219, time/batch=0.018
8193/13520 (epoch 6), train_loss = 1.297, time/batch=0.018
8194/13520 (epoch 6), train_loss = 1.277, time/batch=0.018
8195/13520 (epoch 6), train_loss = 1.284, time/batch=0.018
8196/13520 (epoch 6), train_loss = 1.264, time/batch=0.018
8197/13520 (epoch 6), train_loss = 1.277, time/batch=0.018
8198/13520 (epoch 6), train_loss = 1.200, time/batch=0.018
8199/13520 (epoch 6), train_loss = 1.324, time/batch=0.018
8200/13520 (epoch 6), train_loss = 1.309, time/batch=0.018
8201/13520 (epoch 6), train_loss = 1.275, time/batch=0.018
8202/13520 (epoch 6), train_loss = 1.268, time/batch=0.018
8203/13520 (epoch 6), train_loss = 1.289, time/batch=0.017
8204/13520 (epoch 6), train_loss = 1.236, time/batch=0.0

8331/13520 (epoch 6), train_loss = 1.225, time/batch=0.018
8332/13520 (epoch 6), train_loss = 1.313, time/batch=0.019
8333/13520 (epoch 6), train_loss = 1.246, time/batch=0.018
8334/13520 (epoch 6), train_loss = 1.272, time/batch=0.017
8335/13520 (epoch 6), train_loss = 1.240, time/batch=0.017
8336/13520 (epoch 6), train_loss = 1.282, time/batch=0.018
8337/13520 (epoch 6), train_loss = 1.267, time/batch=0.018
8338/13520 (epoch 6), train_loss = 1.313, time/batch=0.018
8339/13520 (epoch 6), train_loss = 1.226, time/batch=0.019
8340/13520 (epoch 6), train_loss = 1.224, time/batch=0.018
8341/13520 (epoch 6), train_loss = 1.235, time/batch=0.018
8342/13520 (epoch 6), train_loss = 1.262, time/batch=0.018
8343/13520 (epoch 6), train_loss = 1.273, time/batch=0.018
8344/13520 (epoch 6), train_loss = 1.202, time/batch=0.018
8345/13520 (epoch 6), train_loss = 1.192, time/batch=0.017
8346/13520 (epoch 6), train_loss = 1.262, time/batch=0.018
8347/13520 (epoch 6), train_loss = 1.253, time/batch=0.0

8475/13520 (epoch 6), train_loss = 1.306, time/batch=0.018
8476/13520 (epoch 6), train_loss = 1.279, time/batch=0.019
8477/13520 (epoch 6), train_loss = 1.238, time/batch=0.017
8478/13520 (epoch 6), train_loss = 1.238, time/batch=0.018
8479/13520 (epoch 6), train_loss = 1.196, time/batch=0.017
8480/13520 (epoch 6), train_loss = 1.256, time/batch=0.018
8481/13520 (epoch 6), train_loss = 1.323, time/batch=0.018
8482/13520 (epoch 6), train_loss = 1.251, time/batch=0.017
8483/13520 (epoch 6), train_loss = 1.252, time/batch=0.019
8484/13520 (epoch 6), train_loss = 1.277, time/batch=0.018
8485/13520 (epoch 6), train_loss = 1.256, time/batch=0.017
8486/13520 (epoch 6), train_loss = 1.321, time/batch=0.017
8487/13520 (epoch 6), train_loss = 1.349, time/batch=0.017
8488/13520 (epoch 6), train_loss = 1.287, time/batch=0.017
8489/13520 (epoch 6), train_loss = 1.260, time/batch=0.018
8490/13520 (epoch 6), train_loss = 1.326, time/batch=0.018
8491/13520 (epoch 6), train_loss = 1.333, time/batch=0.0

8620/13520 (epoch 6), train_loss = 1.211, time/batch=0.018
8621/13520 (epoch 6), train_loss = 1.237, time/batch=0.018
8622/13520 (epoch 6), train_loss = 1.281, time/batch=0.017
8623/13520 (epoch 6), train_loss = 1.265, time/batch=0.018
8624/13520 (epoch 6), train_loss = 1.269, time/batch=0.018
8625/13520 (epoch 6), train_loss = 1.219, time/batch=0.019
8626/13520 (epoch 6), train_loss = 1.256, time/batch=0.018
8627/13520 (epoch 6), train_loss = 1.332, time/batch=0.018
8628/13520 (epoch 6), train_loss = 1.309, time/batch=0.018
8629/13520 (epoch 6), train_loss = 1.297, time/batch=0.018
8630/13520 (epoch 6), train_loss = 1.271, time/batch=0.018
8631/13520 (epoch 6), train_loss = 1.191, time/batch=0.019
8632/13520 (epoch 6), train_loss = 1.303, time/batch=0.018
8633/13520 (epoch 6), train_loss = 1.297, time/batch=0.017
8634/13520 (epoch 6), train_loss = 1.309, time/batch=0.017
8635/13520 (epoch 6), train_loss = 1.266, time/batch=0.019
8636/13520 (epoch 6), train_loss = 1.258, time/batch=0.0

8760/13520 (epoch 6), train_loss = 1.290, time/batch=0.019
8761/13520 (epoch 6), train_loss = 1.272, time/batch=0.019
8762/13520 (epoch 6), train_loss = 1.313, time/batch=0.018
8763/13520 (epoch 6), train_loss = 1.320, time/batch=0.017
8764/13520 (epoch 6), train_loss = 1.243, time/batch=0.018
8765/13520 (epoch 6), train_loss = 1.253, time/batch=0.018
8766/13520 (epoch 6), train_loss = 1.272, time/batch=0.017
8767/13520 (epoch 6), train_loss = 1.250, time/batch=0.018
8768/13520 (epoch 6), train_loss = 1.244, time/batch=0.018
8769/13520 (epoch 6), train_loss = 1.318, time/batch=0.018
8770/13520 (epoch 6), train_loss = 1.275, time/batch=0.019
8771/13520 (epoch 6), train_loss = 1.205, time/batch=0.018
8772/13520 (epoch 6), train_loss = 1.271, time/batch=0.019
8773/13520 (epoch 6), train_loss = 1.234, time/batch=0.018
8774/13520 (epoch 6), train_loss = 1.190, time/batch=0.019
8775/13520 (epoch 6), train_loss = 1.193, time/batch=0.018
8776/13520 (epoch 6), train_loss = 1.305, time/batch=0.0

8901/13520 (epoch 6), train_loss = 1.209, time/batch=0.017
8902/13520 (epoch 6), train_loss = 1.241, time/batch=0.018
8903/13520 (epoch 6), train_loss = 1.275, time/batch=0.017
8904/13520 (epoch 6), train_loss = 1.221, time/batch=0.018
8905/13520 (epoch 6), train_loss = 1.275, time/batch=0.017
8906/13520 (epoch 6), train_loss = 1.256, time/batch=0.018
8907/13520 (epoch 6), train_loss = 1.253, time/batch=0.018
8908/13520 (epoch 6), train_loss = 1.319, time/batch=0.017
8909/13520 (epoch 6), train_loss = 1.258, time/batch=0.018
8910/13520 (epoch 6), train_loss = 1.313, time/batch=0.018
8911/13520 (epoch 6), train_loss = 1.252, time/batch=0.018
8912/13520 (epoch 6), train_loss = 1.262, time/batch=0.018
8913/13520 (epoch 6), train_loss = 1.265, time/batch=0.018
8914/13520 (epoch 6), train_loss = 1.223, time/batch=0.017
8915/13520 (epoch 6), train_loss = 1.274, time/batch=0.017
8916/13520 (epoch 6), train_loss = 1.216, time/batch=0.017
8917/13520 (epoch 6), train_loss = 1.230, time/batch=0.0

9046/13520 (epoch 6), train_loss = 1.221, time/batch=0.018
9047/13520 (epoch 6), train_loss = 1.229, time/batch=0.018
9048/13520 (epoch 6), train_loss = 1.274, time/batch=0.018
9049/13520 (epoch 6), train_loss = 1.229, time/batch=0.018
9050/13520 (epoch 6), train_loss = 1.234, time/batch=0.018
9051/13520 (epoch 6), train_loss = 1.230, time/batch=0.018
9052/13520 (epoch 6), train_loss = 1.235, time/batch=0.018
9053/13520 (epoch 6), train_loss = 1.219, time/batch=0.018
9054/13520 (epoch 6), train_loss = 1.195, time/batch=0.018
9055/13520 (epoch 6), train_loss = 1.294, time/batch=0.019
9056/13520 (epoch 6), train_loss = 1.243, time/batch=0.018
9057/13520 (epoch 6), train_loss = 1.267, time/batch=0.018
9058/13520 (epoch 6), train_loss = 1.231, time/batch=0.018
9059/13520 (epoch 6), train_loss = 1.207, time/batch=0.018
9060/13520 (epoch 6), train_loss = 1.275, time/batch=0.018
9061/13520 (epoch 6), train_loss = 1.215, time/batch=0.017
9062/13520 (epoch 6), train_loss = 1.295, time/batch=0.0

9189/13520 (epoch 6), train_loss = 1.272, time/batch=0.018
9190/13520 (epoch 6), train_loss = 1.291, time/batch=0.018
9191/13520 (epoch 6), train_loss = 1.280, time/batch=0.018
9192/13520 (epoch 6), train_loss = 1.307, time/batch=0.018
9193/13520 (epoch 6), train_loss = 1.219, time/batch=0.017
9194/13520 (epoch 6), train_loss = 1.287, time/batch=0.018
9195/13520 (epoch 6), train_loss = 1.205, time/batch=0.018
9196/13520 (epoch 6), train_loss = 1.215, time/batch=0.018
9197/13520 (epoch 6), train_loss = 1.322, time/batch=0.017
9198/13520 (epoch 6), train_loss = 1.283, time/batch=0.017
9199/13520 (epoch 6), train_loss = 1.190, time/batch=0.018
9200/13520 (epoch 6), train_loss = 1.246, time/batch=0.018
9201/13520 (epoch 6), train_loss = 1.223, time/batch=0.018
9202/13520 (epoch 6), train_loss = 1.271, time/batch=0.018
9203/13520 (epoch 6), train_loss = 1.258, time/batch=0.018
9204/13520 (epoch 6), train_loss = 1.303, time/batch=0.018
9205/13520 (epoch 6), train_loss = 1.241, time/batch=0.0

9333/13520 (epoch 6), train_loss = 1.235, time/batch=0.018
9334/13520 (epoch 6), train_loss = 1.281, time/batch=0.018
9335/13520 (epoch 6), train_loss = 1.214, time/batch=0.017
9336/13520 (epoch 6), train_loss = 1.269, time/batch=0.017
9337/13520 (epoch 6), train_loss = 1.276, time/batch=0.018
9338/13520 (epoch 6), train_loss = 1.272, time/batch=0.018
9339/13520 (epoch 6), train_loss = 1.267, time/batch=0.018
9340/13520 (epoch 6), train_loss = 1.289, time/batch=0.017
9341/13520 (epoch 6), train_loss = 1.224, time/batch=0.017
9342/13520 (epoch 6), train_loss = 1.250, time/batch=0.018
9343/13520 (epoch 6), train_loss = 1.266, time/batch=0.018
9344/13520 (epoch 6), train_loss = 1.337, time/batch=0.017
9345/13520 (epoch 6), train_loss = 1.294, time/batch=0.018
9346/13520 (epoch 6), train_loss = 1.199, time/batch=0.018
9347/13520 (epoch 6), train_loss = 1.235, time/batch=0.018
9348/13520 (epoch 6), train_loss = 1.237, time/batch=0.019
9349/13520 (epoch 6), train_loss = 1.259, time/batch=0.0

9476/13520 (epoch 7), train_loss = 1.238, time/batch=0.017
9477/13520 (epoch 7), train_loss = 1.277, time/batch=0.018
9478/13520 (epoch 7), train_loss = 1.269, time/batch=0.018
9479/13520 (epoch 7), train_loss = 1.251, time/batch=0.018
9480/13520 (epoch 7), train_loss = 1.267, time/batch=0.018
9481/13520 (epoch 7), train_loss = 1.271, time/batch=0.018
9482/13520 (epoch 7), train_loss = 1.268, time/batch=0.017
9483/13520 (epoch 7), train_loss = 1.261, time/batch=0.018
9484/13520 (epoch 7), train_loss = 1.254, time/batch=0.018
9485/13520 (epoch 7), train_loss = 1.209, time/batch=0.017
9486/13520 (epoch 7), train_loss = 1.308, time/batch=0.018
9487/13520 (epoch 7), train_loss = 1.303, time/batch=0.018
9488/13520 (epoch 7), train_loss = 1.350, time/batch=0.017
9489/13520 (epoch 7), train_loss = 1.166, time/batch=0.018
9490/13520 (epoch 7), train_loss = 1.252, time/batch=0.017
9491/13520 (epoch 7), train_loss = 1.294, time/batch=0.018
9492/13520 (epoch 7), train_loss = 1.324, time/batch=0.0

9620/13520 (epoch 7), train_loss = 1.284, time/batch=0.018
9621/13520 (epoch 7), train_loss = 1.278, time/batch=0.018
9622/13520 (epoch 7), train_loss = 1.267, time/batch=0.018
9623/13520 (epoch 7), train_loss = 1.259, time/batch=0.017
9624/13520 (epoch 7), train_loss = 1.225, time/batch=0.017
9625/13520 (epoch 7), train_loss = 1.205, time/batch=0.017
9626/13520 (epoch 7), train_loss = 1.223, time/batch=0.018
9627/13520 (epoch 7), train_loss = 1.271, time/batch=0.018
9628/13520 (epoch 7), train_loss = 1.165, time/batch=0.017
9629/13520 (epoch 7), train_loss = 1.255, time/batch=0.017
9630/13520 (epoch 7), train_loss = 1.245, time/batch=0.018
9631/13520 (epoch 7), train_loss = 1.226, time/batch=0.018
9632/13520 (epoch 7), train_loss = 1.267, time/batch=0.018
9633/13520 (epoch 7), train_loss = 1.271, time/batch=0.018
9634/13520 (epoch 7), train_loss = 1.257, time/batch=0.018
9635/13520 (epoch 7), train_loss = 1.241, time/batch=0.018
9636/13520 (epoch 7), train_loss = 1.294, time/batch=0.0

9764/13520 (epoch 7), train_loss = 1.227, time/batch=0.018
9765/13520 (epoch 7), train_loss = 1.288, time/batch=0.018
9766/13520 (epoch 7), train_loss = 1.246, time/batch=0.018
9767/13520 (epoch 7), train_loss = 1.276, time/batch=0.017
9768/13520 (epoch 7), train_loss = 1.286, time/batch=0.017
9769/13520 (epoch 7), train_loss = 1.289, time/batch=0.017
9770/13520 (epoch 7), train_loss = 1.298, time/batch=0.017
9771/13520 (epoch 7), train_loss = 1.295, time/batch=0.019
9772/13520 (epoch 7), train_loss = 1.286, time/batch=0.018
9773/13520 (epoch 7), train_loss = 1.281, time/batch=0.018
9774/13520 (epoch 7), train_loss = 1.245, time/batch=0.018
9775/13520 (epoch 7), train_loss = 1.212, time/batch=0.018
9776/13520 (epoch 7), train_loss = 1.229, time/batch=0.018
9777/13520 (epoch 7), train_loss = 1.288, time/batch=0.018
9778/13520 (epoch 7), train_loss = 1.260, time/batch=0.017
9779/13520 (epoch 7), train_loss = 1.324, time/batch=0.018
9780/13520 (epoch 7), train_loss = 1.266, time/batch=0.0

9908/13520 (epoch 7), train_loss = 1.271, time/batch=0.019
9909/13520 (epoch 7), train_loss = 1.321, time/batch=0.018
9910/13520 (epoch 7), train_loss = 1.217, time/batch=0.018
9911/13520 (epoch 7), train_loss = 1.225, time/batch=0.018
9912/13520 (epoch 7), train_loss = 1.184, time/batch=0.018
9913/13520 (epoch 7), train_loss = 1.215, time/batch=0.019
9914/13520 (epoch 7), train_loss = 1.175, time/batch=0.018
9915/13520 (epoch 7), train_loss = 1.283, time/batch=0.017
9916/13520 (epoch 7), train_loss = 1.303, time/batch=0.018
9917/13520 (epoch 7), train_loss = 1.195, time/batch=0.017
9918/13520 (epoch 7), train_loss = 1.188, time/batch=0.017
9919/13520 (epoch 7), train_loss = 1.219, time/batch=0.018
9920/13520 (epoch 7), train_loss = 1.270, time/batch=0.018
9921/13520 (epoch 7), train_loss = 1.258, time/batch=0.018
9922/13520 (epoch 7), train_loss = 1.279, time/batch=0.018
9923/13520 (epoch 7), train_loss = 1.251, time/batch=0.017
9924/13520 (epoch 7), train_loss = 1.247, time/batch=0.0

10046/13520 (epoch 7), train_loss = 1.222, time/batch=0.017
10047/13520 (epoch 7), train_loss = 1.212, time/batch=0.018
10048/13520 (epoch 7), train_loss = 1.245, time/batch=0.018
10049/13520 (epoch 7), train_loss = 1.306, time/batch=0.018
10050/13520 (epoch 7), train_loss = 1.234, time/batch=0.017
10051/13520 (epoch 7), train_loss = 1.318, time/batch=0.018
10052/13520 (epoch 7), train_loss = 1.226, time/batch=0.018
10053/13520 (epoch 7), train_loss = 1.264, time/batch=0.017
10054/13520 (epoch 7), train_loss = 1.303, time/batch=0.018
10055/13520 (epoch 7), train_loss = 1.205, time/batch=0.018
10056/13520 (epoch 7), train_loss = 1.226, time/batch=0.017
10057/13520 (epoch 7), train_loss = 1.247, time/batch=0.018
10058/13520 (epoch 7), train_loss = 1.200, time/batch=0.018
10059/13520 (epoch 7), train_loss = 1.244, time/batch=0.018
10060/13520 (epoch 7), train_loss = 1.244, time/batch=0.017
10061/13520 (epoch 7), train_loss = 1.226, time/batch=0.017
10062/13520 (epoch 7), train_loss = 1.23

10190/13520 (epoch 7), train_loss = 1.253, time/batch=0.017
10191/13520 (epoch 7), train_loss = 1.257, time/batch=0.017
10192/13520 (epoch 7), train_loss = 1.264, time/batch=0.018
10193/13520 (epoch 7), train_loss = 1.244, time/batch=0.018
10194/13520 (epoch 7), train_loss = 1.238, time/batch=0.018
10195/13520 (epoch 7), train_loss = 1.273, time/batch=0.018
10196/13520 (epoch 7), train_loss = 1.249, time/batch=0.018
10197/13520 (epoch 7), train_loss = 1.257, time/batch=0.017
10198/13520 (epoch 7), train_loss = 1.265, time/batch=0.018
10199/13520 (epoch 7), train_loss = 1.207, time/batch=0.018
10200/13520 (epoch 7), train_loss = 1.310, time/batch=0.018
10201/13520 (epoch 7), train_loss = 1.226, time/batch=0.018
10202/13520 (epoch 7), train_loss = 1.194, time/batch=0.017
10203/13520 (epoch 7), train_loss = 1.275, time/batch=0.018
10204/13520 (epoch 7), train_loss = 1.183, time/batch=0.018
10205/13520 (epoch 7), train_loss = 1.210, time/batch=0.018
10206/13520 (epoch 7), train_loss = 1.18

10334/13520 (epoch 7), train_loss = 1.262, time/batch=0.018
10335/13520 (epoch 7), train_loss = 1.241, time/batch=0.018
10336/13520 (epoch 7), train_loss = 1.279, time/batch=0.018
10337/13520 (epoch 7), train_loss = 1.234, time/batch=0.017
10338/13520 (epoch 7), train_loss = 1.265, time/batch=0.017
10339/13520 (epoch 7), train_loss = 1.315, time/batch=0.017
10340/13520 (epoch 7), train_loss = 1.287, time/batch=0.017
10341/13520 (epoch 7), train_loss = 1.179, time/batch=0.018
10342/13520 (epoch 7), train_loss = 1.237, time/batch=0.018
10343/13520 (epoch 7), train_loss = 1.156, time/batch=0.018
10344/13520 (epoch 7), train_loss = 1.204, time/batch=0.017
10345/13520 (epoch 7), train_loss = 1.229, time/batch=0.018
10346/13520 (epoch 7), train_loss = 1.240, time/batch=0.018
10347/13520 (epoch 7), train_loss = 1.253, time/batch=0.018
10348/13520 (epoch 7), train_loss = 1.221, time/batch=0.018
10349/13520 (epoch 7), train_loss = 1.243, time/batch=0.017
10350/13520 (epoch 7), train_loss = 1.21

10478/13520 (epoch 7), train_loss = 1.207, time/batch=0.018
10479/13520 (epoch 7), train_loss = 1.233, time/batch=0.018
10480/13520 (epoch 7), train_loss = 1.203, time/batch=0.018
10481/13520 (epoch 7), train_loss = 1.259, time/batch=0.018
10482/13520 (epoch 7), train_loss = 1.230, time/batch=0.018
10483/13520 (epoch 7), train_loss = 1.244, time/batch=0.017
10484/13520 (epoch 7), train_loss = 1.219, time/batch=0.018
10485/13520 (epoch 7), train_loss = 1.210, time/batch=0.018
10486/13520 (epoch 7), train_loss = 1.219, time/batch=0.017
10487/13520 (epoch 7), train_loss = 1.220, time/batch=0.018
10488/13520 (epoch 7), train_loss = 1.223, time/batch=0.018
10489/13520 (epoch 7), train_loss = 1.167, time/batch=0.018
10490/13520 (epoch 7), train_loss = 1.274, time/batch=0.018
10491/13520 (epoch 7), train_loss = 1.187, time/batch=0.017
10492/13520 (epoch 7), train_loss = 1.275, time/batch=0.018
10493/13520 (epoch 7), train_loss = 1.256, time/batch=0.018
10494/13520 (epoch 7), train_loss = 1.26

10620/13520 (epoch 7), train_loss = 1.235, time/batch=0.018
10621/13520 (epoch 7), train_loss = 1.175, time/batch=0.018
10622/13520 (epoch 7), train_loss = 1.296, time/batch=0.018
10623/13520 (epoch 7), train_loss = 1.278, time/batch=0.018
10624/13520 (epoch 7), train_loss = 1.206, time/batch=0.017
10625/13520 (epoch 7), train_loss = 1.212, time/batch=0.018
10626/13520 (epoch 7), train_loss = 1.227, time/batch=0.018
10627/13520 (epoch 7), train_loss = 1.194, time/batch=0.018
10628/13520 (epoch 7), train_loss = 1.254, time/batch=0.018
10629/13520 (epoch 7), train_loss = 1.200, time/batch=0.018
10630/13520 (epoch 7), train_loss = 1.213, time/batch=0.017
10631/13520 (epoch 7), train_loss = 1.231, time/batch=0.018
10632/13520 (epoch 7), train_loss = 1.264, time/batch=0.018
10633/13520 (epoch 7), train_loss = 1.168, time/batch=0.018
10634/13520 (epoch 7), train_loss = 1.207, time/batch=0.018
10635/13520 (epoch 7), train_loss = 1.212, time/batch=0.017
10636/13520 (epoch 7), train_loss = 1.22

10763/13520 (epoch 7), train_loss = 1.248, time/batch=0.018
10764/13520 (epoch 7), train_loss = 1.295, time/batch=0.019
10765/13520 (epoch 7), train_loss = 1.246, time/batch=0.018
10766/13520 (epoch 7), train_loss = 1.211, time/batch=0.018
10767/13520 (epoch 7), train_loss = 1.254, time/batch=0.018
10768/13520 (epoch 7), train_loss = 1.250, time/batch=0.018
10769/13520 (epoch 7), train_loss = 1.196, time/batch=0.018
10770/13520 (epoch 7), train_loss = 1.264, time/batch=0.018
10771/13520 (epoch 7), train_loss = 1.266, time/batch=0.018
10772/13520 (epoch 7), train_loss = 1.263, time/batch=0.018
10773/13520 (epoch 7), train_loss = 1.236, time/batch=0.018
10774/13520 (epoch 7), train_loss = 1.199, time/batch=0.018
10775/13520 (epoch 7), train_loss = 1.241, time/batch=0.018
10776/13520 (epoch 7), train_loss = 1.243, time/batch=0.017
10777/13520 (epoch 7), train_loss = 1.293, time/batch=0.018
10778/13520 (epoch 7), train_loss = 1.298, time/batch=0.018
10779/13520 (epoch 7), train_loss = 1.25

10903/13520 (epoch 8), train_loss = 1.314, time/batch=0.018
10904/13520 (epoch 8), train_loss = 1.285, time/batch=0.018
10905/13520 (epoch 8), train_loss = 1.273, time/batch=0.018
10906/13520 (epoch 8), train_loss = 1.253, time/batch=0.018
10907/13520 (epoch 8), train_loss = 1.278, time/batch=0.017
10908/13520 (epoch 8), train_loss = 1.220, time/batch=0.018
10909/13520 (epoch 8), train_loss = 1.226, time/batch=0.017
10910/13520 (epoch 8), train_loss = 1.227, time/batch=0.017
10911/13520 (epoch 8), train_loss = 1.177, time/batch=0.017
10912/13520 (epoch 8), train_loss = 1.187, time/batch=0.017
10913/13520 (epoch 8), train_loss = 1.254, time/batch=0.017
10914/13520 (epoch 8), train_loss = 1.245, time/batch=0.018
10915/13520 (epoch 8), train_loss = 1.300, time/batch=0.018
10916/13520 (epoch 8), train_loss = 1.171, time/batch=0.018
10917/13520 (epoch 8), train_loss = 1.250, time/batch=0.018
10918/13520 (epoch 8), train_loss = 1.281, time/batch=0.017
10919/13520 (epoch 8), train_loss = 1.17

11047/13520 (epoch 8), train_loss = 1.253, time/batch=0.018
11048/13520 (epoch 8), train_loss = 1.192, time/batch=0.018
11049/13520 (epoch 8), train_loss = 1.180, time/batch=0.018
11050/13520 (epoch 8), train_loss = 1.243, time/batch=0.018
11051/13520 (epoch 8), train_loss = 1.232, time/batch=0.018
11052/13520 (epoch 8), train_loss = 1.227, time/batch=0.018
11053/13520 (epoch 8), train_loss = 1.249, time/batch=0.018
11054/13520 (epoch 8), train_loss = 1.304, time/batch=0.018
11055/13520 (epoch 8), train_loss = 1.257, time/batch=0.018
11056/13520 (epoch 8), train_loss = 1.260, time/batch=0.018
11057/13520 (epoch 8), train_loss = 1.202, time/batch=0.018
11058/13520 (epoch 8), train_loss = 1.229, time/batch=0.018
11059/13520 (epoch 8), train_loss = 1.263, time/batch=0.018
11060/13520 (epoch 8), train_loss = 1.174, time/batch=0.018
11061/13520 (epoch 8), train_loss = 1.262, time/batch=0.019
11062/13520 (epoch 8), train_loss = 1.225, time/batch=0.018
11063/13520 (epoch 8), train_loss = 1.23

11191/13520 (epoch 8), train_loss = 1.335, time/batch=0.017
11192/13520 (epoch 8), train_loss = 1.273, time/batch=0.018
11193/13520 (epoch 8), train_loss = 1.246, time/batch=0.018
11194/13520 (epoch 8), train_loss = 1.313, time/batch=0.017
11195/13520 (epoch 8), train_loss = 1.312, time/batch=0.017
11196/13520 (epoch 8), train_loss = 1.283, time/batch=0.017
11197/13520 (epoch 8), train_loss = 1.236, time/batch=0.018
11198/13520 (epoch 8), train_loss = 1.243, time/batch=0.018
11199/13520 (epoch 8), train_loss = 1.278, time/batch=0.020
11200/13520 (epoch 8), train_loss = 1.315, time/batch=0.018
11201/13520 (epoch 8), train_loss = 1.288, time/batch=0.017
11202/13520 (epoch 8), train_loss = 1.240, time/batch=0.017
11203/13520 (epoch 8), train_loss = 1.239, time/batch=0.018
11204/13520 (epoch 8), train_loss = 1.271, time/batch=0.018
11205/13520 (epoch 8), train_loss = 1.249, time/batch=0.018
11206/13520 (epoch 8), train_loss = 1.254, time/batch=0.018
11207/13520 (epoch 8), train_loss = 1.23

11334/13520 (epoch 8), train_loss = 1.256, time/batch=0.017
11335/13520 (epoch 8), train_loss = 1.179, time/batch=0.018
11336/13520 (epoch 8), train_loss = 1.282, time/batch=0.017
11337/13520 (epoch 8), train_loss = 1.281, time/batch=0.018
11338/13520 (epoch 8), train_loss = 1.299, time/batch=0.018
11339/13520 (epoch 8), train_loss = 1.247, time/batch=0.017
11340/13520 (epoch 8), train_loss = 1.244, time/batch=0.018
11341/13520 (epoch 8), train_loss = 1.243, time/batch=0.018
11342/13520 (epoch 8), train_loss = 1.280, time/batch=0.018
11343/13520 (epoch 8), train_loss = 1.203, time/batch=0.018
11344/13520 (epoch 8), train_loss = 1.265, time/batch=0.018
11345/13520 (epoch 8), train_loss = 1.230, time/batch=0.019
11346/13520 (epoch 8), train_loss = 1.273, time/batch=0.018
11347/13520 (epoch 8), train_loss = 1.271, time/batch=0.018
11348/13520 (epoch 8), train_loss = 1.265, time/batch=0.017
11349/13520 (epoch 8), train_loss = 1.213, time/batch=0.018
11350/13520 (epoch 8), train_loss = 1.22

11478/13520 (epoch 8), train_loss = 1.178, time/batch=0.018
11479/13520 (epoch 8), train_loss = 1.184, time/batch=0.018
11480/13520 (epoch 8), train_loss = 1.294, time/batch=0.018
11481/13520 (epoch 8), train_loss = 1.249, time/batch=0.018
11482/13520 (epoch 8), train_loss = 1.273, time/batch=0.017
11483/13520 (epoch 8), train_loss = 1.233, time/batch=0.018
11484/13520 (epoch 8), train_loss = 1.306, time/batch=0.018
11485/13520 (epoch 8), train_loss = 1.219, time/batch=0.018
11486/13520 (epoch 8), train_loss = 1.147, time/batch=0.018
11487/13520 (epoch 8), train_loss = 1.236, time/batch=0.018
11488/13520 (epoch 8), train_loss = 1.177, time/batch=0.018
11489/13520 (epoch 8), train_loss = 1.311, time/batch=0.018
11490/13520 (epoch 8), train_loss = 1.292, time/batch=0.019
11491/13520 (epoch 8), train_loss = 1.211, time/batch=0.018
11492/13520 (epoch 8), train_loss = 1.255, time/batch=0.018
11493/13520 (epoch 8), train_loss = 1.276, time/batch=0.017
11494/13520 (epoch 8), train_loss = 1.28

11620/13520 (epoch 8), train_loss = 1.209, time/batch=0.017
11621/13520 (epoch 8), train_loss = 1.212, time/batch=0.018
11622/13520 (epoch 8), train_loss = 1.256, time/batch=0.018
11623/13520 (epoch 8), train_loss = 1.229, time/batch=0.018
11624/13520 (epoch 8), train_loss = 1.162, time/batch=0.018
11625/13520 (epoch 8), train_loss = 1.270, time/batch=0.018
11626/13520 (epoch 8), train_loss = 1.208, time/batch=0.018
11627/13520 (epoch 8), train_loss = 1.272, time/batch=0.018
11628/13520 (epoch 8), train_loss = 1.261, time/batch=0.018
11629/13520 (epoch 8), train_loss = 1.279, time/batch=0.018
11630/13520 (epoch 8), train_loss = 1.279, time/batch=0.018
11631/13520 (epoch 8), train_loss = 1.197, time/batch=0.018
11632/13520 (epoch 8), train_loss = 1.288, time/batch=0.017
11633/13520 (epoch 8), train_loss = 1.204, time/batch=0.019
11634/13520 (epoch 8), train_loss = 1.226, time/batch=0.022
11635/13520 (epoch 8), train_loss = 1.241, time/batch=0.018
11636/13520 (epoch 8), train_loss = 1.18

11761/13520 (epoch 8), train_loss = 1.247, time/batch=0.018
11762/13520 (epoch 8), train_loss = 1.215, time/batch=0.018
11763/13520 (epoch 8), train_loss = 1.201, time/batch=0.018
11764/13520 (epoch 8), train_loss = 1.260, time/batch=0.021
11765/13520 (epoch 8), train_loss = 1.200, time/batch=0.021
11766/13520 (epoch 8), train_loss = 1.281, time/batch=0.017
11767/13520 (epoch 8), train_loss = 1.200, time/batch=0.018
11768/13520 (epoch 8), train_loss = 1.278, time/batch=0.018
11769/13520 (epoch 8), train_loss = 1.198, time/batch=0.018
11770/13520 (epoch 8), train_loss = 1.184, time/batch=0.018
11771/13520 (epoch 8), train_loss = 1.282, time/batch=0.018
11772/13520 (epoch 8), train_loss = 1.253, time/batch=0.018
11773/13520 (epoch 8), train_loss = 1.258, time/batch=0.018
11774/13520 (epoch 8), train_loss = 1.191, time/batch=0.018
11775/13520 (epoch 8), train_loss = 1.231, time/batch=0.018
11776/13520 (epoch 8), train_loss = 1.203, time/batch=0.018
11777/13520 (epoch 8), train_loss = 1.15

11904/13520 (epoch 8), train_loss = 1.237, time/batch=0.017
11905/13520 (epoch 8), train_loss = 1.210, time/batch=0.017
11906/13520 (epoch 8), train_loss = 1.262, time/batch=0.018
11907/13520 (epoch 8), train_loss = 1.241, time/batch=0.018
11908/13520 (epoch 8), train_loss = 1.286, time/batch=0.018
11909/13520 (epoch 8), train_loss = 1.226, time/batch=0.018
11910/13520 (epoch 8), train_loss = 1.231, time/batch=0.018
11911/13520 (epoch 8), train_loss = 1.206, time/batch=0.018
11912/13520 (epoch 8), train_loss = 1.222, time/batch=0.018
11913/13520 (epoch 8), train_loss = 1.226, time/batch=0.018
11914/13520 (epoch 8), train_loss = 1.188, time/batch=0.018
11915/13520 (epoch 8), train_loss = 1.165, time/batch=0.018
11916/13520 (epoch 8), train_loss = 1.220, time/batch=0.018
11917/13520 (epoch 8), train_loss = 1.224, time/batch=0.017
11918/13520 (epoch 8), train_loss = 1.186, time/batch=0.018
11919/13520 (epoch 8), train_loss = 1.187, time/batch=0.018
11920/13520 (epoch 8), train_loss = 1.29

12048/13520 (epoch 8), train_loss = 1.326, time/batch=0.018
12049/13520 (epoch 8), train_loss = 1.274, time/batch=0.018
12050/13520 (epoch 8), train_loss = 1.180, time/batch=0.018
12051/13520 (epoch 8), train_loss = 1.221, time/batch=0.018
12052/13520 (epoch 8), train_loss = 1.220, time/batch=0.019
12053/13520 (epoch 8), train_loss = 1.248, time/batch=0.017
12054/13520 (epoch 8), train_loss = 1.215, time/batch=0.017
12055/13520 (epoch 8), train_loss = 1.192, time/batch=0.018
12056/13520 (epoch 8), train_loss = 1.282, time/batch=0.018
12057/13520 (epoch 8), train_loss = 1.207, time/batch=0.018
12058/13520 (epoch 8), train_loss = 1.236, time/batch=0.017
12059/13520 (epoch 8), train_loss = 1.268, time/batch=0.018
12060/13520 (epoch 8), train_loss = 1.323, time/batch=0.018
12061/13520 (epoch 8), train_loss = 1.231, time/batch=0.018
12062/13520 (epoch 8), train_loss = 1.224, time/batch=0.017
12063/13520 (epoch 8), train_loss = 1.232, time/batch=0.018
12064/13520 (epoch 8), train_loss = 1.26

12192/13520 (epoch 9), train_loss = 1.335, time/batch=0.019
12193/13520 (epoch 9), train_loss = 1.155, time/batch=0.017
12194/13520 (epoch 9), train_loss = 1.243, time/batch=0.018
12195/13520 (epoch 9), train_loss = 1.280, time/batch=0.018
12196/13520 (epoch 9), train_loss = 1.304, time/batch=0.018
12197/13520 (epoch 9), train_loss = 1.219, time/batch=0.018
12198/13520 (epoch 9), train_loss = 1.221, time/batch=0.018
12199/13520 (epoch 9), train_loss = 1.309, time/batch=0.018
12200/13520 (epoch 9), train_loss = 1.232, time/batch=0.018
12201/13520 (epoch 9), train_loss = 1.293, time/batch=0.018
12202/13520 (epoch 9), train_loss = 1.271, time/batch=0.018
12203/13520 (epoch 9), train_loss = 1.244, time/batch=0.018
12204/13520 (epoch 9), train_loss = 1.201, time/batch=0.017
12205/13520 (epoch 9), train_loss = 1.263, time/batch=0.018
12206/13520 (epoch 9), train_loss = 1.268, time/batch=0.019
12207/13520 (epoch 9), train_loss = 1.237, time/batch=0.018
12208/13520 (epoch 9), train_loss = 1.17

12335/13520 (epoch 9), train_loss = 1.213, time/batch=0.017
12336/13520 (epoch 9), train_loss = 1.257, time/batch=0.017
12337/13520 (epoch 9), train_loss = 1.254, time/batch=0.018
12338/13520 (epoch 9), train_loss = 1.244, time/batch=0.017
12339/13520 (epoch 9), train_loss = 1.225, time/batch=0.018
12340/13520 (epoch 9), train_loss = 1.280, time/batch=0.018
12341/13520 (epoch 9), train_loss = 1.209, time/batch=0.017
12342/13520 (epoch 9), train_loss = 1.225, time/batch=0.018
12343/13520 (epoch 9), train_loss = 1.180, time/batch=0.018
12344/13520 (epoch 9), train_loss = 1.234, time/batch=0.018
12345/13520 (epoch 9), train_loss = 1.282, time/batch=0.018
12346/13520 (epoch 9), train_loss = 1.205, time/batch=0.018
12347/13520 (epoch 9), train_loss = 1.223, time/batch=0.018
12348/13520 (epoch 9), train_loss = 1.185, time/batch=0.019
12349/13520 (epoch 9), train_loss = 1.184, time/batch=0.017
12350/13520 (epoch 9), train_loss = 1.214, time/batch=0.018
12351/13520 (epoch 9), train_loss = 1.30

12478/13520 (epoch 9), train_loss = 1.238, time/batch=0.018
12479/13520 (epoch 9), train_loss = 1.202, time/batch=0.018
12480/13520 (epoch 9), train_loss = 1.217, time/batch=0.017
12481/13520 (epoch 9), train_loss = 1.275, time/batch=0.018
12482/13520 (epoch 9), train_loss = 1.250, time/batch=0.018
12483/13520 (epoch 9), train_loss = 1.306, time/batch=0.017
12484/13520 (epoch 9), train_loss = 1.252, time/batch=0.018
12485/13520 (epoch 9), train_loss = 1.258, time/batch=0.019
12486/13520 (epoch 9), train_loss = 1.253, time/batch=0.018
12487/13520 (epoch 9), train_loss = 1.273, time/batch=0.018
12488/13520 (epoch 9), train_loss = 1.314, time/batch=0.018
12489/13520 (epoch 9), train_loss = 1.256, time/batch=0.018
12490/13520 (epoch 9), train_loss = 1.241, time/batch=0.018
12491/13520 (epoch 9), train_loss = 1.221, time/batch=0.019
12492/13520 (epoch 9), train_loss = 1.243, time/batch=0.018
12493/13520 (epoch 9), train_loss = 1.212, time/batch=0.018
12494/13520 (epoch 9), train_loss = 1.19

12618/13520 (epoch 9), train_loss = 1.163, time/batch=0.018
12619/13520 (epoch 9), train_loss = 1.275, time/batch=0.018
12620/13520 (epoch 9), train_loss = 1.293, time/batch=0.018
12621/13520 (epoch 9), train_loss = 1.176, time/batch=0.018
12622/13520 (epoch 9), train_loss = 1.172, time/batch=0.018
12623/13520 (epoch 9), train_loss = 1.203, time/batch=0.018
12624/13520 (epoch 9), train_loss = 1.251, time/batch=0.018
12625/13520 (epoch 9), train_loss = 1.242, time/batch=0.018
12626/13520 (epoch 9), train_loss = 1.258, time/batch=0.018
12627/13520 (epoch 9), train_loss = 1.232, time/batch=0.019
12628/13520 (epoch 9), train_loss = 1.243, time/batch=0.017
12629/13520 (epoch 9), train_loss = 1.187, time/batch=0.018
12630/13520 (epoch 9), train_loss = 1.253, time/batch=0.018
12631/13520 (epoch 9), train_loss = 1.249, time/batch=0.017
12632/13520 (epoch 9), train_loss = 1.221, time/batch=0.018
12633/13520 (epoch 9), train_loss = 1.229, time/batch=0.018
12634/13520 (epoch 9), train_loss = 1.21

12760/13520 (epoch 9), train_loss = 1.213, time/batch=0.018
12761/13520 (epoch 9), train_loss = 1.234, time/batch=0.018
12762/13520 (epoch 9), train_loss = 1.193, time/batch=0.017
12763/13520 (epoch 9), train_loss = 1.227, time/batch=0.017
12764/13520 (epoch 9), train_loss = 1.229, time/batch=0.017
12765/13520 (epoch 9), train_loss = 1.212, time/batch=0.017
12766/13520 (epoch 9), train_loss = 1.238, time/batch=0.017
12767/13520 (epoch 9), train_loss = 1.230, time/batch=0.018
12768/13520 (epoch 9), train_loss = 1.263, time/batch=0.017
12769/13520 (epoch 9), train_loss = 1.177, time/batch=0.018
12770/13520 (epoch 9), train_loss = 1.264, time/batch=0.018
12771/13520 (epoch 9), train_loss = 1.218, time/batch=0.018
12772/13520 (epoch 9), train_loss = 1.237, time/batch=0.018
12773/13520 (epoch 9), train_loss = 1.207, time/batch=0.018
12774/13520 (epoch 9), train_loss = 1.283, time/batch=0.017
12775/13520 (epoch 9), train_loss = 1.220, time/batch=0.017
12776/13520 (epoch 9), train_loss = 1.28

12903/13520 (epoch 9), train_loss = 1.195, time/batch=0.019
12904/13520 (epoch 9), train_loss = 1.298, time/batch=0.017
12905/13520 (epoch 9), train_loss = 1.215, time/batch=0.018
12906/13520 (epoch 9), train_loss = 1.185, time/batch=0.018
12907/13520 (epoch 9), train_loss = 1.258, time/batch=0.018
12908/13520 (epoch 9), train_loss = 1.173, time/batch=0.018
12909/13520 (epoch 9), train_loss = 1.202, time/batch=0.017
12910/13520 (epoch 9), train_loss = 1.178, time/batch=0.018
12911/13520 (epoch 9), train_loss = 1.215, time/batch=0.018
12912/13520 (epoch 9), train_loss = 1.249, time/batch=0.018
12913/13520 (epoch 9), train_loss = 1.259, time/batch=0.018
12914/13520 (epoch 9), train_loss = 1.216, time/batch=0.018
12915/13520 (epoch 9), train_loss = 1.231, time/batch=0.018
12916/13520 (epoch 9), train_loss = 1.242, time/batch=0.018
12917/13520 (epoch 9), train_loss = 1.215, time/batch=0.018
12918/13520 (epoch 9), train_loss = 1.166, time/batch=0.018
12919/13520 (epoch 9), train_loss = 1.21

13047/13520 (epoch 9), train_loss = 1.150, time/batch=0.018
13048/13520 (epoch 9), train_loss = 1.196, time/batch=0.018
13049/13520 (epoch 9), train_loss = 1.211, time/batch=0.018
13050/13520 (epoch 9), train_loss = 1.226, time/batch=0.018
13051/13520 (epoch 9), train_loss = 1.248, time/batch=0.018
13052/13520 (epoch 9), train_loss = 1.208, time/batch=0.018
13053/13520 (epoch 9), train_loss = 1.236, time/batch=0.018
13054/13520 (epoch 9), train_loss = 1.210, time/batch=0.019
13055/13520 (epoch 9), train_loss = 1.179, time/batch=0.018
13056/13520 (epoch 9), train_loss = 1.302, time/batch=0.018
13057/13520 (epoch 9), train_loss = 1.253, time/batch=0.018
13058/13520 (epoch 9), train_loss = 1.298, time/batch=0.017
13059/13520 (epoch 9), train_loss = 1.289, time/batch=0.019
13060/13520 (epoch 9), train_loss = 1.255, time/batch=0.018
13061/13520 (epoch 9), train_loss = 1.226, time/batch=0.018
13062/13520 (epoch 9), train_loss = 1.262, time/batch=0.018
13063/13520 (epoch 9), train_loss = 1.23

13191/13520 (epoch 9), train_loss = 1.210, time/batch=0.018
13192/13520 (epoch 9), train_loss = 1.222, time/batch=0.019
13193/13520 (epoch 9), train_loss = 1.156, time/batch=0.019
13194/13520 (epoch 9), train_loss = 1.264, time/batch=0.018
13195/13520 (epoch 9), train_loss = 1.172, time/batch=0.017
13196/13520 (epoch 9), train_loss = 1.260, time/batch=0.018
13197/13520 (epoch 9), train_loss = 1.247, time/batch=0.018
13198/13520 (epoch 9), train_loss = 1.255, time/batch=0.018
13199/13520 (epoch 9), train_loss = 1.280, time/batch=0.018
13200/13520 (epoch 9), train_loss = 1.237, time/batch=0.017
13201/13520 (epoch 9), train_loss = 1.185, time/batch=0.018
13202/13520 (epoch 9), train_loss = 1.229, time/batch=0.018
13203/13520 (epoch 9), train_loss = 1.208, time/batch=0.017
13204/13520 (epoch 9), train_loss = 1.199, time/batch=0.018
13205/13520 (epoch 9), train_loss = 1.178, time/batch=0.019
13206/13520 (epoch 9), train_loss = 1.223, time/batch=0.018
13207/13520 (epoch 9), train_loss = 1.20

13332/13520 (epoch 9), train_loss = 1.242, time/batch=0.019
13333/13520 (epoch 9), train_loss = 1.192, time/batch=0.019
13334/13520 (epoch 9), train_loss = 1.203, time/batch=0.018
13335/13520 (epoch 9), train_loss = 1.227, time/batch=0.018
13336/13520 (epoch 9), train_loss = 1.255, time/batch=0.017
13337/13520 (epoch 9), train_loss = 1.157, time/batch=0.018
13338/13520 (epoch 9), train_loss = 1.194, time/batch=0.017
13339/13520 (epoch 9), train_loss = 1.200, time/batch=0.017
13340/13520 (epoch 9), train_loss = 1.211, time/batch=0.018
13341/13520 (epoch 9), train_loss = 1.205, time/batch=0.018
13342/13520 (epoch 9), train_loss = 1.196, time/batch=0.017
13343/13520 (epoch 9), train_loss = 1.233, time/batch=0.018
13344/13520 (epoch 9), train_loss = 1.234, time/batch=0.017
13345/13520 (epoch 9), train_loss = 1.239, time/batch=0.018
13346/13520 (epoch 9), train_loss = 1.266, time/batch=0.018
13347/13520 (epoch 9), train_loss = 1.325, time/batch=0.018
13348/13520 (epoch 9), train_loss = 1.28

13476/13520 (epoch 9), train_loss = 1.246, time/batch=0.018
13477/13520 (epoch 9), train_loss = 1.226, time/batch=0.018
13478/13520 (epoch 9), train_loss = 1.186, time/batch=0.018
13479/13520 (epoch 9), train_loss = 1.220, time/batch=0.018
13480/13520 (epoch 9), train_loss = 1.230, time/batch=0.018
13481/13520 (epoch 9), train_loss = 1.285, time/batch=0.017
13482/13520 (epoch 9), train_loss = 1.285, time/batch=0.018
13483/13520 (epoch 9), train_loss = 1.240, time/batch=0.017
13484/13520 (epoch 9), train_loss = 1.242, time/batch=0.018
13485/13520 (epoch 9), train_loss = 1.278, time/batch=0.018
13486/13520 (epoch 9), train_loss = 1.242, time/batch=0.018
13487/13520 (epoch 9), train_loss = 1.226, time/batch=0.018
13488/13520 (epoch 9), train_loss = 1.233, time/batch=0.018
13489/13520 (epoch 9), train_loss = 1.219, time/batch=0.018
13490/13520 (epoch 9), train_loss = 1.191, time/batch=0.018
13491/13520 (epoch 9), train_loss = 1.243, time/batch=0.018
13492/13520 (epoch 9), train_loss = 1.28

### 产生一些句子试一试

In [10]:
n = 500
prime = " "

tf.reset_default_graph()
model = Model(training=False)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver = tf.train.Saver(tf.global_variables())
    ckpt = tf.train.get_checkpoint_state(save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print(model.sample(sess, chars, vocab, n, prime).encode("utf-8"))


# with open(os.path.join(save_dir, "config.pkl"), "rb") as f:
#     saved_args = cPickle.load(f)
# with open(os.path.join(save_dir, "chars_vocab.pkl"), "rb") as f:
#     chars, vocab = cPickle.load(f)


INFO:tensorflow:Restoring parameters from ./save/model.ckpt-13519
b' Holyes, who should alight of them, this own. A point-counterly at unalson\n     loftion my dear Hopkint than\n     the other liid. There is well times Gow I will do again you once attackfiest.\n\n     "Jowemon I heard them.\n\n     "Because.\n\n     Should that\n     could tongind it. "The man to the whee-told him, "I would swaving with how I may now it over It lay through\n     his heads, to accoUlf effect. I kidne within the time,\' silver lugtary to be held that  what danger. On not the micked, then?" t'


### Homework
- Implement char-rnn using LSTM and GRU
- Find your own dataset (例如找一个中文dataset, be creative on what dataset to use) and apply the same model, report your generated samples. 