### модель

In [1]:
# подгружаемые библиотеки
import numpy as np
import tensorflow as tf
import re

from bpemb import BPEmb

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
vocab_size = 5000 #размер словоря
emb_dim = 200 #размер эмбендингов
max_len = 200 # TODO посчитать максимальную длинну выборки
buffer_size = 5# размер буфера данных в батчах

In [4]:
bpemb_ru = BPEmb(lang='ru', vs=vocab_size , dim=emb_dim)

# bpemb_ru.encode_ids('.,!?:;-')
#>>> [2922, 2921, 2978, 2985, 2947, 2963, 2936]
def load_emb_matrix(bpemb= bpemb_ru, dtype=np.float32):
    '''функция загрузки эмбендингов в модель'''
    
    return bpemb.emb.vectors.astype(dtype)

def parse_fn(line, bpemb = bpemb_ru):
    '''
        функция кодировки строки:
        line- строка
    '''
    sequence = np.array(bpemb.encode_ids(line) + [0]).astype(np.int32)
    punct_dic = {4922: 1, 4921: 2, 4978: 3, 4985: 4, 4947: 5, 4963: 6, 4936: 7}
    feature = []
    label = []

    for i in range(len(sequence) - 1):
        if sequence[i] not in punct_dic:
            feature.append(sequence[i])
            if sequence[i + 1] in punct_dic:
                label.append(punct_dic[sequence[i + 1]])
            else:
                label.append(0)
                
    feature = np.array(feature).astype(np.int32)
    label = np.array(label).astype(np.int32)

    mask = ((label == 0)*0.6 + (label != 0)).astype(np.float32)
    
    return (feature, len(feature)), (label, len(label), mask)


def generator_fn(data_file_url):
    '''генератор чтения строк из файла и
        преобразования из в последовательности'''
    with open(data_file_url, 'r') as file:
        for row in file:
            yield parse_fn(row[:-1])


def input_fn(data_file_url, params, mode):
    '''функция подачи данных в модель,
        на вход получает адрес файла'''
    shapes = (([None], ()), ([None], (), [None]))
    types = ((tf.int32, tf.int32), (tf.int32, tf.int32, tf.float32))
    defaults = ((0, 0), (0, 0, 0.))

    dataset = tf.data.Dataset.from_generator(lambda : generator_fn(data_file_url),
                                             output_shapes=shapes, output_types=types)
    

    if mode == 'train':
        dataset = dataset.shuffle(buffer_size=params['train_size'], reshuffle_each_iteration=True)
        dataset = dataset.repeat(params['num_epochs'])
        dataset = dataset.repeat()

    return dataset.padded_batch(params['batch_size'], shapes, defaults).prefetch(params['train_size'])    

def test_genarator_fn(data, bpemb=bpemb_ru):
    '''генератор преобразования массива стор
        в массив последовательностей'''
    for line in data:
        sequence = bpemb_ru.encode_ids(line)
        yield (np.array(sequence).astype(np.int32), len(sequence)), 0

def test_input_fn(data, params):
    '''функция подачи данных в модель для теста
        на вход получает массив строк'''
    shapes = ([None], ()), ()
    types = (tf.int32, tf.int32), tf.int32
    defaults = (0, 0), 0

    dataset = tf.data.Dataset.from_generator(lambda : test_genarator_fn(data),
                                             output_shapes=shapes, output_types=types)
    
    return dataset.padded_batch(params['batch_size'], shapes, defaults).prefetch(1)    

In [5]:
# модель
def model_fn(features, labels, mode, params):
    
    sequences, lengths = features
    current_butch_size = tf.shape(sequences)[0]

    # матрица эмбеддингов decoder-а
    embeddings = tf.Variable(initial_value = load_emb_matrix() ,trainable=False)
    
    embeded = tf.nn.embedding_lookup(params=embeddings, ids=sequences)
    
    
    bi_cell_fw  = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.GRUCell(num_units=params['lstm_hidden_dim']) 
                          for _ in range(params['num_layers'])])

    bi_cell_bw  = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.GRUCell(num_units=params['lstm_hidden_dim']) 
                          for _ in range(params['num_layers'])])   
    
    
    outputs, states = tf.nn.bidirectional_dynamic_rnn(   
                                                         bi_cell_fw,
                                                         bi_cell_bw,
                                                         embeded, 
                                                         sequence_length=lengths, 
                                                         dtype=tf.float32
                                                     )
    #prepare state
    output_fw, output_bw = outputs
    
    output = tf.concat([output_fw, output_bw], axis=-1)
    
    training = mode == tf.estimator.ModeKeys.TRAIN
    dropout = tf.layers.dropout(inputs=output, 
                                rate= params['dropout_rate'], 
                                training=training)
    
    lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=params['lstm_hidden_dim'])
    
    
    outputs_lstm, _ = tf.nn.dynamic_rnn(lstm_cell, dropout, sequence_length=lengths, dtype=tf.float32)
    
#    training = mode == tf.estimator.ModeKeys.TRAIN
#    dropout = tf.layers.dropout(inputs=outputs.rnn_output, 
#                                rate= params['dropout_rate'], 
#                                training=training)
#    

                                
    dense1 = tf.layers.dense(outputs_lstm, params['lstm_hidden_dim'], activation=tf.math.tanh)

    dense2 = tf.layers.dense(dense1, params['output_vocab_size'])
    

    logits = dense2
    sample_id = tf.argmax(logits, 2)
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'sequences': sequences,
            'prediction': sample_id,
            'lengths': lengths
        }

        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    else:
        target, target_lengths, mask = labels

        #mask = tf.sequence_mask(lengths, dtype=tf.float32)


        loss = tf.contrib.seq2seq.sequence_loss(logits=logits, 
                                                targets=target, 
                                                weights=mask, 
                                                average_across_timesteps=True, 
                                                average_across_batch=True)

        # в режиме eval возвращаем усреднённый лосс
        if mode == tf.estimator.ModeKeys.EVAL:
            mask = tf.sequence_mask(lengths, dtype=tf.float32)
            metrics = {
                'acc': tf.metrics.accuracy(target, sample_id, mask),
                #'f1_score' : tf.contrib.metrics.f1_score(target, sample_id, mask),
            }
            
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

        # в режиме train ещё и обновляем обучаемые параметры
        elif mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer
            optimizer = optimizer(learning_rate=params['learning_rate'])
            train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    
    


In [6]:
params = {
    'vocab_size': vocab_size,
    'output_vocab_size': 8,
    'train_size': 256*buffer_size,
    'num_layers': 2,
    'embedding_dim': emb_dim,
    'dropout_rate': 0.2,
    'lstm_hidden_dim': 512,
    'max_iter': max_len,
    'batch_size': 256,
    'num_epochs': 1,
    'learning_rate': 1e-3
}

config = tf.estimator.RunConfig(model_dir='project_v2_2',
                                save_checkpoints_steps = 200,
                               save_checkpoints_secs = None)
model = tf.estimator.Estimator(model_fn=model_fn, params=params, config=config)

INFO:tensorflow:Using config: {'_model_dir': 'project_v2_2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 200, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0652c66358>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [None]:
model.train(lambda: input_fn('flibusta_full_train.txt', params=params, mode='train'))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into project_v2_2/model.ckpt.
INFO:tensorflow:loss = 2.0704308, step = 1
INFO:tensorflow:global_step/sec: 0.206115
INFO:tensorflow:loss = 0.6540087, step = 101 (485.168 sec)
INFO:tensorflow:Saving checkpoints for 200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.222026
INFO:tensorflow:loss = 0.46706274, step = 201 (450.398 sec)
INFO:tensorflow:global_step/sec: 0.228938
INFO:tensorflow:loss = 0.39895663, step = 301 (436.800 sec)
INFO:tensorflow:Saving checkpoints for 400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.229738
INFO:tensorflow:loss = 0.3054805, step = 401 (435.277 sec)
INFO:tensorflow:global_step/sec: 0.223954
INFO:tensorflow:loss = 0.32328272, step = 501 (446.520 

INFO:tensorflow:loss = 0.24245346, step = 5701 (462.253 sec)
INFO:tensorflow:Saving checkpoints for 5800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.219153
INFO:tensorflow:loss = 0.29320988, step = 5801 (456.302 sec)
INFO:tensorflow:global_step/sec: 0.215769
INFO:tensorflow:loss = 0.29620254, step = 5901 (463.459 sec)
INFO:tensorflow:Saving checkpoints for 6000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.24024
INFO:tensorflow:loss = 0.29398084, step = 6001 (416.251 sec)
INFO:tensorflow:global_step/sec: 0.226633
INFO:tensorflow:loss = 0.26217553, step = 6101 (441.243 sec)
INFO:tensorflow:Saving checkpoints for 6200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.225964
INFO:tensorflow:loss = 0.24495342, step = 6201 (442.550 sec)
INFO:tensorflow:global_step/sec: 0.229445
INFO:tensorflow:loss = 0.24014305, step = 6301 (435.834 sec)
INFO:tensorflow:Saving checkpoints for 6400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_ste

INFO:tensorflow:global_step/sec: 0.265727
INFO:tensorflow:loss = 0.17987627, step = 11601 (376.326 sec)
INFO:tensorflow:global_step/sec: 0.263762
INFO:tensorflow:loss = 0.22028692, step = 11701 (379.129 sec)
INFO:tensorflow:Saving checkpoints for 11800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.26248
INFO:tensorflow:loss = 0.209353, step = 11801 (380.981 sec)
INFO:tensorflow:global_step/sec: 0.263716
INFO:tensorflow:loss = 0.18974131, step = 11901 (379.197 sec)
INFO:tensorflow:Saving checkpoints for 12000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.264481
INFO:tensorflow:loss = 0.23417684, step = 12001 (378.099 sec)
INFO:tensorflow:global_step/sec: 0.264056
INFO:tensorflow:loss = 0.30757982, step = 12101 (378.707 sec)
INFO:tensorflow:Saving checkpoints for 12200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.26141
INFO:tensorflow:loss = 0.23953158, step = 12201 (382.541 sec)
INFO:tensorflow:global_step/sec: 0.265119
INFO:tens

INFO:tensorflow:loss = 0.23812766, step = 17401 (452.731 sec)
INFO:tensorflow:global_step/sec: 0.22457
INFO:tensorflow:loss = 0.27350432, step = 17501 (445.295 sec)
INFO:tensorflow:Saving checkpoints for 17600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.223505
INFO:tensorflow:loss = 0.24015555, step = 17601 (447.417 sec)
INFO:tensorflow:global_step/sec: 0.229447
INFO:tensorflow:loss = 0.19238351, step = 17701 (435.831 sec)
INFO:tensorflow:Saving checkpoints for 17800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.221017
INFO:tensorflow:loss = 0.29083875, step = 17801 (452.453 sec)
INFO:tensorflow:global_step/sec: 0.227578
INFO:tensorflow:loss = 0.23398647, step = 17901 (439.410 sec)
INFO:tensorflow:Saving checkpoints for 18000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.246005
INFO:tensorflow:loss = 0.20238511, step = 18001 (406.495 sec)
INFO:tensorflow:global_step/sec: 0.241368
INFO:tensorflow:loss = 0.21024208, step = 18101 

INFO:tensorflow:global_step/sec: 0.196534
INFO:tensorflow:loss = 0.22789086, step = 23301 (508.818 sec)
INFO:tensorflow:Saving checkpoints for 23400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.236917
INFO:tensorflow:loss = 0.25947487, step = 23401 (422.089 sec)
INFO:tensorflow:global_step/sec: 0.197673
INFO:tensorflow:loss = 0.236283, step = 23501 (505.887 sec)
INFO:tensorflow:Saving checkpoints for 23600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.1922
INFO:tensorflow:loss = 0.30697823, step = 23601 (520.290 sec)
INFO:tensorflow:global_step/sec: 0.232305
INFO:tensorflow:loss = 0.25236067, step = 23701 (430.468 sec)
INFO:tensorflow:Saving checkpoints for 23800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.245416
INFO:tensorflow:loss = 0.25151753, step = 23801 (407.471 sec)
INFO:tensorflow:global_step/sec: 0.243769
INFO:tensorflow:loss = 0.17393799, step = 23901 (410.226 sec)
INFO:tensorflow:Saving checkpoints for 24000 into p

INFO:tensorflow:loss = 0.29738602, step = 29101 (382.455 sec)
INFO:tensorflow:Saving checkpoints for 29200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.262175
INFO:tensorflow:loss = 0.23519835, step = 29201 (381.425 sec)
INFO:tensorflow:global_step/sec: 0.259396
INFO:tensorflow:loss = 0.15678169, step = 29301 (385.511 sec)
INFO:tensorflow:Saving checkpoints for 29400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266134
INFO:tensorflow:loss = 0.20382836, step = 29401 (375.750 sec)
INFO:tensorflow:global_step/sec: 0.261828
INFO:tensorflow:loss = 0.16972867, step = 29501 (381.930 sec)
INFO:tensorflow:Saving checkpoints for 29600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.265268
INFO:tensorflow:loss = 0.27882588, step = 29601 (376.978 sec)
INFO:tensorflow:global_step/sec: 0.267783
INFO:tensorflow:loss = 0.23758666, step = 29701 (373.437 sec)
INFO:tensorflow:Saving checkpoints for 29800 into project_v2_2/model.ckpt.
INFO:tensorflo

INFO:tensorflow:Saving checkpoints for 35000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.265704
INFO:tensorflow:loss = 0.22407205, step = 35001 (376.358 sec)
INFO:tensorflow:global_step/sec: 0.263776
INFO:tensorflow:loss = 0.24299295, step = 35101 (379.110 sec)
INFO:tensorflow:Saving checkpoints for 35200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.261775
INFO:tensorflow:loss = 0.19785424, step = 35201 (382.007 sec)
INFO:tensorflow:global_step/sec: 0.263825
INFO:tensorflow:loss = 0.21548489, step = 35301 (379.038 sec)
INFO:tensorflow:Saving checkpoints for 35400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266504
INFO:tensorflow:loss = 0.28045613, step = 35401 (375.229 sec)
INFO:tensorflow:global_step/sec: 0.260659
INFO:tensorflow:loss = 0.243266, step = 35501 (383.644 sec)
INFO:tensorflow:Saving checkpoints for 35600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266345
INFO:tensorflow:loss = 0.17898823, s

INFO:tensorflow:global_step/sec: 0.229644
INFO:tensorflow:loss = 0.17971532, step = 40801 (435.456 sec)
INFO:tensorflow:global_step/sec: 0.235065
INFO:tensorflow:loss = 0.24131688, step = 40901 (425.413 sec)
INFO:tensorflow:Saving checkpoints for 41000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.237721
INFO:tensorflow:loss = 0.21721672, step = 41001 (420.661 sec)
INFO:tensorflow:global_step/sec: 0.231981
INFO:tensorflow:loss = 0.19190806, step = 41101 (431.070 sec)
INFO:tensorflow:Saving checkpoints for 41200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.238874
INFO:tensorflow:loss = 0.3070047, step = 41201 (418.631 sec)
INFO:tensorflow:global_step/sec: 0.23647
INFO:tensorflow:loss = 0.21702293, step = 41301 (422.886 sec)
INFO:tensorflow:Saving checkpoints for 41400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.242567
INFO:tensorflow:loss = 0.14657444, step = 41401 (412.258 sec)
INFO:tensorflow:global_step/sec: 0.240947
INFO:te

INFO:tensorflow:loss = 0.20706211, step = 46601 (379.383 sec)
INFO:tensorflow:global_step/sec: 0.262972
INFO:tensorflow:loss = 0.2136559, step = 46701 (380.269 sec)
INFO:tensorflow:Saving checkpoints for 46800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.2651
INFO:tensorflow:loss = 0.22396761, step = 46801 (377.216 sec)
INFO:tensorflow:global_step/sec: 0.261109
INFO:tensorflow:loss = 0.18931231, step = 46901 (382.982 sec)
INFO:tensorflow:Saving checkpoints for 47000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.262797
INFO:tensorflow:loss = 0.30185238, step = 47001 (380.522 sec)
INFO:tensorflow:global_step/sec: 0.266635
INFO:tensorflow:loss = 0.24918234, step = 47101 (375.044 sec)
INFO:tensorflow:Saving checkpoints for 47200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.260381
INFO:tensorflow:loss = 0.23785353, step = 47201 (384.052 sec)
INFO:tensorflow:global_step/sec: 0.262905
INFO:tensorflow:loss = 0.23030409, step = 47301 (3

INFO:tensorflow:global_step/sec: 0.261843
INFO:tensorflow:loss = 0.1836902, step = 52501 (381.908 sec)
INFO:tensorflow:Saving checkpoints for 52600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.259113
INFO:tensorflow:loss = 0.2805734, step = 52601 (385.932 sec)
INFO:tensorflow:global_step/sec: 0.261159
INFO:tensorflow:loss = 0.197604, step = 52701 (382.908 sec)
INFO:tensorflow:Saving checkpoints for 52800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.261227
INFO:tensorflow:loss = 0.17274076, step = 52801 (382.809 sec)
INFO:tensorflow:global_step/sec: 0.261814
INFO:tensorflow:loss = 0.264409, step = 52901 (381.950 sec)
INFO:tensorflow:Saving checkpoints for 53000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.261505
INFO:tensorflow:loss = 0.19891, step = 53001 (382.401 sec)
INFO:tensorflow:global_step/sec: 0.265665
INFO:tensorflow:loss = 0.21825288, step = 53101 (376.414 sec)
INFO:tensorflow:Saving checkpoints for 53200 into projec

INFO:tensorflow:loss = 0.25441042, step = 58301 (379.412 sec)
INFO:tensorflow:Saving checkpoints for 58400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266433
INFO:tensorflow:loss = 0.27340716, step = 58401 (375.329 sec)
INFO:tensorflow:global_step/sec: 0.261797
INFO:tensorflow:loss = 0.24494311, step = 58501 (381.975 sec)
INFO:tensorflow:Saving checkpoints for 58600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.273375
INFO:tensorflow:loss = 0.27149224, step = 58601 (365.798 sec)
INFO:tensorflow:global_step/sec: 0.259576
INFO:tensorflow:loss = 0.19507025, step = 58701 (385.244 sec)
INFO:tensorflow:Saving checkpoints for 58800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.260312
INFO:tensorflow:loss = 0.22052394, step = 58801 (384.155 sec)
INFO:tensorflow:global_step/sec: 0.372234
INFO:tensorflow:loss = 0.24309994, step = 58901 (268.648 sec)
INFO:tensorflow:Saving checkpoints for 59000 into project_v2_2/model.ckpt.
INFO:tensorflo

INFO:tensorflow:Saving checkpoints for 64200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.259799
INFO:tensorflow:loss = 0.22954877, step = 64201 (384.912 sec)
INFO:tensorflow:global_step/sec: 0.263247
INFO:tensorflow:loss = 0.18130861, step = 64301 (379.872 sec)
INFO:tensorflow:Saving checkpoints for 64400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.259905
INFO:tensorflow:loss = 0.22169854, step = 64401 (384.755 sec)
INFO:tensorflow:global_step/sec: 0.262355
INFO:tensorflow:loss = 0.2737446, step = 64501 (381.163 sec)
INFO:tensorflow:Saving checkpoints for 64600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266909
INFO:tensorflow:loss = 0.29648012, step = 64601 (374.659 sec)
INFO:tensorflow:global_step/sec: 0.262404
INFO:tensorflow:loss = 0.26842105, step = 64701 (381.093 sec)
INFO:tensorflow:Saving checkpoints for 64800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.26548
INFO:tensorflow:loss = 0.25680247, s

INFO:tensorflow:global_step/sec: 0.261739
INFO:tensorflow:loss = 0.1999196, step = 70001 (382.060 sec)
INFO:tensorflow:global_step/sec: 0.259718
INFO:tensorflow:loss = 0.26127937, step = 70101 (385.032 sec)
INFO:tensorflow:Saving checkpoints for 70200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.265429
INFO:tensorflow:loss = 0.20373498, step = 70201 (376.749 sec)
INFO:tensorflow:global_step/sec: 0.26338
INFO:tensorflow:loss = 0.28782374, step = 70301 (379.680 sec)
INFO:tensorflow:Saving checkpoints for 70400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.257258
INFO:tensorflow:loss = 0.3142717, step = 70401 (388.715 sec)
INFO:tensorflow:global_step/sec: 0.261881
INFO:tensorflow:loss = 0.21045657, step = 70501 (381.853 sec)
INFO:tensorflow:Saving checkpoints for 70600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.262651
INFO:tensorflow:loss = 0.19822809, step = 70601 (380.733 sec)
INFO:tensorflow:global_step/sec: 0.261404
INFO:ten

INFO:tensorflow:loss = 0.26461935, step = 75801 (384.181 sec)
INFO:tensorflow:global_step/sec: 0.263304
INFO:tensorflow:loss = 0.29412434, step = 75901 (379.789 sec)
INFO:tensorflow:Saving checkpoints for 76000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.26337
INFO:tensorflow:loss = 0.27510083, step = 76001 (379.694 sec)
INFO:tensorflow:global_step/sec: 0.261867
INFO:tensorflow:loss = 0.29397035, step = 76101 (381.873 sec)
INFO:tensorflow:Saving checkpoints for 76200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266032
INFO:tensorflow:loss = 0.19865018, step = 76201 (375.894 sec)
INFO:tensorflow:global_step/sec: 0.26477
INFO:tensorflow:loss = 0.2125385, step = 76301 (377.687 sec)
INFO:tensorflow:Saving checkpoints for 76400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266917
INFO:tensorflow:loss = 0.26551425, step = 76401 (374.648 sec)
INFO:tensorflow:global_step/sec: 0.266603
INFO:tensorflow:loss = 0.22208008, step = 76501 (3

INFO:tensorflow:global_step/sec: 0.264792
INFO:tensorflow:loss = 0.18913361, step = 81701 (377.655 sec)
INFO:tensorflow:Saving checkpoints for 81800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.263104
INFO:tensorflow:loss = 0.2629195, step = 81801 (380.078 sec)
INFO:tensorflow:global_step/sec: 0.259871
INFO:tensorflow:loss = 0.35227394, step = 81901 (384.806 sec)
INFO:tensorflow:Saving checkpoints for 82000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.263083
INFO:tensorflow:loss = 0.23789416, step = 82001 (380.108 sec)
INFO:tensorflow:global_step/sec: 0.262259
INFO:tensorflow:loss = 0.23220688, step = 82101 (381.302 sec)
INFO:tensorflow:Saving checkpoints for 82200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.266362
INFO:tensorflow:loss = 0.23501705, step = 82201 (375.429 sec)
INFO:tensorflow:global_step/sec: 0.259328
INFO:tensorflow:loss = 0.2726818, step = 82301 (385.612 sec)
INFO:tensorflow:Saving checkpoints for 82400 into

INFO:tensorflow:loss = 0.15930073, step = 87501 (375.925 sec)
INFO:tensorflow:Saving checkpoints for 87600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.262709
INFO:tensorflow:loss = 0.22796158, step = 87601 (380.649 sec)
INFO:tensorflow:global_step/sec: 0.265264
INFO:tensorflow:loss = 0.19854929, step = 87701 (376.983 sec)
INFO:tensorflow:Saving checkpoints for 87800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.265222
INFO:tensorflow:loss = 0.22536606, step = 87801 (377.043 sec)
INFO:tensorflow:global_step/sec: 0.258426
INFO:tensorflow:loss = 0.2865061, step = 87901 (386.958 sec)
INFO:tensorflow:Saving checkpoints for 88000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.251578
INFO:tensorflow:loss = 0.26497653, step = 88001 (397.491 sec)
INFO:tensorflow:global_step/sec: 0.246475
INFO:tensorflow:loss = 0.22988272, step = 88101 (405.721 sec)
INFO:tensorflow:Saving checkpoints for 88200 into project_v2_2/model.ckpt.
INFO:tensorflow

INFO:tensorflow:Saving checkpoints for 93400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.262885
INFO:tensorflow:loss = 0.28591105, step = 93401 (380.395 sec)
INFO:tensorflow:global_step/sec: 0.266262
INFO:tensorflow:loss = 0.25480828, step = 93501 (375.569 sec)
INFO:tensorflow:Saving checkpoints for 93600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.26016
INFO:tensorflow:loss = 0.34120446, step = 93601 (384.379 sec)
INFO:tensorflow:global_step/sec: 0.260482
INFO:tensorflow:loss = 0.23483911, step = 93701 (383.904 sec)
INFO:tensorflow:Saving checkpoints for 93800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.258012
INFO:tensorflow:loss = 0.2621496, step = 93801 (387.578 sec)
INFO:tensorflow:global_step/sec: 0.266139
INFO:tensorflow:loss = 0.32373938, step = 93901 (375.743 sec)
INFO:tensorflow:Saving checkpoints for 94000 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.260501
INFO:tensorflow:loss = 0.20816204, s

INFO:tensorflow:global_step/sec: 0.265813
INFO:tensorflow:loss = 0.22483741, step = 99201 (376.204 sec)
INFO:tensorflow:global_step/sec: 0.265966
INFO:tensorflow:loss = 0.2577505, step = 99301 (375.989 sec)
INFO:tensorflow:Saving checkpoints for 99400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.261903
INFO:tensorflow:loss = 0.24072316, step = 99401 (381.821 sec)
INFO:tensorflow:global_step/sec: 0.263835
INFO:tensorflow:loss = 0.24633366, step = 99501 (379.025 sec)
INFO:tensorflow:Saving checkpoints for 99600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.263208
INFO:tensorflow:loss = 0.24503453, step = 99601 (379.928 sec)
INFO:tensorflow:global_step/sec: 0.263217
INFO:tensorflow:loss = 0.2720677, step = 99701 (379.914 sec)
INFO:tensorflow:Saving checkpoints for 99800 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.261588
INFO:tensorflow:loss = 0.2991102, step = 99801 (382.281 sec)
INFO:tensorflow:global_step/sec: 0.264124
INFO:ten

INFO:tensorflow:global_step/sec: 0.246909
INFO:tensorflow:loss = 0.23610625, step = 105001 (405.008 sec)
INFO:tensorflow:global_step/sec: 0.248318
INFO:tensorflow:loss = 0.22445187, step = 105101 (402.710 sec)
INFO:tensorflow:Saving checkpoints for 105200 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.247895
INFO:tensorflow:loss = 0.27745554, step = 105201 (403.396 sec)
INFO:tensorflow:global_step/sec: 0.247138
INFO:tensorflow:loss = 0.22977541, step = 105301 (404.632 sec)
INFO:tensorflow:Saving checkpoints for 105400 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.24739
INFO:tensorflow:loss = 0.29189384, step = 105401 (404.219 sec)
INFO:tensorflow:global_step/sec: 0.243762
INFO:tensorflow:loss = 0.2545232, step = 105501 (410.236 sec)
INFO:tensorflow:Saving checkpoints for 105600 into project_v2_2/model.ckpt.
INFO:tensorflow:global_step/sec: 0.250586
INFO:tensorflow:loss = 0.25204152, step = 105601 (399.064 sec)
INFO:tensorflow:global_step/sec: 0.2498

In [None]:
# обучение и валидация
eval_result = model.evaluate(lambda: input_fn('eval.txt', params=params, mode='eval'))

In [11]:
# дешифровка 
def placement_punctuation(data, mode='from_list'):
    '''
        функция расстановки пунктуации (.,!,?:;)
        mode -  тип подоваемых данных:
                если 'from_list' то data должна быть массивом строк
                если 'from_file' то data должна быть путем до файла
        data -  либо строка, либо массив строк, в зависимости от mode
        
        -----------------------------------------------------------------
        пример:
        
        test_data = [
            'Он уже должен был прийти обратно Почему он не возвращается',
            'Я не знаю Аэтпио не видит ничего кроме мертвых камней'
        ]
        placement_punctuation(test_data)
        -----------------------------------------------------------------
        пример:
        
        placement_punctuation('test.txt', mode='from_file')
        -----------------------------------------------------------------
    '''
    d = {1:2922, 2:2921, 3:2978, 4:2985, 5:2947, 6:2963, 7:2936}
    if mode == 'from_file':
        prediction = model.predict(lambda: input_fn(data, params=params, mode='predicted'))
    else:
        prediction = model.predict(lambda: test_input_fn(data, params=params))
    for x in prediction:
        a = []
        for i in range(x['lengths']):
            a.append(x['sequences'][i])
            if x['prediction'][i] != 0:
                a.append(d[x['prediction'][i]])
        print(bpemb_ru.decode_ids(np.array(a)))


In [12]:
test_data = [
    'он уже должен был прийти обратно почему он не возвращается',
    'я не знаю аэтпио не видит ничего кроме мертвых камней'
]
placement_punctuation(test_data)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from project_v2/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
он уже должен был прийти обратно. почему он не возвращается?
я не знаю, аэтпио не видит ничего, кроме мертвых камней.


In [13]:
placement_punctuation('test.txt', mode='from_file')

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from project_v2/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
он уже должен был прийти обратно. почему он не возвращается?
я не знаю, аэтпио не видит ничего, кроме мертвых камней.
и ютипа тоже. почему взгляд хранителей не может проникнуть глубже?
кто знает? ариман не смел даже самому себе признаться, насколько сильно он встревожен.
я думал, ты сумеешь туда заглянуть. в конце концов, ты же корвид.
здесь это не поможет.
тебе легко говорить, но не всем же быть павонидами. кое-кому приходится бороться с жарой собственными средствами.
с каких это пор ты овладел мастерством атенейцев?
мне этого не требуется. и так понятно, что вы оба встревожены. это очевидно.


In [15]:
placement_punctuation('barmaglot.txt', mode='from_file')

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from project_v2/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
варкалось. хливкие шорьки пырялись по нове и хрюкотали зелюки, как мюмзики в мове.
о, бойся, бармаглота сын!
он так свирлеп и дик, а в глуще рымит исполин злопастный брандашмыг, но взял он меч и взял. он щит высоких полон дум.
в глущобу путь его лежит под дерево тумтум.
он стал под дерево и ждет, и вдруг граахнул. гром летит ужасный бармаглот и пылкает огнем.
раз-два раз-два горит трова взы-взы стрижает меч, ува ува и голова барабардает с плеч.
о, светозарный мальчик мой! ты победил в бою о храброславленный герой. хвалу тебе пою.
варкалось. хливкие шорьки пырялись по нове и хрюкотали зелюки, как мюмзики в мове.
