In [1]:
import os
import math
import random
import sys
import time
import numpy as np

import tensorflow as tf

from easy_seq2seq import data_utils, seq2seq_model

In [2]:
# Length buckets as (source_size, target_size) pairs, listed smallest first.
# read_data() files each sentence pair into the first bucket whose two limits
# are strictly greater than the source and target lengths.
_buckets = [
    (5, 10),
    (10, 15),
    (20, 25),
    (40, 50),
]

In [3]:
# Create the working directory on first run. The message is printed only when
# the directory was actually created, so re-running the cell against an
# existing directory no longer claims that a new folder was made.
if not os.path.exists('./working_dir'):
    os.makedirs('./working_dir')
    print('새로운 폴더 ./working_dir가 생성되었습니다.')

새로운 폴더 ./working_dir가 생성되었습니다.


In [4]:
# Prepare the train/test corpora. The first four returned values are used
# later as file paths for read_data(); the last two are discarded here.
# NOTE(review): the two 20000 arguments are presumably the encoder/decoder
# vocabulary sizes (the same numbers are passed to Seq2SeqModel) -- confirm
# against data_utils.prepare_custom_data.
enc_train, dec_train, enc_dev, dec_dev, _, _ = data_utils.prepare_custom_data(
    'working_dir/',
    'data/train.enc',
    'data/train.dec',
    'data/test.enc',
    'data/test.dec',
    20000,
    20000,
)
print('데이터 변환이 완성되었습니다.')

데이터 변환이 완성되었습니다.


In [5]:
# Data loading
def read_data(source_path, target_path, max_size=None):
    """Read two parallel token-id files and group the pairs into length buckets.

    Each line of both files is split on whitespace and parsed as integers.
    data_utils.EOS_ID is appended to every target sequence before bucketing.

    Args:
        source_path: path of the file holding the source id sequences.
        target_path: path of the file holding the target id sequences,
            line-aligned with the source file.
        max_size: maximum number of line pairs to read; None or 0 means
            read until either file runs out.

    Returns:
        A list with one entry per bucket in _buckets. Each entry is a list of
        [source_ids, target_ids] pairs placed in the first bucket whose
        (source_size, target_size) limits are strictly greater than the
        pair's lengths. Pairs that fit no bucket are dropped.
    """
    buckets_data = [[] for _ in _buckets]
    with tf.gfile.GFile(source_path, mode="r") as src_fh, \
            tf.gfile.GFile(target_path, mode="r") as tgt_fh:
        lines_read = 0
        while True:
            src_line, tgt_line = src_fh.readline(), tgt_fh.readline()
            # Stop at the end of either file ...
            if not src_line or not tgt_line:
                break
            # ... or once the optional line limit has been reached.
            if max_size and lines_read >= max_size:
                break

            lines_read += 1
            if lines_read % 100000 == 0:
                print("  reading data line %d" % lines_read)
                sys.stdout.flush()

            src_ids = list(map(int, src_line.split()))
            tgt_ids = list(map(int, tgt_line.split()))
            tgt_ids.append(data_utils.EOS_ID)

            # Index of the first bucket large enough for both sequences.
            fitting_bucket = next(
                (idx for idx, (src_cap, tgt_cap) in enumerate(_buckets)
                 if len(src_ids) < src_cap and len(tgt_ids) < tgt_cap),
                None)
            if fitting_bucket is not None:
                buckets_data[fitting_bucket].append([src_ids, tgt_ids])
    return buckets_data

In [6]:
def create_model(sess, forward_only):
    """Build the seq2seq model and load or initialise its variables.

    If a checkpoint is recorded under ./working_dir and its file exists, the
    model parameters are restored from it into `sess`; otherwise all
    variables are freshly initialised in `sess`.

    Args:
        sess: TensorFlow session used for restoring or initialising variables.
        forward_only: forwarded unchanged to Seq2SeqModel.

    Returns:
        The constructed seq2seq_model.Seq2SeqModel instance.
    """
    # NOTE(review): positional hyper-parameters; 128 and 3 presumably are the
    # layer size and layer count (see the "3 layers of 128 units" message
    # printed elsewhere in this notebook). The meaning of the remaining
    # numbers should be confirmed against Seq2SeqModel's signature.
    model = seq2seq_model.Seq2SeqModel(20000,
                                       20000,
                                       _buckets,
                                       128,
                                       3,
                                       5.0,
                                       32,
                                       0.5,
                                       0.99,
                                       forward_only=forward_only)

    ckpt = tf.train.get_checkpoint_state('./working_dir')
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        # Restore into the session passed by the caller. The previous code
        # referenced an undefined name `session`, which raised NameError as
        # soon as a checkpoint existed.
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        sess.run(tf.initialize_all_variables())
    return model

In [7]:
# Session configuration; no options are set on it here.
config = tf.ConfigProto() 

# NOTE(review): later cells call `.eval()` without passing a session, so they
# presumably rely on InteractiveSession registering itself as the default
# session -- confirm against the TensorFlow documentation.
sess = tf.InteractiveSession(config=config)
# The numbers in this message are hard-coded text; presumably they mirror the
# 3 and 128 passed to Seq2SeqModel in create_model() -- keep them in step.
print("Creating 3 layers of 128 units.")

Creating 3 layers of 128 units.


In [8]:
# Build the model with forward_only disabled; create_model() restores a
# checkpoint from ./working_dir when one exists, else initialises variables.
model = create_model(sess, forward_only=False)

Created model with fresh parameters.


In [9]:
# Load the bucketed development and training sets. max_size=0 is treated by
# read_data() as "no limit", so the whole training file is read.
dev_set = read_data(enc_dev, dec_dev)
train_set = read_data(enc_train, dec_train, max_size=0)

# Number of training pairs per bucket, and their total as a float so that
# later divisions by it are true divisions.
train_bucket_sizes = list(map(len, train_set))
train_total_size = float(sum(train_bucket_sizes))

  reading data line 100000


In [10]:
# Cumulative share of the training data covered by buckets 0..i. Entry i is
# (size of buckets 0..i) / (total size); the training loop compares a random
# number against these thresholds to pick a bucket.
train_buckets_scale = []
cumulative_size = 0
for bucket_size in train_bucket_sizes:
    cumulative_size += bucket_size
    train_buckets_scale.append(cumulative_size / train_total_size)

In [11]:
# Accumulators for the training loop: averaged step time and loss since the
# last report, the number of steps taken so far, and the history of reported
# losses used for the learning-rate decay decision.
step_time = 0.0
loss = 0.0
current_step = 0
previous_losses = []

In [13]:
# Training schedule. MAX_TRAIN_STEPS bounds the loop; every
# STEPS_PER_CHECKPOINT steps the loop reports statistics, saves a checkpoint
# and evaluates on the development set. These replace the literals 300 and 30
# that were previously repeated inline.
MAX_TRAIN_STEPS = 300
STEPS_PER_CHECKPOINT = 30

while current_step < MAX_TRAIN_STEPS:
    # Choose a bucket according to data distribution. We pick a random number
    # in [0, 1] and use the corresponding interval in train_buckets_scale.
    random_number_01 = np.random.random_sample()
    bucket_id = min([i for i in range(len(train_buckets_scale))
                     if train_buckets_scale[i] > random_number_01])

    # Get a batch and make a step.
    start_time = time.time()
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        train_set, bucket_id)
    _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                 target_weights, bucket_id, False)
    # Dividing each contribution by the reporting interval makes step_time
    # and loss averages over that interval by the time they are printed.
    step_time += (time.time() - start_time) / STEPS_PER_CHECKPOINT
    loss += step_loss / STEPS_PER_CHECKPOINT
    current_step += 1

    # Once in a while, we save checkpoint, print statistics, and run evals.
    if current_step % STEPS_PER_CHECKPOINT == 0:
        # Print statistics for the steps since the previous report.
        # Losses of 300 or more are reported as infinite perplexity instead
        # of being passed to math.exp.
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print ("global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
            sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join('./working_dir', "seq2seq.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        # Run evals on development set and print their perplexity. A separate
        # loop variable keeps the training bucket_id from being overwritten.
        for eval_bucket_id in range(len(_buckets)):
            if len(dev_set[eval_bucket_id]) == 0:
                print("  eval: empty bucket %d" % (eval_bucket_id))
                continue
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                dev_set, eval_bucket_id)
            # The final True argument differs from the False used for
            # training steps. NOTE(review): presumably the forward-only flag
            # of model.step -- confirm.
            _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, eval_bucket_id, True)
            eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
            print("  eval: bucket %d perplexity %.2f" % (eval_bucket_id, eval_ppx))
        sys.stdout.flush()

print('트레이닝이 완료되었습니다.')

global step 61 learning rate 0.5000 step-time 0.46 perplexity 330.50
  eval: bucket 0 perplexity 151.74
  eval: bucket 1 perplexity 198.26
  eval: bucket 2 perplexity 448.60
  eval: bucket 3 perplexity 248.53
global step 91 learning rate 0.5000 step-time 0.51 perplexity 373.81
  eval: bucket 0 perplexity 91.28
  eval: bucket 1 perplexity 226.17
  eval: bucket 2 perplexity 534.05
  eval: bucket 3 perplexity 655.43
global step 121 learning rate 0.5000 step-time 0.47 perplexity 286.37
  eval: bucket 0 perplexity 220.41
  eval: bucket 1 perplexity 349.38
  eval: bucket 2 perplexity 303.70
  eval: bucket 3 perplexity 381.85
global step 151 learning rate 0.5000 step-time 0.42 perplexity 222.04
  eval: bucket 0 perplexity 90.25
  eval: bucket 1 perplexity 148.89
  eval: bucket 2 perplexity 192.33
  eval: bucket 3 perplexity 166.89
global step 181 learning rate 0.5000 step-time 0.55 perplexity 209.04
  eval: bucket 0 perplexity 66.62
  eval: bucket 1 perplexity 135.74
  eval: bucket 2 perplexi