In [1]:
import sys
import os
import time
import random
import re
import json
import pickle
from typing import List, Tuple, Dict, Callable, Optional, Any, Sequence, Mapping, NamedTuple
from attrdict import AttrDict
from multiprocessing import Process

In [2]:
import tensorflow as tf
from tensorflow.keras.utils import multi_gpu_model
import numpy as np
import matplotlib as plt

In [3]:
from model.transformer import Transformer
from datasource.sample_ds import SampleDataSource

In [4]:
# Turn on TensorFlow eager execution for this kernel; the `worker` cell
# (marked "# eager") relies on it.
tf.enable_eager_execution()

In [None]:
# Hyperparameters for the 4-layer configuration.
# The depth/width entries are set first because the filter size and the
# checkpoint/log paths are derived from them.
hparams = AttrDict(num_layers=4, num_units=512)
hparams.update(
    num_filter_units=hparams.num_units * 4,
    num_heads=8,
    dropout_rate=0.1,
    max_length=50,
    batch_size=32,
    learning_rate=0.001,
    warmup_steps=4000,
    num_epochs=50,
    vocab_size=3278,
    data_path='./data/',
    ckpt_path='./ckpt/vanilla/l{}_u{}/model.ckpt'.format(hparams.num_layers, hparams.num_units),
    log_dir='./logs/vanilla/l{}_u{}'.format(hparams.num_layers, hparams.num_units),
)
# `hparams1` is a second name for the same object, not a copy.
hparams1 = hparams

In [5]:
# Hyperparameters for the 6-layer configuration.
# The depth/width entries are set first because the filter size and the
# checkpoint/log paths are derived from them.
hparams2 = AttrDict(num_layers=6, num_units=512)
hparams2.update(
    num_filter_units=hparams2.num_units * 4,
    num_heads=8,
    dropout_rate=0.1,
    max_length=50,
    batch_size=64,
    learning_rate=0.001,
    warmup_steps=4000,
    num_epochs=30,
    vocab_size=3278,
    data_path='./data/',
    ckpt_path='./ckpt/vanilla/l{}_u{}/model.ckpt'.format(hparams2.num_layers, hparams2.num_units),
    log_dir='./logs/vanilla/l{}_u{}'.format(hparams2.num_layers, hparams2.num_units),
)

In [6]:
# eager
def worker(hparams, gpu_id):
    """Run eager-mode training on the GPU selected by ``gpu_id``.

    Args:
        hparams: Hyperparameter mapping. It is passed to ``SampleDataSource``
            and ``Transformer``; this function itself reads only ``'log_dir'``.
        gpu_id: Index substituted into the ``'/gpu:{}'`` device string.
    """
    device_name = '/gpu:{}'.format(gpu_id)
    with tf.device(device_name):
        data_source = SampleDataSource(hparams)
        # Second argument is presumably an "is training" flag -- TODO confirm
        # against Transformer's constructor.
        transformer = Transformer(hparams, True)
        # `learning_rate` is passed uncalled (not `learning_rate()`).
        adam = tf.train.AdamOptimizer(
            transformer.learning_rate,
            beta1=0.9,
            beta2=0.98,
            epsilon=1e-09,
        )
        # Presumably restores a previous checkpoint if one exists -- confirm in
        # Transformer.load.
        transformer.load(adam)
        summary_writer = tf.contrib.summary.create_file_writer(hparams['log_dir'])
        summary_writer.set_as_default()
        transformer.fit(data_source, adam, summary_writer)

In [None]:
# graph mode
def worker_graph(hparams, gpu_id):
    """Train a Transformer with a static TF1 graph on the GPU given by ``gpu_id``.

    The training and summary ops are built once, before the session starts, so
    the graph does not grow per batch and every variable the optimizer creates
    exists when ``tf.global_variables_initializer()`` runs.

    Args:
        hparams: Hyperparameter mapping. It is passed to ``SampleDataSource``
            and ``Transformer``; this function reads ``'num_epochs'``,
            ``'batch_size'`` and ``'log_dir'`` from it.
        gpu_id: Index substituted into the ``'/gpu:{}'`` device string.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:{}'.format(gpu_id)):
            ds = SampleDataSource(hparams)
            model = Transformer(hparams, True)
            model.build_graph()
            learning_rate = model.learning_rate()
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.98, epsilon=1e-09)

            # Build the update op a single time. Creating it inside the batch
            # loop would add new ops on every step and would create the Adam
            # slot variables only after the initializer had already run.
            trainable_vars = tf.trainable_variables()
            grads = tf.gradients(model.loss_op, trainable_vars)
            train_op = optimizer.apply_gradients(zip(grads, trainable_vars), model.global_step)

            # Scalar summaries are graph ops too; they are fetched together with
            # the training step and written out every 10th global step.
            # Summary ops without a GPU kernel are covered by
            # allow_soft_placement below.
            summary_op = tf.summary.merge([
                tf.summary.scalar('summary/acc', model.acc_op),
                tf.summary.scalar('summary/loss', model.loss_op),
                tf.summary.scalar('summary/learning_rate', learning_rate),
            ])

            tf_config = tf.ConfigProto(
                allow_soft_placement=True,
                gpu_options=tf.GPUOptions(
                    allow_growth=True
                )
            )
            with tf.Session(config=tf_config) as sess:
                sess.run(tf.global_variables_initializer())
                writer = tf.summary.FileWriter(hparams['log_dir'])
                # Defaults keep the end-of-epoch report printable even when an
                # epoch yields no batches.
                step, loss, acc = 0, float('nan'), float('nan')
                try:
                    for e in range(hparams['num_epochs']):
                        ds.shuffle()
                        batch = ds.feed_dict(model, hparams['batch_size'], True)
                        start = time.time()
                        for b in batch:
                            # Elements 0 and 2 of each batch are fed as encoder
                            # and decoder inputs respectively.
                            inputs, targets = b[0], b[2]
                            _, loss, acc, summary = sess.run(
                                [train_op, model.loss_op, model.acc_op, summary_op],
                                feed_dict={
                                    model.encoder_inputs_ph: inputs,
                                    model.decoder_inputs_ph: targets,
                                    model.is_training_ph: True
                                })
                            # Read the step in its own run so the value is the
                            # one after the update was applied.
                            step = sess.run(model.global_step)
                            if step % 10 == 0:
                                writer.add_summary(summary, step)
                        print('elapsed: ', time.time() - start)
                        model.save(optimizer)
                        print('{} epoch finished. now {} step, loss: {:.4f}, acc: {:.4f}'.format(e, step, loss ,acc))
                finally:
                    # Flush pending events and release the event file even if
                    # training is interrupted.
                    writer.close()

In [7]:
# Child process that will call `worker(hparams2, 1)`, i.e. train on GPU index 1.
process_0 = Process(
    target=worker,
    args=(hparams2, 1),
)
# Second process is currently disabled:
# process_1 = Process(target=worker, args=(hparams2, 1))

In [8]:
# Launch the child process; `worker` runs there with the arguments bound when
# `process_0` was created.
# NOTE(review): the recorded output of this cell shows the child printing
# "restored" and then dying with a traceback raised from
# `tf.contrib.summary.create_file_writer` inside `worker`; the saved traceback
# is cut off before the final error, so the root cause is not visible here.
process_0.start()

restored


Process Process-1:
Traceback (most recent call last):
  File "/home/kentaro.nakanishi/.pyenv/versions/3.6.5/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/kentaro.nakanishi/.pyenv/versions/3.6.5/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-6-1a20639367fe>", line 8, in worker
    writer = tf.contrib.summary.create_file_writer(hparams['log_dir'])
  File "/home/kentaro.nakanishi/.local/share/virtualenvs/universal_transformer-eUF550pf/lib/python3.6/site-packages/tensorflow/python/util/lazy_loader.py", line 53, in __getattr__
    module = self._load()
  File "/home/kentaro.nakanishi/.local/share/virtualenvs/universal_transformer-eUF550pf/lib/python3.6/site-packages/tensorflow/python/util/lazy_loader.py", line 42, in _load
    module = importlib.import_module(self.__name__)
  File "/home/kentaro.nakanishi/.local/share/virtualenvs/universal_transformer-eUF550pf/l

In [None]:
# NOTE(review): in the visible notebook `process_1` is only created by a
# commented-out line in an earlier cell, so running this cell as-is raises
# NameError unless that line is re-enabled (or `process_1` is defined elsewhere).
process_1.start()