In [1]:
import sys
sys.path.append('gqn-datasets')

import tensorflow as tf

from callback import PixelVarianceScheduler, TensorBoardImage
from data_reader import data_reader
from model import GQN

# Short aliases for the Keras namespaces.
tfk = tf.keras
# NOTE(review): tfkl is not referenced in the visible cells — confirm it is still needed.
tfkl = tfk.layers

In [2]:
# Learning-rate annealing, consumed by lr_schedule: start at mu_i and decay
# linearly towards mu_f over n_mu epochs.
mu_i = 5e-4
mu_f = 5e-5
n_mu = int(1.6e6)
# Adam optimizer settings (passed to tfk.optimizers.Adam).
beta_1 = 0.9
beta_2 = 0.99
epsilon = 1e-8
# Arguments for PixelVarianceScheduler; presumably the initial/final pixel
# sigma and the number of steps to anneal over — confirm in callback.py.
sigma_i = 2.0
sigma_f = 0.7
n_sigma = int(2e5)
# Third positional argument to GQN; presumably the number of generation
# steps — TODO confirm against model.py.
L = 6
# Batch size handed to data_reader.
B = 36
# Number of epochs passed to model.fit.
S_max = int(2e6)
# First and second positional arguments to GQN.
representation = 'pyramid'
shared_core = True
# Dataset name and dataset root directory passed to data_reader.
D = 'rooms_ring_camera'
root_path = '/share/dataset/GQN'
# Directory handed to the TensorBoard and TensorBoardImage callbacks.
log_dir = '/tf/logs'
# Seed passed to data_reader for both the train and test readers.
seed = 0

In [3]:
def lr_schedule(epoch):
    """Return the learning rate for ``epoch`` under linear annealing.

    The rate starts at ``mu_i`` for ``epoch == 0`` and decreases linearly
    towards ``mu_f`` as ``epoch`` approaches ``n_mu``; the result is never
    lower than ``mu_f``.
    """
    # Fraction of the annealing window still ahead (negative once past n_mu).
    remaining = 1 - epoch / n_mu
    annealed = mu_f + (mu_i - mu_f) * remaining
    # Clamp so the rate stays at mu_f after the window ends.
    return max(annealed, mu_f)

In [4]:
# Reader settings for the training split, gathered in one mapping.
train_reader_kwargs = dict(
    dataset=D,
    root=root_path,
    mode='train',
    batch_size=B,
    custom_frame_size=64,
    shuffle_buffer_size=256,
    seed=seed,
)
dataset = data_reader(**train_reader_kwargs)

In [5]:
# Reader settings for the test split; identical to the training reader
# except for the mode.
eval_reader_kwargs = dict(
    dataset=D,
    root=root_path,
    mode='test',
    batch_size=B,
    custom_frame_size=64,
    shuffle_buffer_size=256,
    seed=seed,
)
validation_data = data_reader(**eval_reader_kwargs)

In [6]:
# Take the first element yielded by the test reader (presumably one batch —
# confirm against data_reader); it is later handed to TensorBoardImage.
test_inputs, test_target = next(iter(validation_data))

In [7]:
# Build the GQN from the configured representation, shared_core flag and L.
model = GQN(representation, shared_core, L)

In [8]:
# Adam starts at the initial learning rate mu_i; the remaining values come
# straight from the configuration cell.
adam_settings = {
    'learning_rate': mu_i,
    'beta_1': beta_1,
    'beta_2': beta_2,
    'epsilon': epsilon,
}
optimizer = tfk.optimizers.Adam(**adam_settings)

In [9]:
def negative_log_likelihood(x_q, rv_x_q):
    """Return the negative log-likelihood of ``x_q`` under ``rv_x_q``.

    Written as a named function rather than a lambda bound to a name so
    that it carries a meaningful ``__name__`` in tracebacks and logs.

    Args:
        x_q: The observed value to score.
        rv_x_q: An object exposing ``log_prob``; presumably the distribution
            emitted by the model — confirm against model.py.

    Returns:
        ``-rv_x_q.log_prob(x_q)``.
    """
    return -rv_x_q.log_prob(x_q)

In [10]:
# Compile the model under a MirroredStrategy scope.
# NOTE(review): `model` and `optimizer` are constructed in earlier cells,
# outside this scope. TensorFlow's distributed-training guide asks for model
# and optimizer creation to happen inside strategy.scope() — confirm that
# the current ordering is intended.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model.compile(optimizer=optimizer, loss=negative_log_likelihood)

In [11]:
# Instantiate each callback on its own line, then collect them in the same
# order as before.
tensorboard_cb = tfk.callbacks.TensorBoard(log_dir=log_dir)
lr_cb = tfk.callbacks.LearningRateScheduler(lr_schedule)
sigma_cb = PixelVarianceScheduler(sigma_i, sigma_f, n_sigma)
image_cb = TensorBoardImage(log_dir, test_inputs, test_target)
callbacks = [tensorboard_cb, lr_cb, sigma_cb, image_cb]

In [None]:
# Each "epoch" is a single optimisation step (steps_per_epoch=1), so
# epochs=S_max requests S_max steps and the epoch index given to the
# callbacks advances once per step.
# Validation uses one step (validation_steps=1) every 100 epochs.
model.fit(dataset,
          epochs=S_max,
          callbacks=callbacks,
          validation_data=validation_data,
          steps_per_epoch=1,
          validation_steps=1,
          validation_freq=100)

Train for 1 steps, validate for 1 steps
Epoch 1/2000000
INFO:tensorflow:batch_all_reduce: 23 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:batch_all_reduce: 23 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
Epoch 2/2000000
Epoch 3/2000000
Epoch 4/2000000
Epoch 5/2000000
Epoch 6/2000000


In [13]:
# Scratch: inspect the model's current `sigma` attribute.
model.sigma

1.999389

In [1]:
from datetime import datetime

In [4]:
# Scratch: preview a log directory path under /tf/logs suffixed with the
# current local time formatted as YYYYMMDD-HHMMSS.
'/tf/logs/' + datetime.now().strftime('%Y%m%d-%H%M%S')

'/tf/logs/20191009-092717'

In [26]:
# Scratch: sample epoch number for trying out filename formatting.
epoch = 200000

In [27]:
# Scratch: preview a checkpoint path with the epoch zero-padded to 7 digits.
f"/model/cp-{epoch:07d}.ckpt"

'/model/cp-0200000.ckpt'

In [4]:
# Scratch: print `a` only when it leaves remainder 99 on division by 100.
a = 99
if a % 100 == 99:
    print(a)

99


In [None]:
import da