In [1]:
import sys
!{sys.executable} -m pip install -e ../
!{sys.executable} -m pip install addict

Defaulting to user installation because normal site-packages is not writeable
Obtaining file:///tf/pose3D/src
Installing collected packages: ShAReD-Net
  Attempting uninstall: ShAReD-Net
    Found existing installation: ShAReD-Net 1.0
    Uninstalling ShAReD-Net-1.0:
      Successfully uninstalled ShAReD-Net-1.0
  Running setup.py develop for ShAReD-Net
Successfully installed ShAReD-Net
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [2]:
%reset -f

In [3]:
import sys

In [4]:
import time
import itertools

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [5]:
def init_devs():
    """Configure the TensorFlow runtime and print the resulting device setup.

    Steps, in order:
      * raise the Python recursion limit to 10**7,
      * enable soft device placement,
      * set the graph optimizer (grappler) options,
      * print the visible devices,
      * print the inter-/intra-op thread counts, set both to 12, print them again,
      * enable memory growth on every physical GPU and make all of them visible,
      * print physical devices, logical devices and the TensorFlow version.

    The order matters: the original author notes that memory growth must be
    configured before the GPUs are initialised; a RuntimeError raised by that
    section is printed rather than propagated.
    """
    sys.setrecursionlimit(10**7)

    tf.config.set_soft_device_placement(True)

    grappler_options = dict(
        layout_optimizer=True,
        constant_folding=True,
        shape_optimization=True,
        remapping=True,
        arithmetic_optimization=True,
        dependency_optimization=True,
        loop_optimization=True,
        function_optimization=True,
        debug_stripper=False,
        disable_model_pruning=False,
        scoped_allocator_optimization=True,
        pin_to_host_optimization=True,
        implementation_selector=True,
        disable_meta_optimizer=False,
    )
    tf.config.optimizer.set_experimental_options(grappler_options)

    print(tf.config.get_visible_devices())

    # Show the thread-pool sizes before and after overriding them.
    threading_cfg = tf.config.threading
    print(threading_cfg.get_inter_op_parallelism_threads())
    print(threading_cfg.get_intra_op_parallelism_threads())
    threading_cfg.set_inter_op_parallelism_threads(12)
    threading_cfg.set_intra_op_parallelism_threads(12)
    print(threading_cfg.get_inter_op_parallelism_threads())
    print(threading_cfg.get_intra_op_parallelism_threads())

    physical_gpus = list(tf.config.experimental.list_physical_devices('GPU'))
    if physical_gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs.
            for device in physical_gpus:
                tf.config.experimental.set_memory_growth(device, True)
            tf.config.experimental.set_visible_devices(physical_gpus, 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(physical_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as err:
            # Memory growth must be set before GPUs have been initialized.
            print(err)

    logical_devs = tf.config.list_logical_devices()
    physical_devs = tf.config.experimental.list_physical_devices()

    print("physical_devs", physical_devs)
    print("logical_devs", logical_devs)

    print(tf.version.VERSION)


init_devs()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]
0
0
12
12
4 Physical GPUs, 4 Logical GPUs
physical_devs [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:1', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:2', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:3', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2'

In [6]:
import ShAReD_Net.training.train_distributed as train

import ShAReD_Net.training.slim as training_slim
import ShAReD_Net.model.slim as model_slim
import ShAReD_Net.training.loss.slim as loss_slim



In [7]:
from ShAReD_Net.configure import config

# Dataset locations (image files and their annotations). These are absolute
# paths inside the training container; adjust them when running elsewhere.
config.dataset.IMG_PATH = "/dataset/jta/images_jpg"
config.dataset.ANNO_PATH = "/dataset/jta/new_image_annotations"

# Per-run output directories. `config.checkpoint.path` is the directory handed
# to the CheckpointManager in `create_checkpoint`; the TensorBoard path is
# presumably consumed by the training loop — confirm in train_distributed.
config.checkpoint.path = "/tf/pose3D/checkpoints/run16"
config.tensorboard.path = "/tf/pose3D/logdir/run16"


In [8]:
import ShAReD_Net.data.transform.transform as transform



def create_train_dataset(per_replica_batch_size):
    """Build the training dataset on the CPU, shuffled and prefetched.

    Args:
        per_replica_batch_size: accepted for interface compatibility but not
            used here; the size passed to `transform.create_dataset` is
            `config.training.batch_size`.
            NOTE(review): confirm that ignoring this argument is intended.

    Returns:
        The dataset for the "train" split with a shuffle buffer of 500 and a
        prefetch buffer of 100.
    """
    with tf.device("/cpu:0"):
        dataset = transform.create_dataset("train", config.training.batch_size)
        dataset = dataset.shuffle(500)
        dataset = dataset.prefetch(100)
    return dataset

def create_test_dataset(per_replica_batch_size):
    """Build the evaluation dataset on the CPU with a small prefetch buffer.

    Args:
        per_replica_batch_size: accepted for interface compatibility but not
            used here; the size passed to `transform.create_dataset` is
            `config.training.batch_size`.
            NOTE(review): confirm that ignoring this argument is intended.

    Returns:
        The dataset for the "test" split, unshuffled, with a prefetch buffer
        of 10.
    """
    with tf.device("/cpu:0"):
        test_ds = transform.create_dataset("test", config.training.batch_size)
        test_ds = test_ds.prefetch(10)
    return test_ds


In [9]:
def create_opt():
    """Return the training optimizer: Adam with the configured learning rate.

    `epsilon` is set to 0.001 instead of the Keras default.
    Alternatives the author left commented out with the same learning rate:
    plain SGD, and Nadam with epsilon=0.001.
    """
    return tf.keras.optimizers.Adam(
        learning_rate=config.training.learning_rate,
        epsilon=0.001,
    )

In [10]:
def train_loss():
    """Factory for the loss object used during training (a fresh SlimTrainingLoss)."""
    return training_slim.SlimTrainingLoss()

def eval_loss():
    """Factory for the loss object used during evaluation (a fresh SlimTrainingLoss)."""
    return training_slim.SlimTrainingLoss()
    
def train_model():
    """Assemble the slim training model from its four sub-networks.

    The sub-networks are created in a fixed order (low-level extractor,
    encoder, position decoder, pose decoder) and then wrapped in a
    `SlimTrainingModel`. Keypoint count and z-bin count come from `config`.

    Returns:
        The constructed `training_slim.SlimTrainingModel`.
    """
    low_level_extractor = model_slim.LowLevelExtractor(
        color_channel=13,
        texture_channel=16,
        texture_compositions=16,
        out_channel=32,
    )
    encoder = model_slim.Encoder(
        dense_blocks_count=2,
        dense_filter_count=16,
    )
    pos_decoder = model_slim.PosDecoder(
        dense_blocks_count=3,
        dense_filter_count=8,
    )
    pose_decoder = model_slim.PoseDecoder(
        keypoints=config.model.output.keypoints,
        z_bins=config.model.z_bins,
        dense_blocks_count=2,
        dense_filter_count=16,
    )

    return training_slim.SlimTrainingModel(low_level_extractor, encoder, pos_decoder, pose_decoder)

def create_checkpoint(step, optimizer, train_model, train_loss):
    """Create the checkpoint object and its manager for this run.

    The checkpoint tracks the step counter, the optimizer, the four
    sub-networks of `train_model` and the `loss_agg` attribute of
    `train_loss`.

    Args:
        step: step counter to store in the checkpoint.
        optimizer: optimizer whose state is stored.
        train_model: object exposing `low_level_extractor`, `encoder`,
            `pos_decoder` and `pose_decoder`.
        train_loss: object exposing `loss_agg`.

    Returns:
        `(ckpt, manager)`; the manager writes to `config.checkpoint.path`
        and keeps at most 50 checkpoints.
    """
    ckpt = tf.train.Checkpoint(
        step=step,
        optimizer=optimizer,
        low_level_extractor=train_model.low_level_extractor,
        encoder=train_model.encoder,
        pos_decoder=train_model.pos_decoder,
        pose_decoder=train_model.pose_decoder,
        loss_agg=train_loss.loss_agg,
    )
    manager = tf.train.CheckpointManager(ckpt, config.checkpoint.path, max_to_keep=50)
    return ckpt, manager

In [11]:
import ShAReD_Net.model.modules.slim as slim_modules

# ROI size expressed in input-image pixels: the configured ROI size scaled by
# the image downsampling factor, plus one.
# NOTE(review): presumably `config.model.roi_size` is given in downsampled
# feature-map units and the "+ 1" keeps the crop size odd — confirm.
roi_size_img = np.asarray(config.model.roi_size) * config.model.img_downsampling + 1

# Shared module-level extractor; the image-summary callbacks call it with
# `[img, indexes]` to crop ROIs out of the input image for TensorBoard.
roi_extractor = slim_modules.Roi_Extractor(roi_size=roi_size_img)

def calc_img_index(roiindices):
    """Scale ROI indexes by `config.model.img_downsampling`.

    Every component of `roiindices` is multiplied by the factor.
    NOTE(review): presumably this maps feature-map coordinates to image
    coordinates — confirm whether a batch-index column should also be scaled.
    """
    return roiindices * config.model.img_downsampling
    

In [12]:

def print_loss(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Step callback: print the individual loss terms for one device.

    Only `dev`, `loss` and `extra_loss` are read; the remaining parameters
    exist to match the common callback signature.

    `loss` is unpacked as
    `(loss_per_batch, detection, (pos_xy, var_xy), (pos_z, var_z))`.
    The extra loss is summed and divided by 10 before printing (the scaling
    constant is not explained in this file).
    """
    _per_batch, detection, (pos_xy, var_xy), (pos_z, var_z) = loss

    scaled_extra = tf.reduce_sum(extra_loss) / 10

    tf.print("On", dev)
    report = (
        ("detection_loss", detection),
        ("estimator_loss_xy", pos_xy),
        ("estimator_loss_z", pos_z),
        ("estimator_loss_var_xy", var_xy),
        ("estimator_loss_var_z", var_z),
        ("extra_loss_sum", scaled_extra),
    )
    for label, value in report:
        tf.print(label, value)


def simple_summery(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Step callback: write scalar training summaries.

    Writes the individual loss terms, the summed extra loss, the summed
    per-batch loss, gradient magnitude statistics and, per keypoint, the mean
    absolute error of the predicted pose against the ground truth for each
    coordinate.

    Args:
        batch: `(img, (pos_hm_gt, loss_weights), roi_indexes,
            (pose_xyz_gt, pose_indexes))`; only `pose_xyz_gt` is used.
        output: `(poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z))`;
            only `poses_xyz` is used.
        loss: `(loss_per_batch, detection_loss, (pos_xy, var_xy),
            (pos_z, var_z))`.
        extra_loss: additional loss terms, summed for the "extra_loss" scalar.
        grads: iterable of gradients; `None` entries are skipped.
        dev, step, ckpt, manager, train_model: unused, present to match the
            common callback signature.
    """
    loss_per_batch, detection_loss_sum, (loss_pos_xy_sum, loss_var_xy_sum), (loss_pos_z_sum, loss_var_z_sum) = loss
    poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z) = output
    img, (pos_hm_gt, loss_weights), roi_indexes, (pose_xyz_gt, pose_indexes) = batch
    tf.print("simple_summery")

    loss_per_batch_sum = tf.reduce_sum(loss_per_batch)
    extra_loss_sum = tf.reduce_sum(extra_loss)

    tf.summary.scalar("detection_loss", detection_loss_sum)

    tf.summary.scalar("loss_pos_xy_sum", loss_pos_xy_sum)
    tf.summary.scalar("loss_pos_z_sum", loss_pos_z_sum)

    tf.summary.scalar("loss_var_xy_sum", loss_var_xy_sum)
    tf.summary.scalar("loss_var_z_sum", loss_var_z_sum)

    tf.summary.scalar("extra_loss", extra_loss_sum)

    tf.summary.scalar("agg_loss", loss_per_batch_sum)

    # Variables that did not contribute to the loss have a gradient of None;
    # skip them so the statistics do not crash.
    valid_grads = [grad for grad in grads if grad is not None]

    mean_grad = tf.zeros(())
    max_grad = tf.zeros(())
    for grad in valid_grads:
        abs_grad = tf.abs(grad)
        mean_grad = mean_grad + tf.reduce_mean(abs_grad)
        max_grad = tf.math.maximum(tf.reduce_max(abs_grad), max_grad)
    # Guard the division so an empty gradient list reports 0 instead of NaN.
    mean_grad = mean_grad / max(len(valid_grads), 1)

    tf.summary.scalar("max_grad", max_grad)

    tf.summary.scalar("mean_grad", mean_grad)

    # Per-keypoint mean absolute error for each coordinate. Prediction and
    # ground truth must be compared on the SAME component (the previous code
    # compared the predicted x against the ground-truth y and z).
    for k in range(config.model.output.keypoints):
        for axis, axis_name in enumerate(("x", "y", "z")):
            abs_err = tf.abs(poses_xyz[:, k, axis] - pose_xyz_gt[:, k, axis])
            tf.summary.scalar(f"pose {axis_name}, kp {k}", tf.reduce_mean(abs_err))
    
    
def complex_summery(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Step callback: write image summaries for the training step.

    Logs the input image, channels 0 and 1 of the ground-truth and predicted
    position heatmaps (tagged "near" and "fare"), the ROI crops taken from
    the input image, the xy pose probability maps summed over axis 1, and one
    image per entry along axis 1 of the xy maps and of the z maps.

    Only `batch` and `output` are read; the remaining parameters exist to
    match the common callback signature.
    """
    _poses, heatmaps, (xy_maps, z_maps) = output
    image, (heatmaps_gt, _weights), roi_idx, (_pose_gt, _pose_idx) = batch
    tf.print("complex_summery")

    tf.summary.image("image", image, max_outputs=4)

    # Each heatmap channel is logged as a single-channel image.
    heatmap_views = (
        ("pos_hm_gt_near", heatmaps_gt, 0),
        ("pos_hm_gt_fare", heatmaps_gt, 1),
        ("pos_hm_near", heatmaps, 0),
        ("pos_hm_fare", heatmaps, 1),
    )
    for tag, maps, channel in heatmap_views:
        tf.summary.image(tag, maps[..., channel, None], max_outputs=4)

    crops = roi_extractor([image, calc_img_index(roi_idx)])
    tf.summary.image("pose_imges", crops, max_outputs=4)
    tf.summary.image("pose_hm", tf.reduce_sum(xy_maps, axis=1), max_outputs=4)

    for kp, kp_map in enumerate(tf.unstack(xy_maps, axis=1)):
        tf.summary.image(f"pose_hm for keypoint {kp}", kp_map, max_outputs=2)

    for kp, z_profile in enumerate(tf.unstack(z_maps, axis=1)):
        # Two trailing singleton axes turn the profile into an image-shaped tensor.
        tf.summary.image(f"z_slice for keypoint {kp}", z_profile[..., None, None], max_outputs=2)


In [13]:
def eval_summery(dev, step, batch, output, loss, eval_loss):
    """Evaluation callback: write the scalar loss terms as summaries.

    `loss` is unpacked as
    `(loss_per_batch, detection, (pos_xy, var_xy), (pos_z, var_z))`;
    `loss_per_batch` is summed for the "agg_loss" scalar. `output` and `batch`
    are only unpacked (which checks their structure); `dev`, `step` and
    `eval_loss` are unused.
    """
    per_batch, detection, (pos_xy, var_xy), (pos_z, var_z) = loss
    _poses, _heatmaps, (_xy_maps, _z_maps) = output
    _image, (_heatmaps_gt, _weights), _roi_idx, (_pose_gt, _pose_idx) = batch

    tf.print("eval_summery")

    aggregated = tf.reduce_sum(per_batch)

    scalars = (
        ("detection_loss", detection),
        ("loss_pos_xy_sum", pos_xy),
        ("loss_pos_z_sum", pos_z),
        ("loss_var_xy_sum", var_xy),
        ("loss_var_z_sum", var_z),
        ("agg_loss", aggregated),
    )
    for tag, value in scalars:
        tf.summary.scalar(tag, value)

def complex_eval_summery(dev, step, batch, output, loss, eval_loss):
    """Evaluation callback: write image summaries for an evaluation step.

    Logs the input image, channels 0 and 1 of the ground-truth and predicted
    position heatmaps (tagged "near" and "fare"), the ROI crops taken from
    the input image, and the xy pose probability maps summed over axis 1.

    Only `batch` and `output` are read; `dev`, `step`, `loss` and `eval_loss`
    are unused.
    """
    _poses, heatmaps, (xy_maps, _z_maps) = output
    image, (heatmaps_gt, _weights), roi_idx, (_pose_gt, _pose_idx) = batch

    tf.print("complex_eval_summery")

    tf.summary.image("image", image, max_outputs=4)

    # Each heatmap channel is logged as a single-channel image.
    heatmap_views = (
        ("pos_hm_gt_near", heatmaps_gt, 0),
        ("pos_hm_gt_fare", heatmaps_gt, 1),
        ("pos_hm_near", heatmaps, 0),
        ("pos_hm_fare", heatmaps, 1),
    )
    for tag, maps, channel in heatmap_views:
        tf.summary.image(tag, maps[..., channel, None], max_outputs=4)

    crops = roi_extractor([image, calc_img_index(roi_idx)])
    tf.summary.image("pose_imges", crops, max_outputs=4)
    tf.summary.image("pose_hm", tf.reduce_sum(xy_maps, axis=1), max_outputs=4)

In [14]:
def save_checkpoint(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Step callback: save a checkpoint numbered with the current step.

    The save runs inside `tf.py_function` (no inputs, no outputs). It calls
    `manager.save(int(step))` and prints the step stored in `ckpt.step`
    together with the returned path. Only `step`, `ckpt` and `manager` are
    read.
    """
    def _write_checkpoint():
        written_to = manager.save(int(step))
        print("Saved checkpoint for step {}: {}".format(int(ckpt.step), written_to))

    tf.py_function(_write_checkpoint, [], [])

def finalize(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Step callback: finalize the default graph so it becomes read-only.

    Runs inside `tf.py_function` (no inputs, no outputs): prints "Finalized"
    and then calls `tf.Graph.finalize` on the graph returned by
    `tf.compat.v1.get_default_graph()`. None of the callback arguments are
    read.
    """
    def _freeze_default_graph():
        print("Finalized")
        default_graph = tf.compat.v1.get_default_graph()
        tf.Graph.finalize(default_graph)

    tf.py_function(_freeze_default_graph, [], [])


def count_params(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Step callback: print the parameter count reported by `train_model.count_params()`.

    Only `train_model` is read; the other arguments match the common callback
    signature.
    """
    param_count = train_model.count_params()
    print(param_count)



In [15]:
def batching(dataset, batch_size):
    """Batch `dataset` and flatten its ragged index tensors into dense ones.

    After batching, the ROI indexes and pose indexes are ragged. Each is
    replaced by its `flat_values` with one extra leading component per row
    holding the index of the batch element the row belongs to. The relative
    poses are replaced by their `flat_values` unchanged.

    Args:
        dataset: dataset whose elements are
            `(img, (pos_heatmap, weights), roi_indexes, (rel_pose, pose_indexes))`.
        batch_size: number of elements per batch.

    Returns:
        The batched, flattened dataset with a prefetch buffer of 100; elements
        are `(img, (pos_heatmap, weights), roi_indexes_flat,
        (rel_pose_flat, pose_indexes_flat))`.
    """
    batched_ds = dataset.batch(batch_size)

    def unragg(img, pos_stuff, roi_indexes, pose_stuff):
        rel_pose, pose_indexes = pose_stuff
        pos_heatmap, weights = pos_stuff

        # Both helpers run as numpy code via tf.numpy_function. The output
        # array uses the dtype of the incoming indexes so that it always
        # matches the `Tout` declared below (the ragged tensor's dtype);
        # a hard-coded int32 would fail for any other index dtype.
        # `row_length[1]` is the second level of the nested row lengths;
        # presumably the number of rows per batch element — TODO confirm
        # against the dataset produced by transform.create_dataset.

        def unragg_roi_indexes(indexes, row_length):
            new_indexes = np.empty([indexes.shape[0], indexes.shape[1] + 1], dtype=indexes.dtype)
            new_indexes[:, 1:] = indexes
            start = 0
            for batch_idx, length in enumerate(row_length[1]):
                stop = start + int(length)
                new_indexes[start:stop, 0] = batch_idx
                start = stop
            return new_indexes

        def unragg_pose_indexes(indexes, row_length):
            new_indexes = np.empty(
                [indexes.shape[0], indexes.shape[1], indexes.shape[-1] + 1],
                dtype=indexes.dtype,
            )
            new_indexes[:, :, 1:] = indexes
            start = 0
            for batch_idx, length in enumerate(row_length[1]):
                stop = start + int(length)
                new_indexes[start:stop, :, 0] = batch_idx
                start = stop
            return new_indexes

        roi_indexes_flat = tf.numpy_function(
            unragg_roi_indexes,
            [roi_indexes.flat_values, roi_indexes.nested_row_lengths()],
            Tout=roi_indexes.dtype,
        )
        pose_indexes_flat = tf.numpy_function(
            unragg_pose_indexes,
            [pose_indexes.flat_values, pose_indexes.nested_row_lengths()],
            Tout=pose_indexes.dtype,
        )

        rel_pose_flat = rel_pose.flat_values

        return img, (pos_heatmap, weights), roi_indexes_flat, (rel_pose_flat, pose_indexes_flat)

    unragged_ds = batched_ds.map(unragg).prefetch(100)
    return unragged_ds


In [16]:
def input_pre(batch):
    """Select the model inputs from a batch.

    `batch` is unpacked as
    `(img, (pos_heatmap, weights), roi_indexes, (rel_pose, pose_indexes))`.

    Returns:
        `(img, roi_indexes, pose_indexes)`.
    """
    image, (_heatmap, _weights), roi_idx, (_rel_pose, pose_idx) = batch
    return image, roi_idx, pose_idx
    
def loss_pre(output, batch):
    """Arrange model output and batch into the two argument groups for the loss.

    `output` is unpacked as
    `(poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z))` and `batch` as
    `(img, (pos_hm_gt, loss_weights), roi_indexes, (pose_xyz_gt, pose_indexes))`.

    Returns:
        A pair `(pose_group, position_group)` where
        `pose_group = (poses_xyz, pose_xyz_gt, pose_prob_map_xy, pose_prob_maps_z)`
        and `position_group = (pos_hm, pos_hm_gt, loss_weights)`.
    """
    pred_poses, pred_heatmap, (xy_maps, z_maps) = output
    _image, (gt_heatmap, weights), _roi_idx, (gt_poses, _pose_idx) = batch

    pose_group = (pred_poses, gt_poses, xy_maps, z_maps)
    position_group = (pred_heatmap, gt_heatmap, weights)
    return pose_group, position_group


# Module-level clipping object applied to the extra loss before it is summed.
# NOTE(review): its exact behaviour is defined in ShAReD_Net.training.slim.
lc_extra = training_slim.LossClipping(name="extra")


def grad_pre(loss, extra_loss, batch, optimizer, train_model, train_loss):
    """Prepare the quantities needed for the gradient step.

    Args:
        loss: `(loss_per_batch, detection, (pos_xy, var_xy), (pos_z, var_z))`;
            only `loss_per_batch` is forwarded.
        extra_loss: additional loss terms; passed through `lc_extra`, summed
            and multiplied by 50.
        batch, optimizer: unused, present to match the callback signature.
        train_model: object exposing the four sub-networks.
        train_loss: loss object whose trainable variables are also optimised.

    Returns:
        `(loss_per_batch, extra_loss_sum, trainable_vars)` where
        `trainable_vars` concatenates the trainable variables of the low-level
        extractor, encoder, position decoder, pose decoder and `train_loss`,
        in that order.
    """
    # Unpacking the full structure keeps the shape check on `loss`.
    loss_per_batch, _detection, (_pos_xy, _var_xy), (_pos_z, _var_z) = loss

    clipped_extra = lc_extra(extra_loss)
    extra_loss_sum = tf.reduce_sum(clipped_extra) * 50

    trainable_vars = (
        train_model.low_level_extractor.trainable_variables
        + train_model.encoder.trainable_variables
        + train_model.pos_decoder.trainable_variables
        + train_model.pose_decoder.trainable_variables
        + train_loss.trainable_variables
    )

    return loss_per_batch, extra_loss_sum, trainable_vars



In [17]:
# Distribution strategy for training: MirroredStrategy with hierarchical-copy
# all-reduce as the cross-device operation.
dist_strat = tf.distribute.MirroredStrategy(cross_device_ops = tf.distribute.HierarchicalCopyAllReduce())

# Step count handed to train.train below.
steps = 100000

# Start from the library's default callback container and register the
# notebook's callbacks on it.
# NOTE(review): the integer keys of `every_steps` / `every_eval_steps` are
# presumably step intervals and the keys of `at_step` absolute step numbers —
# confirm in ShAReD_Net.training.train_distributed.
step_callbacks = train.standart_callbacks()
step_callbacks.every_steps[20] = print_loss
step_callbacks.every_steps[250] = save_checkpoint
step_callbacks.every_steps[10] = simple_summery
step_callbacks.every_steps[50] = complex_summery

step_callbacks.every_eval_steps[200] = eval_summery
step_callbacks.every_eval_steps[400] = complex_eval_summery

step_callbacks.at_step[1] = count_params
step_callbacks.at_step[2] = finalize

# Batching function applied to the datasets.
step_callbacks.make_batches = batching

# Hooks that adapt batches/outputs for the model, the loss and the gradient step.
step_callbacks.grad_pre = grad_pre
step_callbacks.input_pre = input_pre
step_callbacks.loss_pre = loss_pre

# Dataset factories.
step_callbacks.create_train_dataset = create_train_dataset
step_callbacks.create_test_dataset = create_test_dataset

# Checkpoint factory.
step_callbacks.create_ckpt = create_checkpoint

# Loss factories for training and evaluation.
step_callbacks.create_loss = train_loss
step_callbacks.create_eval_loss = eval_loss

# Model and optimizer factories.
step_callbacks.create_model = train_model
step_callbacks.create_opt = create_opt


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


In [None]:
# Launch distributed training with the strategy and callbacks configured above.
train.train(
    steps,
    dist_strat,
    batch_size=config.training.batch_size,
    learning_rate=config.training.learning_rate,
    callbacks=step_callbacks,
)


Roi_Extractor [TensorShape([1, None, None, 3]), TensorShape([None, 3])]
Restored from /tf/pose3D/checkpoints/run16/ckpt-9750
Instructions for updating:
renamed to `run`
SlimInferenzModel (TensorShape([None, 421, 421, 3]), TensorShape(None), TensorShape(None))
Extractor (None, None, None, 3)
Encoder (None, None, None, 32)
stage1 [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
big_shared1 [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
ResAttention [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
Attention [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
ShReD [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 96])]
DenseModule (None, None, None, 128)
DenseBlock (None, None, None, 24)
BnDoConfReluConfRelu (None, None, None, 24)
DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
big_normal (None, None, None, 56)
normal_shared1 [TensorShape(

BnDoConfReluConfRelu (None, None, None, 24)
DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
scale_1_4_res (None, None, None, 32)
scale_1_4_shc (None, None, None, 56)
stage3 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
big_shared1 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
ResAttention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
Attention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
ShReD [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 192])]
DenseModule (None, None, None, 256)
DenseBlock (None, None, None, 24)
BnDoConfReluConfRelu (None, None, None, 24)
DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
big_normal (None, None, None, 56)
normal_shared1 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
scale_down (None, None, None, 64)
ResAttention [TensorShape([Non

scale_up (None, None, None, 48)
small_medium (None, None, None, 48)
medium_shared3 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
scale_down (None, None, None, 64)
ResAttention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
Attention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
DenseModule (None, None, None, 256)
DenseBlock (None, None, None, 24)
BnDoConfReluConfRelu (None, None, None, 24)
DenseBlock (None, None, None, 32)
BnDoConfReluConfRelu (None, None, None, 32)
DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
scale_up (None, None, None, 48)
medium_normal (None, None, None, 48)
normal_shared3 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
scale_down (None, None, None, 64)
ResAttention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
Attention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]

DenseBlock (None, 35, 35, 40)
BnDoConfReluConfRelu (None, 35, 35, 40)
SelfShAReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
ShAReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
ResAttention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
Attention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
ShReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 210])]
DenseModule (None, 35, 35, 280)
DenseBlock (None, 35, 35, 24)
BnDoConfReluConfRelu (None, 35, 35, 24)
DenseBlock (None, 35, 35, 40)
BnDoConfReluConfRelu (None, 35, 35, 40)
Attention [TensorShape([None, 35, 35, 56]), TensorShape([None, 35, 35, 56])]
ShAReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 112])]
ResAttention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 112])]
Attention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 112])]
ShReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 3

tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:0']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:1']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:2']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:3']]
INFO:tensorflow:batch_all_reduce: 1434 all-reduces with algorithm = hierarchical_copy, num_packs = 1
Instructions for updating:
Use fn_output_signature instead
Roi_Extractor [TensorShape([None, 421, 421, 3]), TensorShape(None)]
3731973
3731973
3731973
3731973
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:0']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:1']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:2']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:3']]
INFO:tensorflow:batch_all_reduce: 1434 all-reduces with algorithm = hierarchical_copy, num_packs = 1
3731973
3731973
3731973
3731973
Step: 9751
Step: 9751
Step: 975

estimator_loss_xy 8549.82617
estimator_loss_z 3005.40649
estimator_loss_var_xy 532.099365
estimator_loss_var_z 265.930054
extra_loss_sum 0.100461438
simple_summery
simple_summery
simple_summery
Step: 9821
Step: 9821
Step: 9821
Step: 9821
Step: 9822
Step: 9822
Step: 9822
Step: 9822
Step: 9823
Step: 9823
Step: 9823
Step: 9823
Step: 9824
Step: 9824
Step: 9824
Step: 9824
Step: 9825
Step: 9825
Step: 9825
Step: 9825
Step: 9826
Step: 9826
Step: 9826
Step: 9826
Step: 9827
Step: 9827
Step: 9827
Step: 9827
Step: 9828
Step: 9828
Step: 9828
Step: 9828
Step: 9829
Step: 9829
Step: 9829
Step: 9829
Step: 9830
simple_summery
Step: 9830
Step: 9830
Step: 9830
simple_summery
simple_summery
simple_summery
Step: 9831
Step: 9831
Step: 9831
Step: 9831
Step: 9832
Step: 9832
Step: 9832
Step: 9832
Step: 9833
Step: 9833
Step: 9833
Step: 9833
Step: 9834
Step: 9834
Step: 9834
Step: 9834
Step: 9835
Step: 9835
Step: 9835
Step: 9835
Step: 9836
Step: 9836
Step: 9836
Step: 9836
Step: 9837
Step: 9837
Step: 9837
Step: 983

Step: 9906
Step: 9906
Step: 9906
Step: 9907
Step: 9907
Step: 9907
Step: 9907
Step: 9908
Step: 9908
Step: 9908
Step: 9908
Step: 9909
Step: 9909
Step: 9909
Step: 9909
Step: 9910
simple_summery
Step: 9910
Step: 9910
Step: 9910
simple_summery
simple_summery
simple_summery
Step: 9911
Step: 9911
Step: 9911
Step: 9911
Step: 9912
Step: 9912
Step: 9912
Step: 9912
Step: 9913
Step: 9913
Step: 9913
Step: 9913
Step: 9914
Step: 9914
Step: 9914
Step: 9914
Step: 9915
Step: 9915
Step: 9915
Step: 9915
Step: 9916
Step: 9916
Step: 9916
Step: 9916
Step: 9917
Step: 9917
Step: 9917
Step: 9917
Step: 9918
Step: 9918
Step: 9918
Step: 9918
Step: 9919
Step: 9919
Step: 9919
Step: 9919
Step: 9920
simple_summery
Step: 9920
Step: 9920
Step: 9920
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 278.576
estimator_loss_xy 13470.0312
estimator_loss_z 2426.28564
estimator_loss_var_xy 949.79834
estimator_loss_var_z 136.95459
extra_loss_sum 0.100968167
On [['/job:localhost/replica:0/task:0/device:GPU:1']

Step: 9995
Step: 9996
Step: 9996
Step: 9996
Step: 9996
Step: 9997
Step: 9997
Step: 9997
Step: 9997
Step: 9998
Step: 9998
Step: 9998
Step: 9998
Step: 9999
Step: 9999
Step: 9999
Step: 9999
Step: 10000
simple_summery
Step: 10000
Step: 10000
Step: 10000
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 270.809631
estimator_loss_xy 13476.5723
estimator_loss_z 1346.09448
estimator_loss_var_xy 298.896149
estimator_loss_var_z 102.672821
extra_loss_sum 0.100472108
complex_summery
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0',).
simple_summery
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0

estimator_loss_var_z 146.24118
extra_loss_sum 0.100515626
On [['/job:localhost/replica:0/task:0/device:GPU:1']]
detection_loss 55.6771736
estimator_loss_xy 12928.0312
estimator_loss_z 3344.7959
estimator_loss_var_xy 676.59845
estimator_loss_var_z 328.734253
extra_loss_sum 0.100515626
On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 102.451279
estimator_loss_xy 2214.9082
estimator_loss_z 204.727585
estimator_loss_var_xy 144.211136
estimator_loss_var_z 10.986742
extra_loss_sum 0.100515626
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 119.455673
estimator_loss_xy 7807.20166
estimator_loss_z 1376.90796
estimator_loss_var_xy 377.835663
estimator_loss_var_z 106.292313
extra_loss_sum 0.100515626
simple_summery
simple_summery
simple_summery
Step: 10061
Step: 10061
Step: 10061
Step: 10061
Step: 10062
Step: 10062
Step: 10062
Step: 10062
Step: 10063
Step: 10063
Step: 10063
Step: 10063
Step: 10064
Step: 10064
Step: 10064
Step: 10064
Step: 10065
Step: 1006

estimator_loss_z 327.566376
estimator_loss_var_xy 178.085449
estimator_loss_var_z 11.911272
extra_loss_sum 0.100718215
On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 257.849487
estimator_loss_xy 10576.8057
estimator_loss_z 381.444519
estimator_loss_var_xy 683.263611
estimator_loss_var_z 14.7327423
extra_loss_sum 0.100718215
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 51.0319672
estimator_loss_xy 7321.69971
estimator_loss_z 1565.61987
estimator_loss_var_xy 494.588684
estimator_loss_var_z 148.044495
extra_loss_sum 0.100718215
simple_summery
simple_summery
simple_summery
Step: 10141
Step: 10141
Step: 10141
Step: 10141
Step: 10142
Step: 10142
Step: 10142
Step: 10142
Step: 10143
Step: 10143
Step: 10143
Step: 10143
Step: 10144
Step: 10144
Step: 10144
Step: 10144
Step: 10145
Step: 10145
Step: 10145
Step: 10145
Step: 10146
Step: 10146
Step: 10146
Step: 10146
Step: 10147
Step: 10147
Step: 10147
Step: 10147
Step: 10148
Step: 10148
Step: 10148
Step: 

On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 115.154106
estimator_loss_xy 1667.63147
estimator_loss_z 347.417816
estimator_loss_var_xy 83.8035507
estimator_loss_var_z 10.5170097
extra_loss_sum 0.101206779
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 67.0797272
estimator_loss_xy 11543.2656
estimator_loss_z 3090.67188
estimator_loss_var_xy 698.050537
estimator_loss_var_z 290.732513
extra_loss_sum 0.101206779
simple_summery
simple_summery
simple_summery
Step: 10221
Step: 10221
Step: 10221
Step: 10221
Step: 10222
Step: 10222
Step: 10222
Step: 10222
Step: 10223
Step: 10223
Step: 10223
Step: 10223
Step: 10224
Step: 10224
Step: 10224
Step: 10224
Step: 10225
Step: 10225
Step: 10225
Step: 10225
Step: 10226
Step: 10226
Step: 10226
Step: 10226
Step: 10227
Step: 10227
Step: 10227
Step: 10227
Step: 10228
Step: 10228
Step: 10228
Step: 10228
Step: 10229
Step: 10229
Step: 10229
Step: 10229
Step: 10230
simple_summery
Step: 10230
Step: 10230
Step: 10230
si

estimator_loss_z 732.523132
estimator_loss_var_xy 167.704788
estimator_loss_var_z 59.704319
extra_loss_sum 0.101218343
On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 139.114487
estimator_loss_xy 2559.12891
estimator_loss_z 620.726807
estimator_loss_var_xy 161.305389
estimator_loss_var_z 31.9780121
extra_loss_sum 0.101218343
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 62.7723045
estimator_loss_xy 1728.07959
estimator_loss_z 913.273499
estimator_loss_var_xy 119.784973
estimator_loss_var_z 69.0922775
extra_loss_sum 0.101218343
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
Step: 10301
Step: 10301
Step: 10301
Step: 10301
Step: 10302
Step: 10302
Step: 10302
Step: 10302
Step: 10303
Step: 10303
Step: 10303
Step: 10303
Step: 10304
Step: 10304
Step: 10304
Step: 10304
Step: 10305
Step: 10305
Step: 10305
Step: 10305
Step: 10306
Step: 10306
Step: 10306
Step: 10306
Step: 10307
Step: 10307
St

On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 264.946259
estimator_loss_xy 3493.72192
estimator_loss_z 373.148773
estimator_loss_var_xy 228.519806
estimator_loss_var_z 26.5995235
extra_loss_sum 0.10081365
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 113.211693
estimator_loss_xy 1530.98486
estimator_loss_z 621.267578
estimator_loss_var_xy 108.825272
estimator_loss_var_z 58.6225128
extra_loss_sum 0.10081365
simple_summery
simple_summery
simple_summery
Step: 10381
Step: 10381
Step: 10381
Step: 10381
Step: 10382
Step: 10382
Step: 10382
Step: 10382
Step: 10383
Step: 10383
Step: 10383
Step: 10383
Step: 10384
Step: 10384
Step: 10384
Step: 10384
Step: 10385
Step: 10385
Step: 10385
Step: 10385
Step: 10386
Step: 10386
Step: 10386
Step: 10386
Step: 10387
Step: 10387
Step: 10387
Step: 10387
Step: 10388
Step: 10388
Step: 10388
Step: 10388
Step: 10389
Step: 10389
Step: 10389
Step: 10389
Step: 10390
simple_summery
Step: 10390
Step: 10390
Step: 10390
simp

estimator_loss_z 118.696236
estimator_loss_var_xy 438.513275
estimator_loss_var_z 7.47048616
extra_loss_sum 0.100005984
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 203.607849
estimator_loss_xy 4896.76465
estimator_loss_z 874.104858
estimator_loss_var_xy 237.484406
estimator_loss_var_z 62.9222107
extra_loss_sum 0.100005984
simple_summery
simple_summery
simple_summery
Step: 10461
Step: 10461
Step: 10461
Step: 10461
Step: 10462
Step: 10462
Step: 10462
Step: 10462
Step: 10463
Step: 10463
Step: 10463
Step: 10463
Step: 10464
Step: 10464
Step: 10464
Step: 10464
Step: 10465
Step: 10465
Step: 10465
Step: 10465
Step: 10466
Step: 10466
Step: 10466
Step: 10466
Step: 10467
Step: 10467
Step: 10467
Step: 10467
Step: 10468
Step: 10468
Step: 10468
Step: 10468
Step: 10469
Step: 10469
Step: 10469
Step: 10469
Step: 10470
simple_summery
Step: 10470
Step: 10470
Step: 10470
simple_summery
simple_summery
simple_summery
Step: 10471
Step: 10471
Step: 10471
Step: 10471
Step: 10472
Step: 

On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 49.5098534
estimator_loss_xy 2988.0166
estimator_loss_z 2364.58032
estimator_loss_var_xy 169.777695
estimator_loss_var_z 201.425323
extra_loss_sum 0.100241974
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 92.3012
estimator_loss_xy 3469.37305
estimator_loss_z 476.437195
estimator_loss_var_xy 169.72644
estimator_loss_var_z 39.2051888
extra_loss_sum 0.100241974
simple_summery
simple_summery
simple_summery
Step: 10541
Step: 10541
Step: 10541
Step: 10541
Step: 10542
Step: 10542
Step: 10542
Step: 10542
Step: 10543
Step: 10543
Step: 10543
Step: 10543
Step: 10544
Step: 10544
Step: 10544
Step: 10544
Step: 10545
Step: 10545
Step: 10545
Step: 10545
Step: 10546
Step: 10546
Step: 10546
Step: 10546
Step: 10547
Step: 10547
Step: 10547
Step: 10547
Step: 10548
Step: 10548
Step: 10548
Step: 10548
Step: 10549
Step: 10549
Step: 10549
Step: 10549
Step: 10550
simple_summery
Step: 10550
Step: 10550
Step: 10550
complex

estimator_loss_z 630.873779
estimator_loss_var_xy 339.796051
estimator_loss_var_z 41.5331955
extra_loss_sum 0.0997722
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 16.7748
estimator_loss_xy 2919.52979
estimator_loss_z 801.467224
estimator_loss_var_xy 111.73069
estimator_loss_var_z 61.4012146
extra_loss_sum 0.0997722
simple_summery
simple_summery
simple_summery
Step: 10621
Step: 10621
Step: 10621
Step: 10621
Step: 10622
Step: 10622
Step: 10622
Step: 10622
Step: 10623
Step: 10623
Step: 10623
Step: 10623
Step: 10624
Step: 10624
Step: 10624
Step: 10624
Step: 10625
Step: 10625
Step: 10625
Step: 10625
Step: 10626
Step: 10626
Step: 10626
Step: 10626
Step: 10627
Step: 10627
Step: 10627
Step: 10627
Step: 10628
Step: 10628
Step: 10628
Step: 10628
Step: 10629
Step: 10629
Step: 10629
Step: 10629
Step: 10630
simple_summery
Step: 10630
Step: 10630
Step: 10630
simple_summery
simple_summery
simple_summery
Step: 10631
Step: 10631
Step: 10631
Step: 10631
Step: 10632
Step: 10632
St

detection_loss 82.2743683
estimator_loss_xy 6802.62305
estimator_loss_z 1540.90149
estimator_loss_var_xy 268.23587
estimator_loss_var_z 128.901199
extra_loss_sum 0.0995459855
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
Step: 10701
Step: 10701
Step: 10701
Step: 10701
Step: 10702
Step: 10702
Step: 10702
Step: 10702
Step: 10703
Step: 10703
Step: 10703
Step: 10703
Step: 10704
Step: 10704
Step: 10704
Step: 10704
Step: 10705
Step: 10705
Step: 10705
Step: 10705
Step: 10706
Step: 10706
Step: 10706
Step: 10706
Step: 10707
Step: 10707
Step: 10707
Step: 10707
Step: 10708
Step: 10708
Step: 10708
Step: 10708
Step: 10709
Step: 10709
Step: 10709
Step: 10709
Step: 10710
simple_summery
Step: 10710
Step: 10710
Step: 10710
simple_summery
simple_summery
simple_summery
Step: 10711
Step: 10711
Step: 10711
Step: 10711
Step: 10712
Step: 10712
Step: 10712
Step: 10712
Step: 10713
Step: 10713
Step: 10713
Step: 10713
Step: 10714
Step: 10714
Step: 10

estimator_loss_xy 1797.66016
estimator_loss_z 173.391174
estimator_loss_var_xy 117.164734
estimator_loss_var_z 9.11475372
extra_loss_sum 0.0995000377
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 328.205292
estimator_loss_xy 2129.6958
estimator_loss_z 302.579742
estimator_loss_var_xy 148.813843
estimator_loss_var_z 15.4067631
extra_loss_sum 0.0995000377
simple_summery
simple_summery
simple_summery
Step: 10781
Step: 10781
Step: 10781
Step: 10781
Step: 10782
Step: 10782
Step: 10782
Step: 10782
Step: 10783
Step: 10783
Step: 10783
Step: 10783
Step: 10784
Step: 10784
Step: 10784
Step: 10784
Step: 10785
Step: 10785
Step: 10785
Step: 10785
Step: 10786
Step: 10786
Step: 10786
Step: 10786
Step: 10787
Step: 10787
Step: 10787
Step: 10787
Step: 10788
Step: 10788
Step: 10788
Step: 10788
Step: 10789
Step: 10789
Step: 10789
Step: 10789
Step: 10790
simple_summery
Step: 10790
Step: 10790
Step: 10790
simple_summery
simple_summery
simple_summery
Step: 10791
Step: 10791
Step: 10791


estimator_loss_var_z 509.930908
extra_loss_sum 0.0994256884
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 82.2632904
estimator_loss_xy 13525.5039
estimator_loss_z 3366.85791
estimator_loss_var_xy 695.664368
estimator_loss_var_z 323.106445
extra_loss_sum 0.0994256884
simple_summery
simple_summery
simple_summery
Step: 10861
Step: 10861
Step: 10861
Step: 10861
Step: 10862
Step: 10862
Step: 10862
Step: 10862
Step: 10863
Step: 10863
Step: 10863
Step: 10863
Step: 10864
Step: 10864
Step: 10864
Step: 10864
Step: 10865
Step: 10865
Step: 10865
Step: 10865
Step: 10866
Step: 10866
Step: 10866
Step: 10866
Step: 10867
Step: 10867
Step: 10867
Step: 10867
Step: 10868
Step: 10868
Step: 10868
Step: 10868
Step: 10869
Step: 10869
Step: 10869
Step: 10869
Step: 10870
simple_summery
Step: 10870
Step: 10870
Step: 10870
simple_summery
simple_summery
simple_summery
Step: 10871
Step: 10871
Step: 10871
Step: 10871
Step: 10872
Step: 10872
Step: 10872
Step: 10872
Step: 10873
Step: 10873
Step:

estimator_loss_z 3279.93628
estimator_loss_var_xy 346.046326
estimator_loss_var_z 322.26355
extra_loss_sum 0.099499
simple_summery
simple_summery
simple_summery
Step: 10941
Step: 10941
Step: 10941
Step: 10941
Step: 10942
Step: 10942
Step: 10942
Step: 10942
Step: 10943
Step: 10943
Step: 10943
Step: 10943
Step: 10944
Step: 10944
Step: 10944
Step: 10944
Step: 10945
Step: 10945
Step: 10945
Step: 10945
Step: 10946
Step: 10946
Step: 10946
Step: 10946
Step: 10947
Step: 10947
Step: 10947
Step: 10947
Step: 10948
Step: 10948
Step: 10948
Step: 10948
Step: 10949
Step: 10949
Step: 10949
Step: 10949
Step: 10950
simple_summery
Step: 10950
Step: 10950
Step: 10950
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
Step: 10951
Step: 10951
Step: 10951
Step: 10951
Step: 10952
Step: 10952
Step: 10952
Step: 10952
Step: 10953
Step: 10953
Step: 10953
Step: 10953
Step: 10954
Step: 10954
Step: 10954
Step: 10954
Step: 10955
Step: 10955
Step: 10955
Step: 1

estimator_loss_z 228960.422
estimator_loss_xy 15820.8545

detection_loss 98.055069


estimator_loss_var_z 15143.8232
estimator_loss_var_xy 124911

cp 606