In [1]:
import sys
!{sys.executable} -m pip install -e ../
!{sys.executable} -m pip install addict

Defaulting to user installation because normal site-packages is not writeable
Obtaining file:///tf/pose3D/src
Installing collected packages: ShAReD-Net
  Attempting uninstall: ShAReD-Net
    Found existing installation: ShAReD-Net 1.0
    Uninstalling ShAReD-Net-1.0:
      Successfully uninstalled ShAReD-Net-1.0
  Running setup.py develop for ShAReD-Net
Successfully installed ShAReD-Net
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [2]:
%reset -f

In [3]:
import sys

In [4]:
import time
import itertools

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [5]:
def init_devs():
    """Configure the TensorFlow runtime for this notebook and print the device setup.

    In order, this raises the Python recursion limit, enables soft device
    placement, applies the graph-optimizer options, sets the inter-op and
    intra-op thread pools to 12 threads each (printing the values before and
    after), and enables memory growth on every physical GPU. The visible,
    physical and logical device lists and the TensorFlow version are printed.
    """
    sys.setrecursionlimit(10**7)

    tf.config.set_soft_device_placement(True)

    # Graph-optimizer switches, passed to TensorFlow in this order.
    optimizer_flags = dict(
        layout_optimizer=True,
        constant_folding=True,
        shape_optimization=True,
        remapping=True,
        arithmetic_optimization=True,
        dependency_optimization=True,
        loop_optimization=True,
        function_optimization=True,
        debug_stripper=False,
        disable_model_pruning=False,
        scoped_allocator_optimization=True,
        pin_to_host_optimization=True,
        implementation_selector=True,
        disable_meta_optimizer=False,
    )
    tf.config.optimizer.set_experimental_options(optimizer_flags)

    print(tf.config.get_visible_devices())

    # Report the thread-pool sizes, override both to 12, then report again.
    thread_cfg = tf.config.threading
    print(thread_cfg.get_inter_op_parallelism_threads())
    print(thread_cfg.get_intra_op_parallelism_threads())
    thread_cfg.set_inter_op_parallelism_threads(12)
    thread_cfg.set_intra_op_parallelism_threads(12)
    print(thread_cfg.get_inter_op_parallelism_threads())
    print(thread_cfg.get_intra_op_parallelism_threads())

    physical_gpus = list(tf.config.experimental.list_physical_devices('GPU'))
    if physical_gpus:
        try:
            # Memory growth currently has to be configured the same on every GPU.
            for device in physical_gpus:
                tf.config.experimental.set_memory_growth(device, True)
            tf.config.experimental.set_visible_devices(physical_gpus, 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(physical_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as err:
            # Memory growth must be set before the GPUs have been initialized;
            # the error is only reported, not re-raised.
            print(err)

    logical_devs = tf.config.list_logical_devices()
    physical_devs = tf.config.experimental.list_physical_devices()

    print("physical_devs",physical_devs)
    print("logical_devs", logical_devs)

    print(tf.version.VERSION)
init_devs()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]
0
0
12
12
4 Physical GPUs, 4 Logical GPUs
physical_devs [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:1', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:2', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:3', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2'

In [6]:
import ShAReD_Net.training.train_distributed as train

import ShAReD_Net.training.slim as training_slim
import ShAReD_Net.model.slim as model_slim
import ShAReD_Net.training.loss.slim as loss_slim



In [7]:
from ShAReD_Net.configure import config

# Dataset locations, stored on the shared `config` object (absolute paths).
config.dataset.IMG_PATH = "/dataset/jta/images_jpg"
config.dataset.ANNO_PATH = "/dataset/jta/new_image_annotations"

# Output locations for this run ("run16"). `create_checkpoint` below reads
# config.checkpoint.path; the tensorboard path is presumably consumed by the
# training package -- TODO confirm.
config.checkpoint.path = "/tf/pose3D/checkpoints/run16"
config.tensorboard.path = "/tf/pose3D/logdir/run16"


In [8]:
import ShAReD_Net.data.transform.transform as transform



def create_train_dataset(per_replica_batch_size):
    """Build the "train" split as a shuffled, prefetched dataset pinned to the CPU.

    Args:
        per_replica_batch_size: accepted but not used here; the size passed to
            `transform.create_dataset` is `config.training.batch_size`.

    Returns:
        The dataset from `transform.create_dataset`, shuffled with a buffer of
        500 and prefetched with a buffer of 100.
    """
    with tf.device("/cpu:0"):
        dataset = transform.create_dataset("train", config.training.batch_size)
        dataset = dataset.shuffle(500)
        dataset = dataset.prefetch(100)
    return dataset

def create_test_dataset(per_replica_batch_size):
    """Build the "test" split as a prefetched (unshuffled) dataset pinned to the CPU.

    Args:
        per_replica_batch_size: accepted but not used here; the size passed to
            `transform.create_dataset` is `config.training.batch_size`.

    Returns:
        The dataset from `transform.create_dataset`, prefetched with a buffer of 10.
    """
    with tf.device("/cpu:0"):
        test_ds = transform.create_dataset("test", config.training.batch_size)
        test_ds = test_ds.prefetch(10)
    return test_ds


In [9]:
def create_opt():
    """Return the optimizer used for training.

    Adam with the learning rate from `config.training.learning_rate` and
    epsilon 0.001.
    """
    # Alternatives tried earlier with the same learning rate: plain SGD, and
    # Nadam with epsilon=0.001.
    return tf.keras.optimizers.Adam(
        learning_rate=config.training.learning_rate,
        epsilon=0.001,
    )

In [10]:
def train_loss():
    """Create a fresh `SlimTrainingLoss` for the training path."""
    return training_slim.SlimTrainingLoss()

def eval_loss():
    """Create a fresh `SlimTrainingLoss` for the evaluation path (same class as training)."""
    return training_slim.SlimTrainingLoss()
    
def train_model():
    """Assemble the `SlimTrainingModel` from its four sub-networks.

    The sub-networks are created in the order extractor, encoder, position
    decoder, pose decoder, and passed to `SlimTrainingModel` in that order.
    Keypoint count and z-bin count for the pose decoder come from `config`.
    """
    extractor = model_slim.LowLevelExtractor(
        color_channel=13,
        texture_channel=16,
        texture_compositions=16,
        out_channel=32,
    )
    feature_encoder = model_slim.Encoder(dense_blocks_count=2, dense_filter_count=16)
    position_decoder = model_slim.PosDecoder(dense_blocks_count=3, dense_filter_count=8)
    keypoint_decoder = model_slim.PoseDecoder(
        keypoints=config.model.output.keypoints,
        z_bins=config.model.z_bins,
        dense_blocks_count=2,
        dense_filter_count=16,
    )
    return training_slim.SlimTrainingModel(extractor, feature_encoder, position_decoder, keypoint_decoder)

def create_checkpoint(step, optimizer, train_model, train_loss):
    """Create the checkpoint object and its manager for this run.

    Args:
        step: object tracked under the checkpoint key "step".
        optimizer: object tracked under the checkpoint key "optimizer".
        train_model: model exposing `low_level_extractor`, `encoder`,
            `pos_decoder` and `pose_decoder`.
        train_loss: loss object exposing `loss_agg`.

    Returns:
        `(ckpt, manager)`: a `tf.train.Checkpoint` and a
        `tf.train.CheckpointManager` writing to `config.checkpoint.path` and
        keeping at most 50 checkpoints.
    """
    tracked = dict(
        low_level_extractor=train_model.low_level_extractor,
        encoder=train_model.encoder,
        pos_decoder=train_model.pos_decoder,
        pose_decoder=train_model.pose_decoder,
        loss_agg=train_loss.loss_agg,
    )
    checkpoint = tf.train.Checkpoint(step=step, optimizer=optimizer, **tracked)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, config.checkpoint.path, max_to_keep=50)
    return checkpoint, checkpoint_manager

In [11]:
import ShAReD_Net.model.modules.slim as slim_modules

# ROI size scaled by the image downsampling factor, plus one. The name
# suggests this is the ROI size in image pixels -- TODO confirm against
# ShAReD_Net.configure.
roi_size_img = np.asarray(config.model.roi_size) * config.model.img_downsampling + 1
        
# Module-level extractor shared by the image-summary callbacks below.
roi_extractor = slim_modules.Roi_Extractor(roi_size=roi_size_img)

def calc_img_index(roiindices):
    """Scale ROI indices by `config.model.img_downsampling` and return the result."""
    return roiindices * config.model.img_downsampling
    

In [12]:

def print_loss(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Print the individual loss terms for one replica via `tf.print`.

    `loss` is unpacked as `(loss_per_batch, detection, (pos_xy, var_xy),
    (pos_z, var_z))`. The extra loss is printed as its sum divided by 10.
    Only `dev`, `loss` and `extra_loss` are used; the other arguments are part
    of the shared callback signature.
    """
    _, detection, (pos_xy, var_xy), (pos_z, var_z) = loss

    scaled_extra = tf.reduce_sum(extra_loss) / 10

    tf.print("On", dev)
    report = (
        ("detection_loss", detection),
        ("estimator_loss_xy", pos_xy),
        ("estimator_loss_z", pos_z),
        ("estimator_loss_var_xy", var_xy),
        ("estimator_loss_var_z", var_z),
        ("extra_loss_sum", scaled_extra),
    )
    for label, value in report:
        tf.print(label, value)


def simple_summery(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Write the scalar training summaries for one step.

    Emits `tf.summary.scalar` values for each loss term, the summed extra
    loss, the summed per-batch loss ("agg_loss"), gradient magnitude
    statistics, and the mean absolute per-axis error of every keypoint.

    Args:
        batch: `(img, (pos_hm_gt, loss_weights), roi_indexes,
            (pose_xyz_gt, pose_indexes))`.
        output: `(poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z))`.
        loss: `(loss_per_batch, detection_loss, (loss_pos_xy, loss_var_xy),
            (loss_pos_z, loss_var_z))`.
        extra_loss: tensor that is summed and logged as "extra_loss".
        grads: sequence of gradient tensors.
        dev, step, ckpt, manager, train_model: unused; part of the shared
            callback signature.
    """
    loss_per_batch, detection_loss_sum, (loss_pos_xy_sum, loss_var_xy_sum), (loss_pos_z_sum, loss_var_z_sum) = loss
    poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z) = output
    img, (pos_hm_gt, loss_weights), roi_indexes, (pose_xyz_gt, pose_indexes) = batch
    tf.print("simple_summery")

    loss_per_batch_sum = tf.reduce_sum(loss_per_batch)
    extra_loss_sum = tf.reduce_sum(extra_loss)

    tf.summary.scalar("detection_loss", detection_loss_sum)

    tf.summary.scalar("loss_pos_xy_sum", loss_pos_xy_sum)
    tf.summary.scalar("loss_pos_z_sum", loss_pos_z_sum)

    tf.summary.scalar("loss_var_xy_sum", loss_var_xy_sum)
    tf.summary.scalar("loss_var_z_sum", loss_var_z_sum)

    tf.summary.scalar("extra_loss", extra_loss_sum)

    tf.summary.scalar("agg_loss", loss_per_batch_sum)

    # Gradient statistics: "mean_grad" is the average over gradient tensors of
    # each tensor's mean absolute value; "max_grad" is the largest absolute
    # entry across all gradient tensors.
    mean_grad = tf.zeros(())
    max_grad = tf.zeros(())
    for grad in grads:
        abs_grad = tf.abs(grad)
        mean_grad += tf.reduce_mean(abs_grad)
        max_grad = tf.math.maximum(tf.reduce_max(abs_grad), max_grad)
    mean_grad /= len(grads)

    tf.summary.scalar("max_grad", max_grad)

    tf.summary.scalar("mean_grad", mean_grad)

    # Per-keypoint mean absolute error, one scalar per coordinate axis.
    # Prediction and ground truth must be read from the SAME axis; previously
    # the y and z errors were computed against the predicted x coordinate.
    for k in range(config.model.output.keypoints):
        for axis, axis_name in enumerate("xyz"):
            abs_error = tf.abs(poses_xyz[:, k, axis] - pose_xyz_gt[:, k, axis])
            tf.summary.scalar(f"pose {axis_name}, kp {k}", tf.reduce_mean(abs_error))
    
    
def complex_summery(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Write the image summaries for one training step.

    Logs the input images, channels 0 ("near") and 1 ("fare") of the ground
    truth and predicted position heatmaps, the image crops produced by the
    module-level `roi_extractor`, the xy pose probability maps summed over
    axis 1, and one image per slice of the xy and z probability maps along
    axis 1. Only `batch` and `output` are used.
    """
    poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z) = output
    img, (pos_hm_gt, loss_weights), roi_indexes, (pose_xyz_gt, pose_indexes) = batch
    tf.print("complex_summery")

    tf.summary.image("image", img, max_outputs=4)

    # Ground-truth heatmap: select one channel, keep a trailing channel axis.
    gt_near, gt_far = (pos_hm_gt[..., channel, None] for channel in (0, 1))
    tf.summary.image("pos_hm_gt_near", gt_near, max_outputs=4)
    tf.summary.image("pos_hm_gt_fare", gt_far, max_outputs=4)

    # Predicted heatmap, same channel split.
    pred_near, pred_far = (pos_hm[..., channel, None] for channel in (0, 1))
    tf.summary.image("pos_hm_near", pred_near, max_outputs=4)
    tf.summary.image("pos_hm_fare", pred_far, max_outputs=4)

    # Crop the input image at the ROI positions scaled by calc_img_index.
    person_crops = roi_extractor([img, calc_img_index(roi_indexes)])
    tf.summary.image("pose_imges", person_crops, max_outputs=4)
    tf.summary.image("pose_hm", tf.reduce_sum(pose_prob_map_xy, axis=1), max_outputs=4)

    for kp, kp_map in enumerate(tf.unstack(pose_prob_map_xy, axis=1)):
        tf.summary.image(f"pose_hm for keypoint {kp}", kp_map, max_outputs=2)

    for kp, z_profile in enumerate(tf.unstack(pose_prob_maps_z, axis=1)):
        # Two trailing axes are appended before the slice is logged as an image.
        tf.summary.image(f"z_slice for keypoint {kp}", z_profile[..., None, None], max_outputs=2)


In [13]:
def eval_summery(dev, step, batch, output, loss, eval_loss):
    """Write the scalar evaluation summaries.

    `loss` is unpacked as `(loss_per_batch, detection, (pos_xy, var_xy),
    (pos_z, var_z))`; every term is logged, and "agg_loss" is the sum of
    `loss_per_batch`. `output` and `batch` are unpacked only to check their
    structure; `dev`, `step` and `eval_loss` are unused.
    """
    per_batch, detection, (pos_xy, var_xy), (pos_z, var_z) = loss
    poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z) = output
    img, (pos_hm_gt, loss_weights), roi_indexes, (pose_xyz_gt, pose_indexes) = batch

    tf.print("eval_summery")

    aggregated = tf.reduce_sum(per_batch)

    scalars = (
        ("detection_loss", detection),
        ("loss_pos_xy_sum", pos_xy),
        ("loss_pos_z_sum", pos_z),
        ("loss_var_xy_sum", var_xy),
        ("loss_var_z_sum", var_z),
        ("agg_loss", aggregated),
    )
    for tag, value in scalars:
        tf.summary.scalar(tag, value)

def complex_eval_summery(dev, step, batch, output, loss, eval_loss):
    """Write the image summaries for an evaluation step.

    Logs the input images, channels 0 ("near") and 1 ("fare") of the ground
    truth and predicted position heatmaps, the image crops produced by the
    module-level `roi_extractor`, and the xy pose probability maps summed
    over axis 1. Only `batch` and `output` are used.
    """
    poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z) = output
    img, (pos_hm_gt, loss_weights), roi_indexes, (pose_xyz_gt, pose_indexes) = batch

    tf.print("complex_eval_summery")

    def log_images(tag, images):
        # All image summaries in this callback show at most four entries.
        tf.summary.image(tag, images, max_outputs=4)

    log_images("image", img)

    truth_near = pos_hm_gt[..., 0, None]
    truth_far = pos_hm_gt[..., 1, None]
    log_images("pos_hm_gt_near", truth_near)
    log_images("pos_hm_gt_fare", truth_far)

    predicted_near = pos_hm[..., 0, None]
    predicted_far = pos_hm[..., 1, None]
    log_images("pos_hm_near", predicted_near)
    log_images("pos_hm_fare", predicted_far)

    scaled_indexes = calc_img_index(roi_indexes)
    crops = roi_extractor([img, scaled_indexes])
    log_images("pose_imges", crops)
    log_images("pose_hm", tf.reduce_sum(pose_prob_map_xy, axis=1))

In [14]:
def save_checkpoint(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Save a checkpoint through `tf.py_function`.

    The wrapped Python function calls `manager.save(int(step))` and prints the
    resulting path. The step number in the printed message is read from
    `ckpt.step`, not from the `step` argument. Only `step`, `ckpt` and
    `manager` are used.
    """
    def _write_checkpoint():
        written_to = manager.save(int(step))
        message = "Saved checkpoint for step {}: {}".format(int(ckpt.step), written_to)
        print(message)

    tf.py_function(func=_write_checkpoint, inp=[], Tout=[])

def finalize(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Finalize the current default graph through `tf.py_function`.

    The wrapped Python function prints "Finalized" and then calls
    `tf.Graph.finalize` on `tf.compat.v1.get_default_graph()`. None of the
    callback arguments are used.
    """
    def _freeze_default_graph():
        print("Finalized")
        default_graph = tf.compat.v1.get_default_graph()
        tf.Graph.finalize(default_graph)

    tf.py_function(func=_freeze_default_graph, inp=[], Tout=[])


def count_params(dev, step, batch, output, loss, extra_loss, ckpt, manager, train_model, grads):
    """Print the value returned by `train_model.count_params()`.

    Only `train_model` is used; the remaining arguments are part of the
    shared callback signature.
    """
    parameter_count = train_model.count_params()
    print(parameter_count)



In [15]:
def _prepend_batch_index(indexes, row_length):
    """Prefix every index vector in `indexes` with the number of its row group.

    Args:
        indexes: numpy array; the first axis enumerates entries and the last
            axis holds an index vector. Any rank >= 2 is accepted.
        row_length: nested row lengths; `row_length[1]` gives, for each
            consecutive group, how many entries along the first axis belong
            to it.

    Returns:
        Array with the dtype of `indexes` whose last axis is one longer: slot
        0 holds the group number, the remaining slots the original values.
        Entries not covered by `row_length[1]` keep an uninitialised slot 0.
    """
    out_shape = tuple(indexes.shape[:-1]) + (indexes.shape[-1] + 1,)
    # Reuse the input dtype so the result always matches the `Tout` that is
    # handed to tf.numpy_function (a hard-coded int32 fails for other dtypes).
    new_indexes = np.empty(out_shape, dtype=indexes.dtype)
    new_indexes[..., 1:] = indexes

    start = 0
    for group, length in enumerate(row_length[1]):
        stop = start + int(length)
        new_indexes[start:stop, ..., 0] = group
        start = stop

    return new_indexes


def batching(dataset, batch_size):
    """Batch `dataset` and flatten its ragged index and pose components.

    Args:
        dataset: dataset yielding `(img, (pos_heatmap, weights), roi_indexes,
            (rel_pose, pose_indexes))`. After batching, `roi_indexes`,
            `pose_indexes` and `rel_pose` are accessed through
            `.flat_values` / `.nested_row_lengths()`, i.e. they are expected
            to be ragged tensors.
        batch_size: number of elements per batch.

    Returns:
        The batched dataset mapped through `unragg` and prefetched with a
        buffer of 100. In each element the ragged components are replaced by
        their flat values, and both index components get the group number
        prepended to every index vector.
    """
    batched_ds = dataset.batch(batch_size)

    def unragg(img, pos_stuff, roi_indexes, pose_stuff):
        rel_pose, pose_indexes = pose_stuff
        pos_heatmap, weights = pos_stuff

        roi_indexes_flat = tf.numpy_function(
            _prepend_batch_index,
            [roi_indexes.flat_values, roi_indexes.nested_row_lengths()],
            Tout=roi_indexes.dtype,
        )
        pose_indexes_flat = tf.numpy_function(
            _prepend_batch_index,
            [pose_indexes.flat_values, pose_indexes.nested_row_lengths()],
            Tout=pose_indexes.dtype,
        )

        rel_pose_flat = rel_pose.flat_values

        return img, (pos_heatmap, weights), roi_indexes_flat, (rel_pose_flat, pose_indexes_flat)

    unragged_ds = batched_ds.map(unragg).prefetch(100)
    return unragged_ds


In [16]:
def input_pre(batch):
    """Pick the model inputs out of a batch.

    Args:
        batch: `(img, (pos_heatmap, weights), roi_indexes,
            (rel_pose, pose_indexes))`.

    Returns:
        `(img, roi_indexes, pose_indexes)`.
    """
    image, (_heatmap, _weights), rois, (_relative_pose, pose_idx) = batch
    model_inputs = (image, rois, pose_idx)
    return model_inputs
    
def loss_pre(output, batch):
    """Regroup model output and batch into the two argument tuples for the loss.

    Args:
        output: `(poses_xyz, pos_hm, (pose_prob_map_xy, pose_prob_maps_z))`.
        batch: `(img, (pos_hm_gt, loss_weights), roi_indexes,
            (pose_xyz_gt, pose_indexes))`.

    Returns:
        `((poses_xyz, pose_xyz_gt, pose_prob_map_xy, pose_prob_maps_z),
        (pos_hm, pos_hm_gt, loss_weights))`.
    """
    predicted_poses, predicted_hm, (prob_xy, prob_z) = output
    _image, (target_hm, hm_weights), _rois, (target_poses, _pose_idx) = batch

    pose_terms = (predicted_poses, target_poses, prob_xy, prob_z)
    detection_terms = (predicted_hm, target_hm, hm_weights)
    return pose_terms, detection_terms


# Loss-clipping module applied to the extra loss inside grad_pre.
lc_extra = training_slim.LossClipping(name="extra")
def grad_pre(loss, extra_loss, batch, optimizer, train_model, train_loss):
    """Prepare the losses and the variable list for the gradient step.

    Args:
        loss: `(loss_per_batch, detection, (pos_xy, var_xy), (pos_z, var_z))`;
            only `loss_per_batch` is passed on.
        extra_loss: passed through `lc_extra`, summed and multiplied by 100.
        train_model: model exposing `low_level_extractor`, `encoder`,
            `pos_decoder` and `pose_decoder`.
        train_loss: loss object whose trainable variables are included.
        batch, optimizer: unused.

    Returns:
        `(loss_per_batch, extra_loss_sum, trainable_vars)` where
        `loss_per_batch` is returned unchanged and `trainable_vars`
        concatenates the trainable variables of the four sub-networks and of
        `train_loss`, in that order.
    """
    loss_per_batch, _detection, (_pos_xy, _var_xy), (_pos_z, _var_z) = loss

    clipped_extra = lc_extra(extra_loss)
    extra_loss_sum = tf.reduce_sum(clipped_extra) * 100

    variable_owners = (
        train_model.low_level_extractor,
        train_model.encoder,
        train_model.pos_decoder,
        train_model.pose_decoder,
        train_loss,
    )
    trainable_vars = variable_owners[0].trainable_variables
    for owner in variable_owners[1:]:
        # Build a new sequence each time; never extend an owner's list in place.
        trainable_vars = trainable_vars + owner.trainable_variables

    return loss_per_batch, extra_loss_sum, trainable_vars



In [17]:
# Synchronous multi-GPU training; gradients are combined with hierarchical-copy all-reduce.
dist_strat = tf.distribute.MirroredStrategy(cross_device_ops = tf.distribute.HierarchicalCopyAllReduce())

# Step count handed to train.train below.
steps = 100000

# Start from the package's default callback set and register this notebook's hooks.
step_callbacks = train.standart_callbacks()
# Keyed by interval: in the recorded output each hook fires on steps that are
# multiples of its key (e.g. simple_summery at 1830, complex_summery at 2150).
step_callbacks.every_steps[20] = print_loss
step_callbacks.every_steps[250] = save_checkpoint
step_callbacks.every_steps[10] = simple_summery
step_callbacks.every_steps[50] = complex_summery

# Evaluation hooks -- presumably run on the test dataset at these intervals; confirm in train_distributed.
step_callbacks.every_eval_steps[200] = eval_summery
step_callbacks.every_eval_steps[400] = complex_eval_summery

# One-shot hooks keyed by step number: report the parameter count first, then
# finalize the graph one step later.
step_callbacks.at_step[1] = count_params
step_callbacks.at_step[2] = finalize

# Batching / un-ragging of the raw dataset.
step_callbacks.make_batches = batching

# Adapters between the batch layout and the model, loss and gradient step.
step_callbacks.grad_pre = grad_pre
step_callbacks.input_pre = input_pre
step_callbacks.loss_pre = loss_pre

# Dataset factories.
step_callbacks.create_train_dataset = create_train_dataset
step_callbacks.create_test_dataset = create_test_dataset

# Checkpoint factory.
step_callbacks.create_ckpt = create_checkpoint

# Loss factories for training and evaluation.
step_callbacks.create_loss = train_loss
step_callbacks.create_eval_loss = eval_loss

# Model and optimizer factories.
step_callbacks.create_model = train_model
step_callbacks.create_opt = create_opt


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


In [None]:
# Launch distributed training with the callbacks registered above. The recorded
# output shows this run restoring from an existing checkpoint under
# config.checkpoint.path before continuing.
train.train(steps, dist_strat, batch_size = config.training.batch_size, learning_rate=config.training.learning_rate, callbacks = step_callbacks)


Roi_Extractor [TensorShape([1, None, None, 3]), TensorShape([None, 3])]
Restored from /tf/pose3D/checkpoints/run16/ckpt-1750
Instructions for updating:
renamed to `run`
SlimInferenzModel (TensorShape([None, 421, 421, 3]), TensorShape(None), TensorShape(None))
Extractor (None, None, None, 3)
Encoder (None, None, None, 32)
stage1 [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
big_shared1 [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
scale_down (None, None, None, 32)
ResAttention [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
Attention [TensorShape([None, None, None, 32]), TensorShape([None, None, None, 32])]
DenseModule (None, None, None, 128)
DenseBlock (None, None, None, 24)
BnDoConfReluConfRelu (None, None, None, 24)
DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
scale_up (None, None, None, 56)
big_normal (None, None, None, 56)
normal_shared1 [TensorShape([None, None, No

DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
scale_up (None, None, None, 56)
scale_1_4_res (None, None, None, 32)
scale_1_4_shc (None, None, None, 56)
stage3 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
big_shared1 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
scale_down (None, None, None, 64)
ResAttention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
Attention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
DenseModule (None, None, None, 256)
DenseBlock (None, None, None, 24)
BnDoConfReluConfRelu (None, None, None, 24)
DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
scale_up (None, None, None, 56)
big_normal (None, None, None, 56)
normal_shared1 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 56])]
scale_down (None, None, None, 64)
ResAttention [TensorShape([None, None, None, 64]), TensorS

scale_up (None, None, None, 48)
small_medium (None, None, None, 48)
medium_shared3 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
scale_down (None, None, None, 64)
ResAttention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
Attention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
DenseModule (None, None, None, 256)
DenseBlock (None, None, None, 24)
BnDoConfReluConfRelu (None, None, None, 24)
DenseBlock (None, None, None, 32)
BnDoConfReluConfRelu (None, None, None, 32)
DenseBlock (None, None, None, 40)
BnDoConfReluConfRelu (None, None, None, 40)
scale_up (None, None, None, 48)
medium_normal (None, None, None, 48)
normal_shared3 [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
scale_down (None, None, None, 64)
ResAttention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]
Attention [TensorShape([None, None, None, 64]), TensorShape([None, None, None, 96])]

DenseBlock (None, 35, 35, 40)
BnDoConfReluConfRelu (None, 35, 35, 40)
scale_up (None, 35, 35, 56)
SelfShAReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
ShAReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
ResAttention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
Attention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 56])]
ShReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 210])]
DenseModule (None, 35, 35, 280)
DenseBlock (None, 35, 35, 24)
BnDoConfReluConfRelu (None, 35, 35, 24)
DenseBlock (None, 35, 35, 40)
BnDoConfReluConfRelu (None, 35, 35, 40)
Attention [TensorShape([None, 35, 35, 56]), TensorShape([None, 35, 35, 56])]
ShAReD [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 112])]
ResAttention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 112])]
Attention [TensorShape([None, 35, 35, 70]), TensorShape([None, 35, 35, 112])]
ShReD [TensorShape([None, 35, 35, 70

tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:0']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:1']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:2']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:3']]
INFO:tensorflow:batch_all_reduce: 1534 all-reduces with algorithm = hierarchical_copy, num_packs = 1
Instructions for updating:
Use fn_output_signature instead
Roi_Extractor [TensorShape([None, 421, 421, 3]), TensorShape(None)]
4261709
4261709
4261709
4261709
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:0']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:1']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:2']]
tracing gradients on [['/job:localhost/replica:0/task:0/device:GPU:3']]
INFO:tensorflow:batch_all_reduce: 1534 all-reduces with algorithm = hierarchical_copy, num_packs = 1
4261709
4261709
4261709
4261709
Step: 1751
Step: 1751
Step: 175

detection_loss 455.918457
estimator_loss_xy 5706.1084
estimator_loss_z 237.112
estimator_loss_var_xy 400.359344
estimator_loss_var_z 131.62233
extra_loss_sum 0.0202812441
simple_summery
simple_summery
simple_summery
Step: 1821
Step: 1821
Step: 1821
Step: 1821
Step: 1822
Step: 1822
Step: 1822
Step: 1822
Step: 1823
Step: 1823
Step: 1823
Step: 1823
Step: 1824
Step: 1824
Step: 1824
Step: 1824
Step: 1825
Step: 1825
Step: 1825
Step: 1825
Step: 1826
Step: 1826
Step: 1826
Step: 1826
Step: 1827
Step: 1827
Step: 1827
Step: 1827
Step: 1828
Step: 1828
Step: 1828
Step: 1828
Step: 1829
Step: 1829
Step: 1829
Step: 1829
Step: 1830
simple_summery
Step: 1830
Step: 1830
Step: 1830
simple_summery
simple_summery
simple_summery
Step: 1831
Step: 1831
Step: 1831
Step: 1831
Step: 1832
Step: 1832
Step: 1832
Step: 1832
Step: 1833
Step: 1833
Step: 1833
Step: 1833
Step: 1834
Step: 1834
Step: 1834
Step: 1834
Step: 1835
Step: 1835
Step: 1835
Step: 1835
Step: 1836
Step: 1836
Step: 1836
Step: 1836
Step: 1837
Step: 183

Step: 1905
Step: 1905
Step: 1905
Step: 1906
Step: 1906
Step: 1906
Step: 1906
Step: 1907
Step: 1907
Step: 1907
Step: 1907
Step: 1908
Step: 1908
Step: 1908
Step: 1908
Step: 1909
Step: 1909
Step: 1909
Step: 1909
Step: 1910
simple_summery
Step: 1910
Step: 1910
Step: 1910
simple_summery
simple_summery
simple_summery
Step: 1911
Step: 1911
Step: 1911
Step: 1911
Step: 1912
Step: 1912
Step: 1912
Step: 1912
Step: 1913
Step: 1913
Step: 1913
Step: 1913
Step: 1914
Step: 1914
Step: 1914
Step: 1914
Step: 1915
Step: 1915
Step: 1915
Step: 1915
Step: 1916
Step: 1916
Step: 1916
Step: 1916
Step: 1917
Step: 1917
Step: 1917
Step: 1917
Step: 1918
Step: 1918
Step: 1918
Step: 1918
Step: 1919
Step: 1919
Step: 1919
Step: 1919
Step: 1920
simple_summery
Step: 1920
Step: 1920
Step: 1920
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 678.541809
estimator_loss_xy 13012.9727
estimator_loss_z 2092.82764
estimator_loss_var_xy 944.055847
estimator_loss_var_z 2027.41638
extra_loss_sum 0.0187889561
On

Step: 1994
Step: 1995
Step: 1995
Step: 1995
Step: 1995
Step: 1996
Step: 1996
Step: 1996
Step: 1996
Step: 1997
Step: 1997
Step: 1997
Step: 1997
Step: 1998
Step: 1998
Step: 1998
Step: 1998
Step: 1999
Step: 1999
Step: 1999
Step: 1999
Step: 2000
simple_summery
Step: 2000
Step: 2000
Step: 2000
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 260.053
estimator_loss_xy 8961.26074
estimator_loss_z 1989.11401
estimator_loss_var_xy 550.908325
estimator_loss_var_z 1667.7605
extra_loss_sum 0.0177222602
complex_summery
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica

estimator_loss_var_xy 815.147888
estimator_loss_var_z 1536.79529
extra_loss_sum 0.0172329415
On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 132.433594
estimator_loss_xy 22675.7695
estimator_loss_z 3147.91919
estimator_loss_var_xy 839.050964
estimator_loss_var_z 2866.54565
extra_loss_sum 0.0172329415
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 147.557831
estimator_loss_xy 4680.68701
estimator_loss_z 750.214844
estimator_loss_var_xy 233.01207
estimator_loss_var_z 568.552063
extra_loss_sum 0.0172329415
simple_summery
simple_summery
simple_summery
Step: 2061
Step: 2061
Step: 2061
Step: 2061
Step: 2062
Step: 2062
Step: 2062
Step: 2062
Step: 2063
Step: 2063
Step: 2063
Step: 2063
Step: 2064
Step: 2064
Step: 2064
Step: 2064
Step: 2065
Step: 2065
Step: 2065
Step: 2065
Step: 2066
Step: 2066
Step: 2066
Step: 2066
Step: 2067
Step: 2067
Step: 2067
Step: 2067
Step: 2068
Step: 2068
Step: 2068
Step: 2068
Step: 2069
Step: 2069
Step: 2069
Step: 2069
Step: 2

estimator_loss_var_z 1189.05786
extra_loss_sum 0.0166275632
simple_summery
simple_summery
simple_summery
Step: 2141
Step: 2141
Step: 2141
Step: 2141
Step: 2142
Step: 2142
Step: 2142
Step: 2142
Step: 2143
Step: 2143
Step: 2143
Step: 2143
Step: 2144
Step: 2144
Step: 2144
Step: 2144
Step: 2145
Step: 2145
Step: 2145
Step: 2145
Step: 2146
Step: 2146
Step: 2146
Step: 2146
Step: 2147
Step: 2147
Step: 2147
Step: 2147
Step: 2148
Step: 2148
Step: 2148
Step: 2148
Step: 2149
Step: 2149
Step: 2149
Step: 2149
Step: 2150
simple_summery
Step: 2150
Step: 2150
Step: 2150
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
Step: 2151
Step: 2151
Step: 2151
Step: 2151
Step: 2152
Step: 2152
Step: 2152
Step: 2152
Step: 2153
Step: 2153
Step: 2153
Step: 2153
Step: 2154
Step: 2154
Step: 2154
Step: 2154
Step: 2155
Step: 2155
Step: 2155
Step: 2155
Step: 2156
Step: 2156
Step: 2156
Step: 2156
Step: 2157
Step: 2157
Step: 2157
Step: 2157
Step: 2158
Step: 2158
S

Step: 2228
Step: 2228
Step: 2228
Step: 2228
Step: 2229
Step: 2229
Step: 2229
Step: 2229
Step: 2230
simple_summery
Step: 2230
Step: 2230
Step: 2230
simple_summery
simple_summery
simple_summery
Step: 2231
Step: 2231
Step: 2231
Step: 2231
Step: 2232
Step: 2232
Step: 2232
Step: 2232
Step: 2233
Step: 2233
Step: 2233
Step: 2233
Step: 2234
Step: 2234
Step: 2234
Step: 2234
Step: 2235
Step: 2235
Step: 2235
Step: 2235
Step: 2236
Step: 2236
Step: 2236
Step: 2236
Step: 2237
Step: 2237
Step: 2237
Step: 2237
Step: 2238
Step: 2238
Step: 2238
Step: 2238
Step: 2239
Step: 2239
Step: 2239
Step: 2239
Step: 2240
simple_summery
Step: 2240
Step: 2240
Step: 2240
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 216.283661
estimator_loss_xy 5869.15625
estimator_loss_z 1157.91443
estimator_loss_var_xy 420.186127
estimator_loss_var_z 850.854492
extra_loss_sum 0.0161465351
On [['/job:localhost/replica:0/task:0/device:GPU:1']]
detection_loss 613.005371
estimator_loss_xy 4407.18
estimator_loss_z 

Step: 2310
Step: 2310
simple_summery
simple_summery
simple_summery
Step: 2311
Step: 2311
Step: 2311
Step: 2311
Step: 2312
Step: 2312
Step: 2312
Step: 2312
Step: 2313
Step: 2313
Step: 2313
Step: 2313
Step: 2314
Step: 2314
Step: 2314
Step: 2314
Step: 2315
Step: 2315
Step: 2315
Step: 2315
Step: 2316
Step: 2316
Step: 2316
Step: 2316
Step: 2317
Step: 2317
Step: 2317
Step: 2317
Step: 2318
Step: 2318
Step: 2318
Step: 2318
Step: 2319
Step: 2319
Step: 2319
Step: 2319
Step: 2320
simple_summery
Step: 2320
Step: 2320
Step: 2320
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 452.099487
estimator_loss_xy 5518.60791
estimator_loss_z 203.566116
estimator_loss_var_xy 473.406647
estimator_loss_var_z 93.8128357
extra_loss_sum 0.0152669894
On [['/job:localhost/replica:0/task:0/device:GPU:1']]
detection_loss 653.405334
estimator_loss_xy 5690.88281
estimator_loss_z 600.064148
estimator_loss_var_xy 419.790833
estimator_loss_var_z 467.152161
extra_loss_sum 0.0152669894
On [['/job:localho

simple_summery
Step: 2400
Step: 2400
Step: 2400
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 803.989746
estimator_loss_xy 2479.51904
estimator_loss_z 79.40065
estimator_loss_var_xy 186.123367
estimator_loss_var_z 30.0875721
extra_loss_sum 0.0147306565
On [['/job:localhost/replica:0/task:0/device:GPU:1']]
detection_loss 51.6444931
estimator_loss_xy 8266.94727
estimator_loss_z 1239.72546
estimator_loss_var_xy 374.276245
estimator_loss_var_z 1019.02197
extra_loss_sum 0.0147306565
On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 306.526642
estimator_loss_xy 2247.14258
estimator_loss_z 2078.58765
estimator_loss_var_xy 178.18187
estimator_loss_var_z 1883.5238
extra_loss_sum 0.0147306565
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 587.717957
estimator_loss_xy 15078.4893
estimator_loss_z 289.670074
estimator_loss_var_xy 507.337555
estimator_loss_var_z 127.619865
extra_loss_sum 0.0147306565
complex_summery
simple_summery
compl

estimator_loss_var_xy 423.692108
estimator_loss_var_z 571.304688
extra_loss_sum 0.0143029718
On [['/job:localhost/replica:0/task:0/device:GPU:2']]
detection_loss 803.843872
estimator_loss_xy 14970.4053
estimator_loss_z 630.731934
estimator_loss_var_xy 780.342712
estimator_loss_var_z 413.33252
extra_loss_sum 0.0143029718
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 221.040451
estimator_loss_xy 8232.54102
estimator_loss_z 1094.19409
estimator_loss_var_xy 484.441315
estimator_loss_var_z 827.641602
extra_loss_sum 0.0143029718
simple_summery
simple_summery
simple_summery
Step: 2481
Step: 2481
Step: 2481
Step: 2481
Step: 2482
Step: 2482
Step: 2482
Step: 2482
Step: 2483
Step: 2483
Step: 2483
Step: 2483
Step: 2484
Step: 2484
Step: 2484
Step: 2484
Step: 2485
Step: 2485
Step: 2485
Step: 2485
Step: 2486
Step: 2486
Step: 2486
Step: 2486
Step: 2487
Step: 2487
Step: 2487
Step: 2487
Step: 2488
Step: 2488
Step: 2488
Step: 2488
Step: 2489
Step: 2489
Step: 2489
Step: 2489
Step: 2

detection_loss 418.761169
estimator_loss_xy 7604.6377
estimator_loss_z 290.22757
estimator_loss_var_xy 612.985046
estimator_loss_var_z 218.38739
extra_loss_sum 0.014212789
On [['/job:localhost/replica:0/task:0/device:GPU:3']]
detection_loss 322.898193
estimator_loss_xy 8304.3418
estimator_loss_z 480.859619
estimator_loss_var_xy 478.70285
estimator_loss_var_z 407.53064
extra_loss_sum 0.014212789
simple_summery
simple_summery
simple_summery
Step: 2561
Step: 2561
Step: 2561
Step: 2561
Step: 2562
Step: 2562
Step: 2562
Step: 2562
Step: 2563
Step: 2563
Step: 2563
Step: 2563
Step: 2564
Step: 2564
Step: 2564
Step: 2564
Step: 2565
Step: 2565
Step: 2565
Step: 2565
Step: 2566
Step: 2566
Step: 2566
Step: 2566
Step: 2567
Step: 2567
Step: 2567
Step: 2567
Step: 2568
Step: 2568
Step: 2568
Step: 2568
Step: 2569
Step: 2569
Step: 2569
Step: 2569
Step: 2570
simple_summery
Step: 2570
Step: 2570
Step: 2570
simple_summery
simple_summery
simple_summery
Step: 2571
Step: 2571
Step: 2571
Step: 2571
Step: 2572
St

Step: 2641
Step: 2642
Step: 2642
Step: 2642
Step: 2642
Step: 2643
Step: 2643
Step: 2643
Step: 2643
Step: 2644
Step: 2644
Step: 2644
Step: 2644
Step: 2645
Step: 2645
Step: 2645
Step: 2645
Step: 2646
Step: 2646
Step: 2646
Step: 2646
Step: 2647
Step: 2647
Step: 2647
Step: 2647
Step: 2648
Step: 2648
Step: 2648
Step: 2648
Step: 2649
Step: 2649
Step: 2649
Step: 2649
Step: 2650
simple_summery
Step: 2650
Step: 2650
Step: 2650
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
simple_summery
complex_summery
Step: 2651
Step: 2651
Step: 2651
Step: 2651
Step: 2652
Step: 2652
Step: 2652
Step: 2652
Step: 2653
Step: 2653
Step: 2653
Step: 2653
Step: 2654
Step: 2654
Step: 2654
Step: 2654
Step: 2655
Step: 2655
Step: 2655
Step: 2655
Step: 2656
Step: 2656
Step: 2656
Step: 2656
Step: 2657
Step: 2657
Step: 2657
Step: 2657
Step: 2658
Step: 2658
Step: 2658
Step: 2658
Step: 2659
Step: 2659
Step: 2659
Step: 2659
Step: 2660
simple_summery
Step: 2660
Step: 2660
Step: 2660
On [['/job:loc

Step: 2730
simple_summery
simple_summery
simple_summery
Step: 2731
Step: 2731
Step: 2731
Step: 2731
Step: 2732
Step: 2732
Step: 2732
Step: 2732
Step: 2733
Step: 2733
Step: 2733
Step: 2733
Step: 2734
Step: 2734
Step: 2734
Step: 2734
Step: 2735
Step: 2735
Step: 2735
Step: 2735
Step: 2736
Step: 2736
Step: 2736
Step: 2736
Step: 2737
Step: 2737
Step: 2737
Step: 2737
Step: 2738
Step: 2738
Step: 2738
Step: 2738
Step: 2739
Step: 2739
Step: 2739
Step: 2739
Step: 2740
simple_summery
Step: 2740
Step: 2740
Step: 2740
On [['/job:localhost/replica:0/task:0/device:GPU:0']]
detection_loss 41.9683495
estimator_loss_xy 6681.89551
estimator_loss_z 700.053223
estimator_loss_var_xy 362.30658
estimator_loss_var_z 582.312073
extra_loss_sum 0.0141569022
On [['/job:localhost/replica:0/task:0/device:GPU:1']]
detection_loss 520.326
estimator_loss_xy 4246.47656
estimator_loss_z 688.024048
estimator_loss_var_xy 283.529541
estimator_loss_var_z 208.319809
extra_loss_sum 0.0141569022
On [['/job:localhost/replica:0/ta

estimator_loss_z 228960.422
estimator_loss_xy 15820.8545

detection_loss 98.055069


estimator_loss_var_z 15143.8232
estimator_loss_var_xy 124911

cp 606