In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"
from math import ceil

import time
import ipywidgets as widgets
from IPython.display import display

import numpy as np
import tensorflow as tf
from tensorflow.python.training.summary_io import SummaryWriterCache

import graph_manager
import net
import eval_utils
import loss_utils
import tf_inputs
import tf_utils
import viz

import logging
logging.getLogger("tensorflow").setLevel(logging.WARNING)

## Base Configuration

--- 

In [2]:
# Base experiment configuration.
# `data` selects one of the dataset presets below; every other setting in this
# cell is shared by all datasets.
data = 'stanford'

configuration = {}
if data == 'vedai':
    configuration['setting'] = 'vedai'
    configuration['exp_name'] = 'vedai'
    configuration['save_summaries_steps'] = 100
    configuration['save_evaluation_steps'] = 250
    configuration['num_epochs'] = 1000
elif data == 'stanford':
    configuration['setting'] = 'sdd'
    configuration['exp_name'] = 'sdd'
    configuration['save_summaries_steps'] = 200
    configuration['save_evaluation_steps'] = 500
    configuration['num_epochs'] = 120
else:
    # Fail early with a clear message instead of an opaque
    # `KeyError: 'setting'` when the metadata path is formatted below.
    raise ValueError("Unknown dataset %r: expected 'vedai' or 'stanford'" % (data,))
    
## Metadata
# NOTE(review): absolute, user-specific path; the notebook only runs as-is on
# the original machine. The `%s` placeholder is filled with the dataset setting.
tfrecords_path = '/home/aroyer/indolentDetect/Data/metadata_%s.txt'
metadata = graph_manager.load_metadata(tfrecords_path % configuration['setting'])
configuration.update(metadata)
configuration['num_classes'] = len(configuration['data_classes'])

## GPUs
configuration['num_gpus'] = 2                                 
configuration['gpu_mem_frac'] = 1.

## Inputs Pipeline
configuration['subset'] = -1
configuration['batch_size'] = 16
configuration['test_batch_size'] = 16
configuration['shuffle_buffer'] = 2000
    
## Evaluation
configuration['save_checkpoint_secs'] = 3600
configuration['retrieval_intersection_threshold'] = [0.25, 0.5, 0.75]

## Training
configuration['learning_rate'] = 1e-3
configuration['centers_localization_loss_weight'] = 1.
configuration['scales_localization_loss_weight']  = 1.
configuration['confidence_loss_weight']  = 5.
configuration['noobj_confidence_loss_weight']  = 1.
configuration['group_classification_loss_weight']  = 1.
configuration['offsets_loss_weight']  = 1.

# Must stay last: hands the completed dictionary to `graph_manager`
# (the captured cell output shows it printing the final configuration).
graph_manager.finalize_configuration(configuration)

39688 training steps
...which means 121 epochs
10476 training samples (328 iters)
2619 validation samples (82 iters)

[41mConfig:[0m
[96mbatch_size:[0m 16
[96mcenters_localization_loss_weight:[0m 1.0
[96mconfidence_loss_weight:[0m 5.0
[96mdata_classes:[0m ['Biker', 'Bus', 'Car', 'Cart', 'Pedestrian', 'Skater']
[96mexp_name:[0m sdd
[96mfeature_keys:[0m ['im_id', 'num_boxes', 'bounding_boxes', 'classes']
[96mgpu_mem_frac:[0m 1.0
[96mgroup_classification_loss_weight:[0m 1.0
[96mimage_folder:[0m /home/aroyer/Datasets/sdd_images
[96mlast_test_batch_size:[0m 27
[96mlearning_rate:[0m 0.001
[96mnoobj_confidence_loss_weight:[0m 1.0
[96mnum_classes:[0m 6
[96mnum_epochs:[0m 120
[96mnum_gpus:[0m 2
[96mnum_steps:[0m 39688
[96moffsets_loss_weight:[0m 1.0
[96mretrieval_intersection_threshold:[0m [0.25, 0.5, 0.75]
[96msave_checkpoint_secs:[0m 3600
[96msave_evaluation_steps:[0m 500
[96msave_summaries_steps:[0m 200
[96mscales_localization_loss_weight:[0m 1.0

## Network
---

In [3]:
def forward_pass(inputs, 
                 outputs, 
                 configuration,
                 is_training=True,
                 reuse=False, 
                 verbose=False,
                 scope_name='model'):
    """Run the network on `inputs["image"]` and store the detections in `outputs`.

    Args:
        inputs: inputs dictionary; only the "image" entry is read here.
        outputs: outputs dictionary handed to
            `net.get_detection_with_groups_outputs`; nothing is returned, the
            results are collected through this dictionary.
        configuration: config dictionary, forwarded as keyword arguments to
            both `net` calls.
        is_training: forwarded to `net.tiny_yolo_v2`.
        reuse: whether the variables of `scope_name` are reused.
        verbose: forwarded to both `net` calls.
        scope_name: name of the variable scope wrapping the network.
    """
    with tf.variable_scope(scope_name, reuse=reuse):
        # Backbone activations
        backbone_activations = net.tiny_yolo_v2(
            inputs["image"],
            is_training=is_training,
            reuse=reuse,
            verbose=verbose,
            **configuration)
        # Detection head
        net.get_detection_with_groups_outputs(
            backbone_activations,
            outputs,
            reuse=reuse,
            verbose=verbose,
            **configuration)
            
            
def train_pass(inputs, configuration, intermediate_stage=False, is_chief=False):
    """Build the training forward pass of one stage and register its losses.

    Args:
        inputs: inputs dictionary of the stage.
        configuration: config dictionary of the stage; its 'base_name' entry
            names the variable scope and the name scopes.
        intermediate_stage: if True, crops for the next stage are extracted
            with `tf_inputs.extract_groups` and `loss_utils.get_odgi_loss` is
            used; otherwise `loss_utils.get_standard_loss` is used.
        is_chief: the chief device creates the variables (the other devices
            reuse them) and is the only one printing information.

    Returns:
        The outputs dictionary of the stage.
    """
    outputs = {}
    base_name = graph_manager.get_defaults(configuration, ['base_name'], verbose=is_chief)[0]
    if is_chief:
        print(' \033[34m%s:\033[0m' % base_name)

    # Forward pass: variables are created on the chief and reused elsewhere
    with tf.name_scope('%s/net' % base_name):
        forward_pass(inputs, outputs, configuration, scope_name=base_name,
                     is_training=True, reuse=not is_chief, verbose=is_chief)

    # Crops fed to the next stage (intermediate stages only)
    if intermediate_stage:
        with tf.name_scope('extract_patches'):
            tf_inputs.extract_groups(inputs, outputs, mode='train', verbose=is_chief, **configuration)

    # Losses
    with tf.name_scope('%s/loss' % base_name):
        loss_fn = loss_utils.get_odgi_loss if intermediate_stage else loss_utils.get_standard_loss
        graph_manager.add_losses_to_graph(
            loss_fn, inputs, outputs, configuration, is_chief=is_chief, verbose=is_chief)

    # Summary of the output tensors
    if is_chief:
        description_lines = []
        for key, value in outputs.items():
            description_lines.append("    \033[32m%s\033[0m: shape=%s, dtype=%s" % (
                key, value.get_shape().as_list(), value.dtype))
        print('\n'.join(description_lines))
    return outputs


def feed_pass(inputs, outputs, configuration, mode='train', is_chief=False):
    """Build the inputs of the next stage from the crops of the current one.

        Args:
            inputs: inputs dictionary
            outputs: outputs dictionary; its 'crop_boxes' entry is used
            configuration: config dictionary, forwarded as keyword arguments
            mode: forwarded to `graph_manager.get_stage2_inputs`
            is_chief: whether to print information
        
        Returns:
            Dictionary of inputs for the next stage
    """
    if is_chief:
        print(' \033[34mextract patches:\033[0m')
    crop_boxes = outputs['crop_boxes']
    next_stage_inputs = graph_manager.get_stage2_inputs(
        inputs, crop_boxes, mode=mode, verbose=is_chief, **configuration)
    return next_stage_inputs
        
    
def eval_pass_intermediate_stage(inputs, configuration, metrics_to_norms, clear_metrics_op, 
                                 update_metrics_op, device=0, is_chief=False):
    """Build the evaluation pass of an intermediate stage.

    Args:
        inputs: inputs dictionary of the stage.
        configuration: config dictionary of the stage.
        metrics_to_norms: passed through to `graph_manager.add_metrics_to_graph`.
        clear_metrics_op: passed through to `graph_manager.add_metrics_to_graph`.
        update_metrics_op: passed through to `graph_manager.add_metrics_to_graph`.
        device: current device number, passed to `add_metrics_to_graph`.
        is_chief: whether to print information.

    Returns:
        The outputs dictionary of the stage.
    """
    outputs = {}
    base_name = graph_manager.get_defaults(configuration, ['base_name'], verbose=is_chief)[0]
    if is_chief:
        print(' \033[34m%s:\033[0m' % base_name)

    # Forward pass in inference mode, always reusing the trained variables
    with tf.name_scope('%s/net' % base_name):
        forward_pass(inputs, outputs, configuration, scope_name=base_name,
                     is_training=False, reuse=True, verbose=is_chief)

    # Crops fed to the next stage
    with tf.name_scope('extract_patches'):
        tf_inputs.extract_groups(inputs, outputs, mode='test', verbose=is_chief, **configuration)

    # Samples counter first, then the stage metrics
    with tf.name_scope('%s/eval' % base_name):
        for metrics_fn in (eval_utils.get_samples_running_counters, eval_utils.get_odgi_eval):
            graph_manager.add_metrics_to_graph(
                metrics_fn, inputs, outputs, metrics_to_norms, clear_metrics_op,
                update_metrics_op, configuration, device=device, verbose=is_chief)

    return outputs


def eval_pass_final_stage(stage2_inputs, stage1_inputs, stage1_outputs, configuration, metrics_to_norms, 
                          clear_metrics_op, update_metrics_op, device=0, is_chief=False):
    """ Evaluation for the full pipeline.

        Runs the final stage on the crops, regroups its outputs per original
        image, maps the predicted boxes back to the coordinates of the stage 1
        image and evaluates them against the stage 1 inputs.

        Args:
            stage2_inputs: inputs dictionary for stage 2
            stage1_inputs: inputs dictionary for stage 1
            stage1_outputs: outputs dictionary for stage 1. Must contain
                'crop_boxes'; may contain 'added_bounding_boxes' together with
                'added_detection_scores'
            configuration: config dictionary
            metrics_to_norms: Map metrics key to normalizer key
            clear_metrics_op: List to be updated with reset operations
            update_metrics_op: List to be updated with update operations
            device: Current device number to be used in the variable scope for each metric
            is_chief: Whether to print information
        
        Returns:
            Dictionary of outputs, merged by image
            Dictionary of unscaled outputs (for summary purposes)
    """
    base_name = graph_manager.get_defaults(configuration, ['base_name'], verbose=is_chief)[0]
    if is_chief: print(' \033[34m%s:\033[0m' % base_name)
    outputs = {}
    
    # Feed forward
    with tf.name_scope('net'):
        forward_pass(stage2_inputs, outputs, configuration, scope_name=base_name,
                     is_training=False, reuse=True, verbose=is_chief) 
            
    # Keep a handle on the raw stage 2 tensors (for summaries) before the
    # entries of `outputs` are replaced below
    unscaled_outputs = {key: outputs[key] for key in ['bounding_boxes', 'detection_scores']} 

    # Reshape outputs from stage2 to stage1
    with tf.name_scope('reshape_outputs'):
        crop_boxes = stage1_outputs["crop_boxes"]  
        num_crops = crop_boxes.get_shape()[1].value
        num_boxes = outputs['bounding_boxes'].get_shape()[-2].value
        batch_size = graph_manager.get_defaults(configuration, ['test_batch_size'], verbose=is_chief)[0]
        # outputs:  (stage1_batch * num_crops, num_cell, num_cell, num_boxes, ...)
        # to: (stage1_batch, num_cell, num_cell, num_boxes * num_crops, ...)
        # Iterate over a snapshot since the entries are reassigned in the loop.
        for key, value in list(outputs.items()):
            # One chunk of `num_crops` consecutive rows per stage 1 image
            batches = tf.split(value, batch_size, axis=0)
            # Merge the crops of each image along the boxes axis
            batches = [tf.concat(tf.unstack(b, num=num_crops, axis=0), axis=2) for b in batches]
            outputs[key] = tf.stack(batches, axis=0)
    
    # Rescale bounding boxes from stage2 to stage1
    with tf.name_scope('rescale_bounding_boxes'):
        # Repeat each crop box once per predicted box so that it lines up with
        # the merged boxes axis built above.
        # crop_boxes: (stage1_batch, 1, 1, num_crops * num_boxes, 4)
        crop_boxes = tf.reshape(crop_boxes, (batch_size, num_crops, 1, 4))
        crop_boxes = tf.tile(crop_boxes, (1, 1, num_boxes, 1))
        crop_boxes = tf.reshape(crop_boxes, (batch_size, 1, 1, num_crops * num_boxes, 4))
        crop_mins, crop_maxs = tf.split(crop_boxes, 2, axis=-1)
        # bounding_boxes: (stage1_batch, num_cells, num_cells, num_crops * num_boxes, 4)
        bounding_boxes = outputs['bounding_boxes']
        # Scale by the crop extent (floored at 1e-8), shift by the crop
        # origin, then clip to the [0, 1] range
        bounding_boxes *= tf.maximum(1e-8, tf.tile(crop_maxs - crop_mins, (1, 1, 1, 1, 2)))
        bounding_boxes += tf.tile(crop_mins, (1, 1, 1, 1, 2))
        bounding_boxes = tf.clip_by_value(bounding_boxes, 0., 1.)
        outputs['bounding_boxes'] = bounding_boxes
            
    ## Add the additional bounding boxes outputs propagated from earlier stages
    if 'added_bounding_boxes' in stage1_outputs:
        assert 'added_detection_scores' in stage1_outputs
        outputs['bounding_boxes'] = tf_utils.flatten_percell_output(outputs['bounding_boxes'])
        outputs['bounding_boxes'] = tf.concat([outputs['bounding_boxes'],
                                               stage1_outputs['added_bounding_boxes']], axis=1)
        outputs['detection_scores'] = tf_utils.flatten_percell_output(outputs['detection_scores'])
        outputs['detection_scores'] = tf.concat([outputs['detection_scores'],
                                                stage1_outputs['added_detection_scores']], axis=1)
        
    # Evaluate `outputs` versus the initial (stage 1) inputs
    with tf.name_scope('eval'):
        graph_manager.add_metrics_to_graph(
            eval_utils.get_standard_eval, stage1_inputs, outputs, metrics_to_norms, clear_metrics_op, 
            update_metrics_op, configuration, device=device, verbose=is_chief)
    return outputs, unscaled_outputs

## Multistage YOLO

---

In [4]:
########################################################################## Config
# Settings shared by both stages, derived from the base configuration
multistage_configuration = configuration.copy()
multistage_configuration.update({'full_image_size': 1024, 'num_boxes': 1})

# Each stage starts from its own copy of the shared settings
stage1_configuration = multistage_configuration.copy()
stage2_configuration = multistage_configuration.copy()

# Input sizes: stage 2 works at half the resolution of stage 1
stage1_configuration['image_size'] = 512
stage2_configuration['image_size'] = stage1_configuration['image_size'] // 2

# Stage 1: enables the groups, group flags and offsets options
stage1_configuration.update({
    'num_boxes': 1,
    'base_name': 'stage1',
    'with_groups': True,
    'with_group_flags': True,
    'with_offsets': True,
})
graph_manager.finalize_grid_offsets(stage1_configuration)

# Stage 2: its `retrieval_top_n` is set further down in this cell
stage2_configuration.update({'num_boxes': 1, 'base_name': 'stage2'})
graph_manager.finalize_grid_offsets(stage2_configuration, finalize_retrieval_top_n=False)

# The experiment name records both input sizes
image_sizes = (stage1_configuration['image_size'], stage2_configuration['image_size'])
multistage_configuration['exp_name'] = multistage_configuration['exp_name'] + '/odgi_%d_%d' % image_sizes

# Compute the final number of outputs (need to define k in topk for final evaluation)
stage1_cells = stage1_configuration['num_cells']
stage2_cells = stage2_configuration['num_cells']
num_outputs_stage1 = stage1_configuration['num_boxes'] * stage1_cells[0] * stage1_cells[1]
num_outputs_stage2 = stage2_configuration['num_boxes'] * stage2_cells[0] * stage2_cells[1]
num_crops, retrieval_top_n = graph_manager.get_defaults(
    stage2_configuration, ['test_num_crops', 'retrieval_top_n'], verbose=False)
# Stage 2 boxes for every crop, plus the stage 1 boxes
num_outputs_final = num_crops * num_outputs_stage2 + num_outputs_stage1
stage2_configuration['retrieval_top_n'] = min(retrieval_top_n, num_outputs_final)
print('Retrieval top k = %d (final)' % stage2_configuration['retrieval_top_n'])
    

# Build the two-stage train and evaluation graphs, then run the training loop.
# Everything lives in one fresh graph so that re-running the cell starts from
# a clean state.
with tf.Graph().as_default() as graph:          
    ########################################################################## Train graph
    with tf.name_scope('train'):
        print('\n\033[44mLoad inputs:\033[0m')
        inputs = graph_manager.get_inputs(mode='train', verbose=True, **stage1_configuration)   
        
        print('\n\033[43mTrain Graph:\033[0m')
        viz.display_graph_size('inputs(train)')        
        # `inputs` is iterated once per GPU; device 0 is the chief: it creates
        # the variables, prints information and owns the summaries.
        for i, train_inputs in enumerate(inputs):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('dev%d' % i):
                    is_chief = (i == 0)
                    train_s1_outputs = train_pass(train_inputs, stage1_configuration, 
                                                  intermediate_stage=True, is_chief=is_chief)    
                    train_s2_inputs = feed_pass(train_inputs, train_s1_outputs, stage2_configuration,
                                                mode='train', is_chief=is_chief)
                    train_s2_outputs = train_pass(train_s2_inputs, stage2_configuration,
                                                  intermediate_stage=False, is_chief=is_chief) 
                    if is_chief:
                        print(' \033[34msummaries:\033[0m')
                        graph_manager.add_summaries(train_inputs, train_s1_outputs, mode='train', 
                                                    family="train_stage1", **stage1_configuration)
                        graph_manager.add_summaries(train_s2_inputs, train_s2_outputs, mode='train', verbose=0,
                                                    family="train_stage2", **stage2_configuration)
            viz.display_graph_size('train net (gpu:%d)' % i)

        # Training Objective
        with tf.name_scope('losses'):
            losses = graph_manager.get_total_loss(splits=['stage1', 'stage2'])            
            # Scalar used for display: sum of the first element of each split
            full_loss = tf.add_n([x[0] for x in losses])
        viz.display_graph_size('full loss')

        # Train op    
        with tf.name_scope('train_op'):   
            global_step, train_op = graph_manager.get_train_op(losses, **multistage_configuration)
        viz.display_graph_size('train op')
        
        # Additional info
        with tf.name_scope('config_summary'):
            viz.add_text_summaries(stage1_configuration, family="stage1") 
            viz.add_text_summaries(stage2_configuration, family="stage2") 
            print('\n\033[43mLosses:\033[0m')
            print('\n'.join(["    \033[35m%s:\033[0m %s tensors" % (x, len(tf.get_collection(x)))  
                            for x in tf.get_default_graph().get_all_collection_keys() 
                            if x.endswith('_loss')]))
            
    ##########################################################################  Evaluation graph
    with tf.name_scope('eval'):        
        print('\n\033[43mVal Graph:\033[0m')
        update_metrics_op = []    # Store operations to update the metrics
        clear_metrics_op = []     # Store operations to reset the metrics
        metrics_to_norms = {}
        inputs = graph_manager.get_inputs(mode='test', verbose=False, **stage1_configuration)         
        viz.display_graph_size('inputs(test)')            
        
        for i, val_inputs in enumerate(inputs):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('dev%d' % i):
                    is_chief = (i == 0)
                    val_s1_outputs = eval_pass_intermediate_stage(
                        val_inputs, stage1_configuration, metrics_to_norms, clear_metrics_op,
                        update_metrics_op, device=i, is_chief=is_chief) 
                    val_s2_inputs = feed_pass(val_inputs, val_s1_outputs, stage2_configuration,
                                              mode='test', is_chief=is_chief)
                    val_s2_outputs, val_s2_unscaled_outputs = eval_pass_final_stage(
                        val_s2_inputs, val_inputs,  val_s1_outputs, stage2_configuration, metrics_to_norms, 
                        clear_metrics_op, update_metrics_op, device=i, is_chief=is_chief)
                    
                    if is_chief:
                        with tf.name_scope('stage1'):
                            graph_manager.add_summaries(val_inputs, val_s1_outputs, mode='test', 
                                                        **stage1_configuration)   
                        with tf.name_scope('stage2'):
                            graph_manager.add_summaries(val_s2_inputs, val_s2_unscaled_outputs, mode='test',
                                                        verbose=False, **stage2_configuration) 
                        with tf.name_scope('total_pipeline'):
                            graph_manager.add_summaries(val_inputs, val_s2_outputs, mode='test', verbose=False,
                                                        display_inputs=False, **stage2_configuration)
            viz.display_graph_size('test net (gpu:%d)' % i)

        with tf.name_scope('eval'):
            print('    \x1b[32m%d\x1b[0m eval update ops' % len(update_metrics_op))
            print('    \x1b[32m%d\x1b[0m eval clear ops' % len(clear_metrics_op))
            # From here on the two names refer to grouped ops, not to lists
            update_metrics_op = tf.group(*update_metrics_op)
            clear_metrics_op = tf.group(*clear_metrics_op)
            eval_summary_op = graph_manager.get_eval_op(metrics_to_norms)
        
        # Additional info
        print('\n\033[43mEval metrics:\033[0m')
        print('\n'.join(["    \033[35m%s:\033[0m %s tensors" % (x, len(tf.get_collection(x)))  
                        for x in tf.get_default_graph().get_all_collection_keys() 
                        if x.endswith('_eval')]))
        
    ########################################################################## Run    
    # Defined before the `try` so that the KeyboardInterrupt handler can always
    # report a step, even when interrupted before the session is created.
    global_step_ = 0
    try:
        print('\n\033[44mLaunch session:\033[0m')
        graph_manager.generate_log_dir(multistage_configuration)
        summary_writer = SummaryWriterCache.get(multistage_configuration["log_dir"])
        print('    Log directory', os.path.abspath(multistage_configuration["log_dir"]))
        
        with graph_manager.get_monitored_training_session(**multistage_configuration) as sess:    
            loss_widget = widgets.HTML(value="")
            start_time = time.time()
            
            print('\n\033[44mStart training:\033[0m')
            display(loss_widget)   
            last_eval_step = 1  # not read anywhere in this cell
            while not sess.should_stop(): 
                        
                # Train
                global_step_, full_loss_, _ = sess.run([global_step, full_loss, train_op])
                
                # Evaluate every `save_evaluation_steps` steps
                if (multistage_configuration["save_evaluation_steps"] is not None and (global_step_ > 1)
                    and global_step_  % multistage_configuration["save_evaluation_steps"] == 0):
                    # Number of evaluation iterations (despite the name)
                    num_epochs = multistage_configuration["test_num_iters_per_epoch"]
                    sess.run(clear_metrics_op)
                    for epoch in range(num_epochs):
                        viz.display_eval(loss_widget, global_step_, epoch + 1, num_epochs, start_time)
                        sess.run(update_metrics_op) 
                    # Write summary once every batch has been accumulated.
                    # Skipped when there was nothing to evaluate, so that an
                    # undefined or stale summary is never written.
                    if num_epochs > 0:
                        eval_summary = sess.run(eval_summary_op)
                        summary_writer.add_summary(eval_summary, global_step_)
                        summary_writer.flush()
                    
                # Display
                if (global_step_ - 1) % 20 == 0:
                    viz.display_loss(loss_widget, global_step_, full_loss_, start_time,
                                     multistage_configuration["train_num_samples_per_iter"],
                                     multistage_configuration["train_num_samples"])
                
    except KeyboardInterrupt:
        print('\nInterrupted at step %d' % global_step_)   

grid size [16 16]
grid size [8 8]
Retrieval top k = 576 (final)

[44mLoad inputs:[0m
    with default `num_threads` = 8
    with default `prefetch_capacity` = 1
    with default `data_augmentation_threshold` = 0.5
    with default `with_classification` = False
    pad [32mtrain[0m inputs with [32m0[0m dummy samples
    [32mimage[0m: shape=[None, 512, 512, 3], dtype=<dtype: 'float32'>
    [32mgroup_bounding_boxes_per_cell[0m: shape=[None, 16, 16, 1, 4], dtype=<dtype: 'float32'>
    [32mnum_boxes[0m: shape=[None], dtype=<dtype: 'int32'>
    [32mbounding_boxes[0m: shape=[None, 100, 4], dtype=<dtype: 'float32'>
    [32mnum_group_boxes[0m: shape=[None], dtype=<dtype: 'int32'>
    [32mim_id[0m: shape=[None], dtype=<dtype: 'int32'>
    [32mobj_i_mask_bbs[0m: shape=[None, 16, 16, 1, 100], dtype=<dtype: 'float32'>
    [32mis_flipped[0m: shape=[None], dtype=<dtype: 'float32'>
    [32mgroup_flags[0m: shape=[None, 16, 16, 1, 1], dtype=<dtype: 'float32'>

[43mTrain Graph:[