In [19]:
# show images inline
%matplotlib inline

# automatically reload modules when they have changed
%load_ext autoreload
%autoreload 2

import argparse
import inspect
import os
import sys
import warnings

import keras
import keras.preprocessing.image
import tensorflow as tf

# Make the directory that contains this notebook's folder importable, so the
# `keras_retinanet` package can be found when it is not installed.
# A notebook cell has no real source file: inspect.getfile() on the current frame
# returns the kernel's pseudo-filename for the cell, which on newer kernels lives
# in a temporary directory. The kernel's working directory is the reliable anchor.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
# guard against stacking duplicate entries every time the cell is re-run
if parentdir not in sys.path:
    sys.path.insert(0, parentdir)

# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from keras_retinanet import layers  # noqa: F401
from keras_retinanet import losses
from keras_retinanet import models
from keras_retinanet.callbacks import RedirectModel
from keras_retinanet.callbacks.eval import Evaluate
from keras_retinanet.models.retinanet import retinanet_bbox
from keras_retinanet.preprocessing.csv_generator import CSVGenerator
from keras_retinanet.preprocessing.kitti import KittiGenerator
from keras_retinanet.preprocessing.open_images import OpenImagesGenerator
from keras_retinanet.preprocessing.pascal_voc import PascalVocGenerator
from keras_retinanet.utils.anchors import make_shapes_callback
from keras_retinanet.utils.config import read_config_file, parse_anchor_parameters
from keras_retinanet.utils.keras_version import check_keras_version
from keras_retinanet.utils.model import freeze as freeze_model
from keras_retinanet.utils.transform import random_transform_generator
from keras_retinanet.utils.image import random_visual_effect_generator

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
def makedirs(path):
    """ Create a directory (and any missing parents), tolerating an existing one.
    Args
        path : Directory path to create.
    Raises
        OSError : If creation fails and path is not an existing directory afterwards.
    """
    # Written with try/except rather than exist_ok so it also runs on Python 2.7.
    try:
        os.makedirs(path)
    except OSError:
        directory_already_there = os.path.isdir(path)
        if directory_already_there:
            return
        raise

In [21]:
def get_session():
    """ Build a TensorFlow session whose GPU memory allocation is allowed to grow.
    Returns
        A tf.Session created from a ConfigProto with gpu_options.allow_growth enabled.
    """
    session_config = tf.ConfigProto()
    gpu_options = session_config.gpu_options
    gpu_options.allow_growth = True
    session = tf.Session(config=session_config)
    return session

In [22]:
def model_with_weights(model, weights, skip_mismatch):
    """ Optionally load weights into a model and hand the model back.
    Args
        model         : The model to load weights for.
        weights       : The weights to load; None leaves the model untouched.
        skip_mismatch : If True, skips layers whose shape of weights doesn't match with the model.
    Returns
        The same model object that was passed in.
    """
    nothing_to_load = weights is None
    if nothing_to_load:
        return model

    # weights are matched to layers by name
    model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch)
    return model

In [23]:
def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0,
                  freeze_backbone=False, lr=1e-5, config=None):
    """ Build the base, training and prediction models.
    Args
        backbone_retinanet : Callable that builds a retinanet model for a given backbone.
                             It is called as backbone_retinanet(num_classes, num_anchors=..., modifier=...).
        num_classes        : The number of classes to train.
        weights            : The weights to load into the model (None loads nothing).
        multi_gpu          : The number of GPUs to use for training.
        freeze_backbone    : If True, disables learning for the backbone.
        lr                 : Learning rate handed to the optimizer.
        config             : Config parameters, None indicates the default configuration.
    Returns
        model            : The base model. This is also the model that is saved in snapshots.
        training_model   : The training model. Unless multi_gpu > 1, this is identical to model.
        prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
    """
    modifier = None
    if freeze_backbone:
        modifier = freeze_model

    # anchor settings stay None (library defaults) unless the config overrides them
    anchor_params = None
    num_anchors   = None
    if config and 'anchor_parameters' in config:
        anchor_params = parse_anchor_parameters(config)
        num_anchors   = anchor_params.num_anchors()

    def build_base_model():
        # construct the network, then load weights, skipping layers whose shapes differ
        network = backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier)
        return model_with_weights(network, weights=weights, skip_mismatch=True)

    train_in_parallel = multi_gpu > 1
    if train_in_parallel:
        from keras.utils import multi_gpu_model
        # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors.
        with tf.device('/cpu:0'):
            model = build_base_model()
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model = build_base_model()
        training_model = model

    # inference wrapper around the base model
    prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)

    # one loss per named output of the training model
    loss_by_output = {
        'regression'    : losses.smooth_l1(),
        'classification': losses.focal()
    }
    optimizer = keras.optimizers.adam(lr=lr, clipnorm=0.001)
    training_model.compile(loss=loss_by_output, optimizer=optimizer)

    return model, training_model, prediction_model

In [24]:
def create_callbacks(model, training_model, prediction_model, validation_generator, config):
    """ Creates the callbacks to use during training.
    Args
        model: The base model; snapshots are redirected to it.
        training_model: The model that is used for training (currently not used by this function).
        prediction_model: The model that should be used for validation.
        validation_generator: The generator for creating validation data (may be None).
        config: RetinanetConfig object. Reads tensorboard_dir, batch_size, evaluation,
            dataset_type, weighted_average, snapshots, snapshot_path and backbone.
    Returns:
        A list of callbacks used for training.
    """
    callbacks = []

    tensorboard_callback = None

    if config.tensorboard_dir:
        tensorboard_callback = keras.callbacks.TensorBoard(
            log_dir                = config.tensorboard_dir,
            histogram_freq         = 0,
            batch_size             = config.batch_size,
            write_graph            = True,
            write_grads            = False,
            write_images           = False,
            embeddings_freq        = 0,
            embeddings_layer_names = None,
            embeddings_metadata    = None
        )
        callbacks.append(tensorboard_callback)

    if config.evaluation and validation_generator:
        if config.dataset_type == 'coco':
            # Absolute import: a notebook has no parent package, so a relative
            # import cannot be resolved here. Imported lazily so the COCO
            # dependency is only needed for COCO runs.
            from keras_retinanet.callbacks.coco import CocoEval

            # use prediction model for evaluation
            evaluation = CocoEval(validation_generator, tensorboard=tensorboard_callback)
        else:
            evaluation = Evaluate(validation_generator, tensorboard=tensorboard_callback, weighted_average=config.weighted_average)
        evaluation = RedirectModel(evaluation, prediction_model)
        callbacks.append(evaluation)

    # save the model
    if config.snapshots:
        # ensure directory created first; otherwise h5py will error after epoch.
        makedirs(config.snapshot_path)
        checkpoint = keras.callbacks.ModelCheckpoint(
            os.path.join(
                config.snapshot_path,
                # the doubled braces leave '{epoch:02d}' in place for ModelCheckpoint to fill in
                '{backbone}_{dataset_type}_{{epoch:02d}}.h5'.format(backbone=config.backbone, dataset_type=config.dataset_type)
            ),
            verbose=1,
            # save_best_only=True,
            # monitor="mAP",
            # mode='max'
        )
        # snapshots are taken of the base model, not the (possibly multi-GPU) training model
        checkpoint = RedirectModel(checkpoint, model)
        callbacks.append(checkpoint)

    # always lower the learning rate when the training loss plateaus
    callbacks.append(keras.callbacks.ReduceLROnPlateau(
        monitor    = 'loss',
        factor     = 0.1,
        patience   = 2,
        verbose    = 1,
        mode       = 'auto',
        min_delta  = 0.0001,
        cooldown   = 0,
        min_lr     = 0
    ))

    return callbacks

In [25]:
def create_generators(config, preprocess_image):
    """ Create generators for training and validation.
    Args
        config           : RetinanetConfig object containing configuration for generators.
        preprocess_image : Function that preprocesses an image for the network.
    Returns
        train_generator      : Generator for the training data (with augmentation applied).
        validation_generator : Generator for the validation data, or None for a csv
                               dataset without validation annotations.
    Raises
        ValueError : If config.dataset_type is not one of the supported dataset types.
    """
    common_args = {
        'batch_size'       : config.batch_size,
        'config'           : config.config,
        'image_min_side'   : config.image_min_side,
        'image_max_side'   : config.image_max_side,
        'preprocess_image' : preprocess_image,
    }

    # create random transform generator for augmenting training data
    if config.random_transform or config.defect_transform:
        # geometric augmentation shared by both augmentation modes
        transform_args = {
            'min_rotation'    : -0.1,
            'max_rotation'    : 0.1,
            'min_translation' : (-0.1, -0.1),
            'max_translation' : (0.1, 0.1),
            'min_shear'       : -0.1,
            'max_shear'       : 0.1,
            'flip_x_chance'   : 0.5,
            'flip_y_chance'   : 0.5,
        }
        # random_transform takes precedence over defect_transform and is the
        # only mode that also applies random scaling
        if config.random_transform:
            transform_args['min_scaling'] = (0.9, 0.9)
            transform_args['max_scaling'] = (1.1, 1.1)

        transform_generator = random_transform_generator(**transform_args)
        visual_effect_generator = random_visual_effect_generator(
            contrast_range=(0.9, 1.1),
            brightness_range=(-.1, .1),
            hue_range=(-0.05, 0.05),
            saturation_range=(0.95, 1.05)
        )
    else:
        transform_generator = random_transform_generator(flip_x_chance=0.5)
        visual_effect_generator = None

    # augmentation is only passed to the training generators
    train_args = dict(
        common_args,
        transform_generator=transform_generator,
        visual_effect_generator=visual_effect_generator,
    )
    # validation data is neither augmented nor shuffled
    validation_args = dict(common_args, shuffle_groups=False)

    if config.dataset_type == 'coco':
        # import here to prevent unnecessary dependency on cocoapi.
        # Absolute import: a notebook has no parent package for a relative one.
        from keras_retinanet.preprocessing.coco import CocoGenerator

        train_generator = CocoGenerator(
            config.coco_path,
            'train2017',
            **train_args
        )

        validation_generator = CocoGenerator(
            config.coco_path,
            'val2017',
            **validation_args
        )
    elif config.dataset_type == 'pascal':
        train_generator = PascalVocGenerator(
            config.pascal_path,
            'trainval',
            **train_args
        )

        validation_generator = PascalVocGenerator(
            config.pascal_path,
            'test',
            **validation_args
        )
    elif config.dataset_type == 'csv':
        train_generator = CSVGenerator(
            config.csv_annotations,
            config.csv_classes,
            **train_args
        )

        # validation annotations are optional for csv datasets
        if config.csv_val_annotations:
            validation_generator = CSVGenerator(
                config.csv_val_annotations,
                config.csv_classes,
                **validation_args
            )
        else:
            validation_generator = None
    elif config.dataset_type == 'oid':
        train_generator = OpenImagesGenerator(
            config.oid_main_dir,
            subset='train',
            version=config.oid_version,
            labels_filter=config.oid_labels_filter,
            annotation_cache_dir=config.oid_annotation_cache_dir,
            parent_label=config.oid_parent_label,
            **train_args
        )

        validation_generator = OpenImagesGenerator(
            config.oid_main_dir,
            subset='validation',
            version=config.oid_version,
            labels_filter=config.oid_labels_filter,
            annotation_cache_dir=config.oid_annotation_cache_dir,
            parent_label=config.oid_parent_label,
            **validation_args
        )
    elif config.dataset_type == 'kitti':
        train_generator = KittiGenerator(
            config.kitti_path,
            subset='train',
            **train_args
        )

        validation_generator = KittiGenerator(
            config.kitti_path,
            subset='val',
            **validation_args
        )
    else:
        raise ValueError('Invalid data type received: {}'.format(config.dataset_type))

    return train_generator, validation_generator

In [26]:
def check_config(config):
    """ Function to check for inherent contradictions within the configuration.
    For example, batch_size < num_gpus
    Intended to raise errors prior to backend initialisation.
    Args
        config: RetinanetConfig object (any object exposing multi_gpu, batch_size,
            snapshot, multi_gpu_force and backbone works).
    Returns
        config, unchanged.
    Raises
        ValueError: If multi-GPU training is requested together with a batch size
            smaller than the GPU count, with a snapshot to resume from, or without
            the explicit multi_gpu_force opt-in.
    """
    # every hard failure concerns multi-GPU training; single-GPU/CPU runs skip them
    if config.multi_gpu > 1:
        if config.batch_size < config.multi_gpu:
            raise ValueError(
                "Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(config.batch_size,
                                                                                                 config.multi_gpu))

        if config.snapshot:
            raise ValueError(
                "Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(config.multi_gpu,
                                                                                                    config.snapshot))

        if not config.multi_gpu_force:
            raise ValueError("Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue.")

    # non-resnet backbones are allowed but flagged as experimental (warning only)
    if 'resnet' not in config.backbone:
        warnings.warn('Using experimental backbone {}. Only resnet50 has been properly tested.'.format(config.backbone))

    return config

In [27]:
def parse_args(args):
    """ Parse the command-line style arguments and validate them.
    Args
        args : List of argument strings handed to argparse (e.g. ['csv', 'train.csv', 'classes.csv']).
    Returns
        The parsed argparse namespace, after it has passed check_config.
    """
    parser     = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
    # a dataset type must always be chosen
    subparsers.required = True

    coco_parser = subparsers.add_parser('coco')
    coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')

    pascal_parser = subparsers.add_parser('pascal')
    pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')

    kitti_parser = subparsers.add_parser('kitti')
    kitti_parser.add_argument('kitti_path', help='Path to dataset directory (ie. /tmp/kitti).')

    def csv_list(string):
        # argparse type converter: 'a,b,c' -> ['a', 'b', 'c']
        return string.split(',')

    oid_parser = subparsers.add_parser('oid')
    oid_parser.add_argument('main_dir', help='Path to dataset directory.')
    oid_parser.add_argument('--version',  help='The current dataset version is v4.', default='v4')
    oid_parser.add_argument('--labels-filter',  help='A list of labels to filter.', type=csv_list, default=None)
    oid_parser.add_argument('--annotation-cache-dir', help='Path to store annotation cache.', default='.')
    oid_parser.add_argument('--parent-label', help='Use the hierarchy children of this label.', default=None)

    csv_parser = subparsers.add_parser('csv')
    csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for training.')
    csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
    csv_parser.add_argument('--val-annotations', help='Path to CSV file containing annotations for validation (optional).')

    # only one source of initial weights may be selected
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--snapshot',          help='Resume training from a snapshot.')
    group.add_argument('--imagenet-weights',  help='Initialize the model with pretrained imagenet weights. This is the default behaviour.', action='store_const', const=True, default=True)
    group.add_argument('--weights',           help='Initialize the model with weights from a file.')
    group.add_argument('--no-weights',        help='Don\'t initialize the model with any weights.', dest='imagenet_weights', action='store_const', const=False)

    parser.add_argument('--backbone',         help='Backbone model used by retinanet.', default='resnet50', type=str)
    parser.add_argument('--batch-size',       help='Size of the batches.', default=1, type=int)
    parser.add_argument('--gpu',              help='Id of the GPU to use (as reported by nvidia-smi).')
    parser.add_argument('--multi-gpu',        help='Number of GPUs to use for parallel processing.', type=int, default=0)
    parser.add_argument('--multi-gpu-force',  help='Extra flag needed to enable (experimental) multi-gpu support.', action='store_true')
    parser.add_argument('--epochs',           help='Number of epochs to train.', type=int, default=50)
    parser.add_argument('--steps',            help='Number of steps per epoch.', type=int, default=10000)
    parser.add_argument('--lr',               help='Learning rate.', type=float, default=1e-5)
    parser.add_argument('--snapshot-path',    help='Path to store snapshots of models during training (defaults to \'./snapshots\')', default='./snapshots')
    parser.add_argument('--tensorboard-dir',  help='Log directory for Tensorboard output', default='./logs')
    parser.add_argument('--no-snapshots',     help='Disable saving snapshots.', dest='snapshots', action='store_false')
    parser.add_argument('--no-evaluation',    help='Disable per epoch evaluation.', dest='evaluation', action='store_false')
    parser.add_argument('--freeze-backbone',  help='Freeze training of backbone layers.', action='store_true')
    parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true')
    parser.add_argument('--defect-transform', help='Apply defect detection transforms to image and annotations.', action='store_true')
    parser.add_argument('--image-min-side',   help='Rescale the image so the smallest side is min_side.', type=int, default=800)
    parser.add_argument('--image-max-side',   help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
    parser.add_argument('--config',           help='Path to a configuration parameters .ini file.')
    parser.add_argument('--weighted-average', help='Compute the mAP using the weighted average of precisions among classes.', action='store_true')
    parser.add_argument('--compute-val-loss', help='Compute validation loss during training', dest='compute_val_loss', action='store_true')

    # Fit generator arguments
    parser.add_argument('--multiprocessing',  help='Use multiprocessing in fit_generator.', action='store_true')
    parser.add_argument('--workers',          help='Number of generator workers.', type=int, default=1)
    parser.add_argument('--max-queue-size',   help='Queue length for multiprocessing workers in fit_generator.', type=int, default=10)

    # check_config is the validator defined in this notebook; the parsed namespace
    # exposes every attribute it reads (multi_gpu, batch_size, snapshot,
    # multi_gpu_force, backbone).
    return check_config(parser.parse_args(args))

In [28]:
def train(config=None):
    """ Run a complete training session.
    Validates the configuration, sets up the TensorFlow session, builds the data
    generators, the models and the callbacks, then fits the training model.
    Args
        config : RetinanetConfig object holding every training option.
                 None falls back to a default RetinanetConfig().
    Returns
        The value returned by training_model.fit_generator.
    """
    if config is None:
        config = RetinanetConfig()

    check_config(config)

    # create object that stores backbone information
    backbone = models.backbone(config.backbone)

    # make sure keras is the minimum required version
    check_keras_version()

    # optionally choose specific GPU
    if config.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = config.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # optionally load config parameters (replaces the path with the parsed contents)
    if config.config:
        config.config = read_config_file(config.config)

    # create the generators
    train_generator, validation_generator = create_generators(config, backbone.preprocess_image)

    # create the model
    if config.snapshot is not None:
        print('Loading model, this may take a second...')
        model            = models.load_model(config.snapshot, backbone_name=config.backbone)
        training_model   = model
        anchor_params    = None
        if config.config and 'anchor_parameters' in config.config:
            anchor_params = parse_anchor_parameters(config.config)
        prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)
    else:
        weights = config.weights
        # default to imagenet if nothing else is specified
        if weights is None and config.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        # NOTE(review): fixed network input shape kept from the original cell;
        # presumably it matches this dataset's images after resizing - confirm.
        inputs = keras.layers.Input(shape=(358, 1333, 3))

        def backbone_retinanet(*args, **kwargs):
            # create_models needs a callable it can invoke with num_classes etc.;
            # this wrapper forwards that call and pins the input tensor.
            return backbone.retinanet(*args, inputs=inputs, **kwargs)

        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone_retinanet,
            num_classes=train_generator.num_classes(),
            weights=weights,
            multi_gpu=config.multi_gpu,
            freeze_backbone=config.freeze_backbone,
            lr=config.lr,
            config=config.config
        )

    # print model summary (summary() prints by itself)
    model.summary()

    # this lets the generator compute backbone layer shapes using the actual backbone model
    if 'vgg' in config.backbone or 'densenet' in config.backbone:
        train_generator.compute_shapes = make_shapes_callback(model)
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    # create the callbacks (they receive the validation generator for evaluation)
    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        config,
    )

    # only hand validation data to fit_generator when validation loss is requested
    if not config.compute_val_loss:
        validation_generator = None

    # start training
    return training_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=config.steps,
        epochs=config.epochs,
        verbose=1,
        callbacks=callbacks,
        workers=config.workers,
        use_multiprocessing=config.multiprocessing,
        max_queue_size=config.max_queue_size,
        validation_data=validation_generator
    )

In [33]:
class RetinanetConfig(object):
    """ Plain container for every training option, used in place of command-line arguments.
    Edit the attribute values (here or on an instance) before calling train(config).
    The snapshots / evaluation properties expose the positive form of the
    no_snapshots / no_evaluation flags, which is the form create_callbacks reads.
    """

    def __init__(self):
        # Select dataset type
        self.dataset_type = 'csv'

        # If training on COCO dataset
        # Path to dataset directory (ie. /tmp/COCO).
        self.coco_path = None

        # If training on Pascal dataset
        # Path to dataset directory (ie. /tmp/VOCdevkit).
        self.pascal_path = None

        # If training on Kitti dataset
        # Path to dataset directory (ie. /tmp/kitti).
        self.kitti_path = None

        # If training on custom dataset
        # Path to CSV file containing annotations for training.
        self.csv_annotations = None
        # Path to a CSV file containing class label mapping.
        self.csv_classes = None
        # Path to CSV file containing annotations for validation (optional).
        self.csv_val_annotations = None

        # If OID dataset
        # Path to dataset directory.
        self.oid_main_dir = None
        # The current dataset version is v4.
        self.oid_version = 'v4'
        # A list of labels to filter.
        # Write it as a comma-separated string above; it is split into a list here.
        self.oid_labels_filter = None
        if self.oid_labels_filter:
            self.oid_labels_filter = csv_list(self.oid_labels_filter)
        # Path to store annotation cache.
        self.oid_annotation_cache_dir = '.'
        # Use the hierarchy children of this label.
        self.oid_parent_label = None

        # Training parameters
        # Resume training from a snapshot.
        self.snapshot = None
        # Initialize the model with pretrained imagenet weights. This is the default behavior.
        self.imagenet_weights = True
        # Initialize the model with weights from a file.
        self.weights = None
        # Don't initialize the model with any weights.
        # NOTE(review): train() reads imagenet_weights, not this flag; set
        # imagenet_weights = False to actually skip the pretrained weights.
        self.no_weights = False
        # Backbone model used by retinanet.
        self.backbone = 'resnet50'
        # Size of the batches.
        self.batch_size = 1
        # ID of the GPU to use (as reported by nvidia-smi).
        self.gpu = None
        # Number of GPUs to use for parallel processing.
        self.multi_gpu = 0
        # Extra flag needed to enable (experimental) multi-gpu support.
        self.multi_gpu_force = False
        # Number of epochs to train.
        self.epochs = 50
        # Number of steps per epoch.
        self.steps = 10000
        # Learning rate.
        self.lr = 1e-5
        # Path to store snapshots of models during training (defaults to \'./snapshots\').
        self.snapshot_path = './snapshots'
        # Log directory for Tensorboard output.
        self.tensorboard_dir = './logs'
        # Disable saving snapshots.
        self.no_snapshots = False
        # Disable per epoch evaluation.
        self.no_evaluation = False
        # Freeze training of backbone layers.
        self.freeze_backbone = True
        # Randomly transform image and annotations.
        self.random_transform = False
        # Apply defect detection transforms to image and annotations.
        self.defect_transform = True
        # Rescale the image so the smallest side is min_side.
        self.image_min_side = 800
        # Rescale the image if the largest side is larger than max_side.
        self.image_max_side = 1333
        # Path to a configuration parameters .ini file.
        self.config = None
        # Compute the mAP using the weighted average of precisions among classes.
        self.weighted_average = True
        # Compute validation loss during training.
        self.compute_val_loss = True
        # Use multiprocessing in fit_generator.
        self.multiprocessing = True
        # Number of generator workers.
        self.workers = 1
        # Queue length for multiprocessing workers in fit_generator.
        self.max_queue_size = 10

    @property
    def snapshots(self):
        # True when snapshots should be saved; always the inverse of no_snapshots.
        return not self.no_snapshots

    @snapshots.setter
    def snapshots(self, enabled):
        self.no_snapshots = not enabled

    @property
    def evaluation(self):
        # True when per-epoch evaluation should run; always the inverse of no_evaluation.
        return not self.no_evaluation

    @evaluation.setter
    def evaluation(self, enabled):
        self.no_evaluation = not enabled
        
    
def csv_list(string):
    """ Split a comma-separated string into a list of its parts.
    Args
        string : Text such as 'a,b,c'.
    Returns
        List of the substrings found between commas.
    """
    separator = ','
    parts = string.split(separator)
    return parts

In [34]:
# Build the default configuration and start training.
# NOTE(review): the defaults select dataset_type 'csv' but leave csv_annotations and
# csv_classes as None; set both to real file paths on `config` before calling
# train(). The TypeError in this cell's output is consistent with a None path
# reaching the CSV generator - confirm.
config = RetinanetConfig()
train(config)

TypeError: expected str, bytes or os.PathLike object, not NoneType