References:
- https://github.com/MPieter/YOLOv4-CoreML-Converter/blob/master/convert.py
- https://www.codeproject.com/script/Content/ViewReadingList.aspx?rlid=33
- https://github.com/allanzelener/YAD2K/blob/master/yad2k.py

In [None]:
import argparse
import configparser
import io
import os
from collections import defaultdict

import numpy as np
from keras import backend as K
from keras.layers import (Conv2D, GlobalAveragePooling2D, Input, Lambda,
                          MaxPooling2D)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.merge import concatenate
from keras.layers.normalization import BatchNormalization
from tensorflow.keras.models import Model
from keras.regularizers import l2
from keras.utils.vis_utils import plot_model as plot


import coremltools as ct

In [None]:
# Inputs: the Darknet network definition and its trained weights.
config_path = 'yolov2.cfg'
weights_path = 'yolov2.weights'

# Outputs: the intermediate Keras model and the final Core ML model.
keras_output_path = 'yolov2_keras_mpieter.h5'
# yolov2-coco-9.mlmodel  (presumably an alternative output name; unused)
cml_model_path = 'yolov2_coreml.mlmodel'

In [None]:
# Open the Darknet weights file and consume its header. The handle is kept
# open on purpose: the layer-building loop continues reading from it, and it
# is closed only after the Keras model has been saved.
print('Loading weights.')
weights_file = open(weights_path, 'rb')
# The first 16 bytes are interpreted as four int32 values.
# NOTE(review): presumably major, minor, revision and images-seen. Newer
# Darknet releases may store a wider "seen" counter, which would make the
# header longer than 16 bytes -- confirm for the weights file in use.
weights_header = np.ndarray(
    shape=(4, ), dtype='int32', buffer=weights_file.read(16))
print('Weights Header: ', weights_header)

In [None]:
def unique_config_sections(config_file):
    """Return the config text with every section header made unique.

    Each line that starts with ``[`` has its section name suffixed with
    ``_<k>``, where ``k`` counts the earlier sections of the same name
    (``[convolutional]`` becomes ``[convolutional_0]``, then
    ``[convolutional_1]``, ...). All other lines are copied unchanged.

    Args:
        config_file: Path of the config file to read.

    Returns:
        An ``io.StringIO`` positioned at its start that holds the rewritten
        text, ready to be handed to ``configparser``.
    """
    occurrences = defaultdict(int)

    def _numbered(lines):
        # Yield each line, renaming section headers as they are met.
        for text in lines:
            if text.startswith('['):
                name = text.strip().strip('[]')
                index = occurrences[name]
                occurrences[name] = index + 1
                text = text.replace(name, f'{name}_{index}')
            yield text

    with open(config_file) as cfg:
        rewritten = ''.join(_numbered(cfg))
    return io.StringIO(rewritten)

In [None]:
# Parse the Darknet cfg. The file is first rewritten so that every section
# name is unique (e.g. convolutional_0, convolutional_1, ...), because the
# same section type appears many times and configparser needs distinct names.
print('Parsing Darknet config.')
unique_config_file = unique_config_sections(config_path)
cfg_parser = configparser.ConfigParser()
# read_file consumes the in-memory stream returned above.
cfg_parser.read_file(unique_config_file)

In [None]:
# Network input resolution, taken from the first [net] section of the cfg.
image_height, image_width = (
    int(cfg_parser['net_0'][key]) for key in ('height', 'width'))
print(f'w ={image_width}, h={image_height}')

In [None]:
# Seed the layer list with the RGB input tensor; `prev_layer` always points
# at the output of the most recently built layer.
prev_layer = Input(shape=(image_height, image_width, 3))
all_layers = [prev_layer]

# L2 weight decay comes from the [net] section when present, else 5e-4.
if 'net_0' in cfg_parser.sections():
    weight_decay = float(cfg_parser['net_0']['decay'])
else:
    weight_decay = 5e-4
# Running total of 4-byte values consumed from the weights file.
count = 0

In [None]:
def space_to_depth_x2(x):
    """Thin wrapper for Tensorflow space_to_depth with block_size=2.

    Args:
        x: Tensor passed straight to ``tf.nn.space_to_depth``
            (presumably 4-D, channels-last -- confirm against the caller).

    Returns:
        The result of ``tf.nn.space_to_depth(x, block_size=2)``.
    """
    # Import currently required to make Lambda work.
    # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
    import tensorflow as tf

    return tf.nn.space_to_depth(x, block_size=2)


In [None]:
def space_to_depth_x2_output_shape(input_shape):
    """Determine space_to_depth output shape for block_size=2.

    Note: For Lambda with TensorFlow backend, output shape may not be needed.

    Each spatial dimension is handled independently, so a shape with only
    one of the two spatial dimensions known no longer fails on ``None // 2``.

    Args:
        input_shape: Shape indexed as (batch, height, width, channels).
            Height and width may be ``None`` when unknown.

    Returns:
        Tuple ``(batch, height // 2, width // 2, 4 * channels)``; an unknown
        spatial dimension stays ``None``.
    """

    def _halve(dim):
        # An unknown (falsy) dimension stays unknown.
        return dim // 2 if dim else None

    return (input_shape[0], _halve(input_shape[1]), _halve(input_shape[2]),
            4 * input_shape[3])

In [None]:
# Translate the Darknet network into Keras, one config section at a time.
# Sections are visited in file order and each convolutional section reads its
# weights from `weights_file` as it is met, so the read order below must not
# be changed. `count` tracks how many 4-byte values have been consumed and
# `all_layers` collects every layer output so `route` sections can refer back
# to earlier ones.
for section in cfg_parser.sections():
    print('Parsing section {}'.format(section))
    if section.startswith('convolutional'):
        filters = int(cfg_parser[section]['filters'])
        size = int(cfg_parser[section]['size'])
        stride = int(cfg_parser[section]['stride'])
        pad = int(cfg_parser[section]['pad'])
        activation = cfg_parser[section]['activation']
        # batch_normalize is an integer flag: an explicit `batch_normalize=0`
        # must not enable batch norm, otherwise BN statistics that are not in
        # the weights file would be read and every later layer misaligned.
        batch_normalize = cfg_parser[section].getint(
            'batch_normalize', fallback=0) != 0

        # padding='same' is equivalent to Darknet pad=1
        padding = 'same' if pad == 1 else 'valid'

        # Setting weights.
        # Darknet serializes convolutional weights as:
        # [bias/beta, [gamma, mean, variance], conv_weights]
        prev_layer_shape = K.int_shape(prev_layer)

        # TODO: This assumes channel last dim_ordering.
        weights_shape = (size, size, prev_layer_shape[-1], filters)
        darknet_w_shape = (filters, weights_shape[2], size, size)
        # np.prod replaces np.product, which was removed in NumPy 2.0.
        weights_size = np.prod(weights_shape)

        print('conv2d', 'bn'
              if batch_normalize else '  ', activation, weights_shape)

        # One float32 per filter: the conv bias, or the BN beta when batch
        # normalization is enabled.
        conv_bias = np.ndarray(
            shape=(filters, ),
            dtype='float32',
            buffer=weights_file.read(filters * 4))
        count += filters

        if batch_normalize:
            # Three float32 rows of `filters` values each.
            bn_weights = np.ndarray(
                shape=(3, filters),
                dtype='float32',
                buffer=weights_file.read(filters * 12))
            count += 3 * filters

            # TODO: Keras BatchNormalization mistakenly refers to var
            # as std.
            bn_weight_list = [
                bn_weights[0],  # scale gamma
                conv_bias,  # shift beta
                bn_weights[1],  # running mean
                bn_weights[2]  # running var
            ]

        conv_weights = np.ndarray(
            shape=darknet_w_shape,
            dtype='float32',
            buffer=weights_file.read(weights_size * 4))
        count += weights_size

        # DarkNet conv_weights are serialized Caffe-style:
        # (out_dim, in_dim, height, width)
        # We would like to set these to Tensorflow order:
        # (height, width, in_dim, out_dim)
        # TODO: Add check for Theano dim ordering.
        conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
        # With batch norm the bias lives in the BN layer (as beta), so the
        # conv layer gets the kernel only.
        conv_weights = [conv_weights] if batch_normalize else [
            conv_weights, conv_bias
        ]

        # Handle activation.
        act_fn = None
        if activation == 'leaky':
            pass  # Add advanced activation later.
        elif activation != 'linear':
            raise ValueError(
                'Unknown activation function `{}` in section {}'.format(
                    activation, section))

        # Create Conv2D layer
        conv_layer = (Conv2D(
            filters, (size, size),
            strides=(stride, stride),
            kernel_regularizer=l2(weight_decay),
            use_bias=not batch_normalize,
            weights=conv_weights,
            activation=act_fn,
            padding=padding))(prev_layer)

        if batch_normalize:
            conv_layer = (BatchNormalization(
                weights=bn_weight_list))(conv_layer)
        prev_layer = conv_layer

        # Only the final output of the section (after any activation) is
        # recorded, so one Darknet layer maps to one `all_layers` entry.
        if activation == 'linear':
            all_layers.append(prev_layer)
        elif activation == 'leaky':
            act_layer = LeakyReLU(alpha=0.1)(prev_layer)
            prev_layer = act_layer
            all_layers.append(act_layer)

    elif section.startswith('maxpool'):
        size = int(cfg_parser[section]['size'])
        stride = int(cfg_parser[section]['stride'])
        all_layers.append(
            MaxPooling2D(
                padding='same',
                pool_size=(size, size),
                strides=(stride, stride))(prev_layer))
        prev_layer = all_layers[-1]

    elif section.startswith('avgpool'):
        if cfg_parser.items(section) != []:
            raise ValueError('{} with params unsupported.'.format(section))
        all_layers.append(GlobalAveragePooling2D()(prev_layer))
        prev_layer = all_layers[-1]

    elif section.startswith('route'):
        # NOTE(review): the indices are applied directly to `all_layers`,
        # whose element 0 is the Input tensor. Negative (relative) indices
        # count back from the latest layer; non-negative (absolute) indices
        # may be off by one relative to Darknet numbering -- confirm before
        # using a cfg that relies on them.
        ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
        layers = [all_layers[i] for i in ids]
        if len(layers) > 1:
            print('Concatenating route layers:', layers)
            concatenate_layer = concatenate(layers)
            all_layers.append(concatenate_layer)
            prev_layer = concatenate_layer
        else:
            skip_layer = layers[0]  # only one layer to route
            all_layers.append(skip_layer)
            prev_layer = skip_layer

    elif section.startswith('reorg'):
        block_size = int(cfg_parser[section]['stride'])
        # Raised explicitly (not asserted) so the check survives `python -O`
        # and matches the other unsupported-config errors in this loop.
        if block_size != 2:
            raise ValueError('Only reorg with stride 2 supported.')
        all_layers.append(
            Lambda(
                space_to_depth_x2,
                output_shape=space_to_depth_x2_output_shape,
                name='space_to_depth_x2')(prev_layer))
        prev_layer = all_layers[-1]

    elif section.startswith('region'):
        # The region section adds no Keras layer; its anchors string is
        # written to anchors.txt.
        with open('anchors.txt', 'w') as f:
            print(cfg_parser[section]['anchors'], file=f)

    elif (section.startswith('net') or section.startswith('cost') or
          section.startswith('softmax')):
        pass  # Configs not currently handled during model definition.

    else:
        raise ValueError(
            'Unsupported section header type: {}'.format(section))


In [None]:
 # Create and save model.
# Build the Keras model from the first (input) and last collected layers.
model = Model(inputs=all_layers[0], outputs=all_layers[-1])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()
model.save(keras_output_path)
print('Saved Keras model to {}'.format(keras_output_path))
# Check to see if all weights have been read. Weights are 4 bytes each;
# integer division keeps the reported counts whole numbers.
remaining_weights = len(weights_file.read()) // 4
weights_file.close()
print('Read {} of {} from Darknet weights.'.format(count, count +
                                                   remaining_weights))
if remaining_weights > 0:
    print('Warning: {} unused weights'.format(remaining_weights))

In [None]:
# Render the model graph (with tensor shapes) next to the saved Keras file.
plot(model, show_shapes=True, to_file=f'{keras_output_path}.png')
print(f'Saved model plot to {keras_output_path}.png')

In [None]:
# Notebook inspection: show the model's input tensors.
model.inputs

In [None]:
# Name of the model's first input tensor; only referenced by the
# commented-out conversion variants further down.
INPUT_NODE = model.inputs[0].name
INPUT_NODE

In [None]:
# Notebook inspection: show the model's output tensors.
model.outputs

In [None]:
# Convert the in-memory Keras model to Core ML. The input is declared as an
# image whose pixel values are scaled by 1/255 during preprocessing.
# Alternative (unused): convert from the saved .h5 file instead.
# ct.convert(model = keras_output_path, inputs=[ct.ImageType(scale=1 / 255.0)])
cml_model = ct.convert(model, inputs=[ct.ImageType(scale=1 / 255.0)], source= 'tensorflow')
# Alternative (unused): older keyword-style conversion from the .h5 file.
# cml_model = ct.convert(
#     model = keras_output_path,
#     image_input_names = [INPUT_NODE],
#     preprocessing_args={
#         'image_scale': 1/255.0,
#             'is_bgr': False
#     },
#     minimum_ios_deployment_target='13'
# )


# Alternative (unused): conversion through an ONNX export.
# ct.converters.onnx.convert(
#     model=onnx_model_path,
#     image_input_names=[INPUT_NODE],
#     preprocessing_args={
#         'image_scale': 1/255.0,
#             'is_bgr': False
#     },
#     minimum_ios_deployment_target='13',
# )

In [None]:
# Print the converted model's description.
print(cml_model)

In [None]:
# Write the Core ML model to disk at cml_model_path.
cml_model.save(cml_model_path)

In [None]:
from PIL import Image

def _center_crop_and_resize(image, size):
    """Center-crop *image* to a square and resize it to ``size`` x ``size``."""
    w, h = image.size
    min_dim = min(w, h)
    x0 = (w - min_dim) // 2
    y0 = (h - min_dim) // 2
    box = (x0, y0, x0 + min_dim, y0 + min_dim)
    return image.crop(box=box).resize((size, size))


def load_and_scale_image(image_url, size=416):
    """Download an image, center-crop it to a square and resize it.

    Args:
        image_url: URL of the image to download.
        size: Side length in pixels of the square result (default 416).

    Returns:
        The cropped and resized PIL image.
    """
    # Imported locally: a bare `import urllib` does not guarantee that the
    # `urllib.request` submodule is loaded, and none is in scope when this
    # function is defined.
    import io
    import urllib.request

    # Read the whole payload and close the connection before decoding, so
    # no HTTP response is left open behind the image object.
    with urllib.request.urlopen(image_url) as response:
        payload = response.read()
    return _center_crop_and_resize(Image.open(io.BytesIO(payload)), size)


def load_local_and_scale_image(image_path, size=416):
    """Open a local image, center-crop it to a square and resize it.

    Args:
        image_path: Path of the image file to open.
        size: Side length in pixels of the square result (default 416).

    Returns:
        The cropped and resized PIL image.
    """
    return _center_crop_and_resize(Image.open(image_path), size)

In [None]:
# Output grid is GRID_SIZE x GRID_SIZE cells over a 416-pixel input, so each
# cell spans CELL_SIZE pixels; every cell predicts BOXES_PER_CELL boxes.
GRID_SIZE = 13
CELL_SIZE = 416 // GRID_SIZE
BOXES_PER_CELL = 5

# One [width, height] prior per box, multiplied by CELL_SIZE when decoding.
# NOTE(review): presumably these match the anchors of the cfg's region
# section -- confirm against anchors.txt.
ANCHORS = [
    [0.57273, 0.677385],
    [1.87446, 2.06253],
    [3.33843, 5.47434],
    [7.88282, 3.52778],
    [9.77052, 9.16828],
]

In [None]:
# Load the class labels, one per line, stripping surrounding whitespace.
with open('coco.txt', 'r') as labels_file:
    COCO_CLASSES = [label.strip() for label in labels_file]

# Notebook preview of the first few labels.
COCO_CLASSES[:5]

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of *x*.

    Works element-wise on NumPy arrays as well as on scalars.
    """
    return 1 / (1 + np.exp(-x))

def softmax(x):
    """Return the softmax of *x*, normalised over all of its elements.

    The maximum is subtracted before exponentiating so that large inputs
    do not overflow.
    """
    exponentials = np.exp(x - np.max(x))
    total = exponentials.sum()
    return exponentials / total

In [None]:
def decode_preds(raw_preds: np.ndarray) -> list:
    """Decode raw network output into a confidence-sorted list of boxes.

    Every grid cell and every anchor box yields one candidate; no
    thresholding or non-maximum suppression is applied here.

    Args:
        raw_preds: Array indexed as ``[0, channel, cy, cx]``. For box ``b``
            the channels start at ``b * (num_classes + 5)`` and hold
            tx, ty, tw, th, the objectness score and then one score per
            class, where ``num_classes`` is ``len(COCO_CLASSES)``.
            NOTE(review): this is a channels-first layout; the Keras model
            built in this file is channels-last, so confirm that the caller
            transposes its output before passing it in.

    Returns:
        A list of ``[class_index, confidence, x, y, w, h]`` entries sorted by
        descending confidence. ``confidence`` is the box confidence times
        the best class probability; ``x``/``y`` are the cell offset plus the
        sigmoid of tx/ty, scaled by ``CELL_SIZE`` (presumably the box
        centre); ``w``/``h`` are the anchor size scaled by ``exp(tw)`` /
        ``exp(th)`` and ``CELL_SIZE``.
    """
    num_classes = len(COCO_CLASSES)
    # Channels per box: 4 coordinates + 1 objectness score + class scores.
    box_stride = num_classes + 5
    decoded_preds = []
    for cy in range(GRID_SIZE):
        for cx in range(GRID_SIZE):
            for b in range(BOXES_PER_CELL):
                box_shift = b * box_stride

                tx = float(raw_preds[0, box_shift, cy, cx])
                ty = float(raw_preds[0, box_shift + 1, cy, cx])
                tw = float(raw_preds[0, box_shift + 2, cy, cx])
                th = float(raw_preds[0, box_shift + 3, cy, cx])
                ts = float(raw_preds[0, box_shift + 4, cy, cx])

                x = (float(cx) + sigmoid(tx)) * CELL_SIZE
                y = (float(cy) + sigmoid(ty)) * CELL_SIZE

                w = np.exp(tw) * ANCHORS[b][0] * CELL_SIZE
                h = np.exp(th) * ANCHORS[b][1] * CELL_SIZE

                box_confidence = sigmoid(ts)
                classes_raw = raw_preds[
                    0, box_shift + 5:box_shift + 5 + num_classes, cy, cx]
                classes_confidence = softmax(classes_raw)

                box_class_idx = np.argmax(classes_confidence)
                box_class_confidence = classes_confidence[box_class_idx]

                combined_confidence = box_confidence * box_class_confidence

                decoded_preds.append(
                    [box_class_idx, combined_confidence, x, y, w, h])

    return sorted(decoded_preds, key=lambda p: p[1], reverse=True)

In [None]:
import urllib
# Load the local test picture through the crop-and-resize helper and
# display it in the notebook.
image = load_local_and_scale_image('persons_1.jpeg')
image

In [None]:
# Notebook inspection: size of the loaded image.
image.size

In [None]:
# Notebook inspection: Python type of the loaded image object.
type(image)

In [None]:
# Convert the image to an array and prepend a batch axis of length 1.
input_image = np.array(image)[np.newaxis, ...]
input_image.shape

In [None]:
# The model input was declared as ct.ImageType at conversion time, and Core ML
# image inputs take a PIL image rather than a NumPy array, so the resized PIL
# `image` is passed directly.
# NOTE(review): 'input_1' and 'Identity' are presumably the names assigned by
# the converter -- confirm against the output of print(cml_model).
preds = cml_model.predict(data={'input_1': image})['Identity']