For pack detection we use an SSD (Single Shot Detector) MobileNet V1 model pretrained on the COCO dataset.<br>
Training a model from scratch could take days even on a powerful system. So we use <br>
transfer learning: an already working model is trained further on the new type of object.<br>
Even though the TensorFlow Object Detection API can augment data by randomly cropping parts of an image, with my<br>
slow hard drive, 16 GB of memory, and a far-from-top GPU it is not possible to use the images as they were provided.<br>
So I am going to randomly crop the images myself, shrink the crops to fit within 300x300, and merge everything into<br>
TensorFlow train and eval record files.<br>
In the data/images folder, create a detector folder with eval and train subfolders inside. We will put the crops there.

In [1]:
import cv2
import pandas as pd
import numpy as np
import os
import io
import tensorflow as tf

from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

  from ._conv import register_converters as _register_converters


In [2]:
# folder holding the original shelf photos that get cropped
img_path = 'data/images/ShelfImages/'
# destination root for the crops; eval/ and train/ subfolders are used below
cropped_path = 'data/images/detector/'
# folder with the Step 1 results (photos.pkl and products.pkl are read from it)
data_path = 'data/'
# output folder for the generated train.record / eval.record files
detector_data_path = 'pack_detector/data/'

In [3]:
# Load the two pickled dataframes from the Step 1 folder: `photos` (one row
# per image, read below via its 'file' and 'is_train' columns) and `products`
# (one row per product rectangle: 'file', 'xmin', 'ymin', 'xmax', 'ymax').
photos, products = (
    pd.read_pickle(f'{data_path}{name}.pkl') for name in ('photos', 'products')
)

In [4]:
# Debug switch: when True, a rectangle is drawn around every product on each
# saved crop so the boxes can be checked visually. Keep it False when
# generating training data, otherwise the drawn boxes end up in the images.
TEST_CROP = False
# Number of random crop attempts per image; attempts that contain no complete
# product are discarded, so fewer crops than this may be saved.
N_CROP_TRIALS = 20

In [5]:
# returns random integer in [s, f] (both ends inclusive)
def rand_between(s, f):
    """Return a random integer drawn uniformly from the closed range [s, f].

    Args:
        s: lower bound (inclusive).
        f: upper bound (inclusive); expected to be >= s.

    Returns:
        ``s`` itself when ``s == f``, otherwise a random integer ``r`` with
        ``s <= r <= f``.
    """
    if s == f:
        return s
    # np.random.randint excludes its upper bound, so extend it by one;
    # otherwise f could never be returned, contrary to the documented range.
    return np.random.randint(s, f + 1)

In [6]:
# Records describing every product box of every saved crop, split by subset.
train_products, eval_products = [], []

# cv2.imwrite reports a missing destination folder only through its return
# value, so create both output folders before any crop is written.
for subfolder in ('eval/', 'train/'):
    os.makedirs(f'{cropped_path}{subfolder}', exist_ok=True)

for img_file, is_train in photos[['file', 'is_train']].values:
    img = cv2.imread(f'{img_path}{img_file}')
    # cv2.imread returns None (it does not raise) when the file cannot be read
    if img is None:
        raise IOError(f'cannot read image {img_path}{img_file}')
    img_h, img_w = img.shape[:2]
    # products of this photo; identical for every trial, so filter only once
    img_products = products[products.file == img_file]
    # pick the destination by truthiness instead of indexing a list with a bool
    subpath = 'train/' if is_train else 'eval/'
    subset_products = train_products if is_train else eval_products
    for _ in range(N_CROP_TRIALS):
        # random crop rectangle (draw order x0, x1, y0, y1 is kept so a seeded
        # run consumes the random stream in the same sequence)
        x0 = rand_between(0, img_w - 1)
        x1 = rand_between(x0, img_w - 1)
        y0 = rand_between(0, img_h - 1)
        y1 = rand_between(y0, img_h - 1)
        # products totally inside crop rectangle
        crop_products = img_products[(img_products.xmin > x0) &
                                     (img_products.ymin > y0) &
                                     (img_products.xmax < x1) &
                                     (img_products.ymax < y1)]
        # no products inside crop rectangle? cropping trial failed...
        if crop_products.empty:
            continue
        # name the crop
        crop_img_file = f'{img_file[:-4]}{x0}_{y0}_{x1}_{y1}.JPG'
        # crop and reshape to 300x300 or smaller keeping aspect ratio
        crop = img[y0:y1, x0:x1]
        h, w = crop.shape[:2]
        ratio = min(300/h, 300/w)
        crop = cv2.resize(crop, (0,0), fx=ratio, fy=ratio)[0:300, 0:300]
        # size of the resized crop, stored with every box record
        h, w = crop.shape[:2]
        # add crop inner products to the train or eval list
        for xmin, ymin, xmax, ymax in crop_products[['xmin', 'ymin', 'xmax', 'ymax']].values:
            # move the box into crop coordinates, then apply the resize ratio
            xmin, xmax = [int(np.round((e - x0) * ratio)) for e in (xmin, xmax)]
            ymin, ymax = [int(np.round((e - y0) * ratio)) for e in (ymin, ymax)]
            subset_products.append({'filename': crop_img_file, 'class':'pack',
                                    'width':w, 'height':h,
                                    'xmin':xmin, 'ymin':ymin, 'xmax':xmax, 'ymax':ymax})
            if TEST_CROP:
                crop = cv2.rectangle(crop, (xmin, ymin), (xmax, ymax), (255,0,0), 5)
        # save crop to eval or train folder; fail loudly instead of silently
        # producing box records that point at a file that was never written
        if not cv2.imwrite(f'{cropped_path}{subpath}{crop_img_file}', crop):
            raise IOError(f'cannot write crop {cropped_path}{subpath}{crop_img_file}')

In [7]:
# Turn the collected box records into dataframes indexed by crop file name,
# so that all boxes of one crop can later be grouped by 'filename'.
train_df, eval_df = (
    pd.DataFrame(records).set_index('filename')
    for records in (train_products, eval_products)
)

In [8]:
def class_text_to_int(row_label):
    """Map a class name to its integer label id.

    Args:
        row_label: class name taken from a box record.

    Returns:
        1 for ``'pack'`` (the only class used here); ``None`` for any other
        label.
    """
    if row_label == 'pack':
        return 1
    # Unknown label: return None explicitly (the previous bare `None`
    # expression statement had no effect and only hid this intent).
    return None


def split(df, group):
    """Split a dataframe into per-group records.

    Args:
        df: dataframe to split.
        group: key passed to ``df.groupby``.

    Returns:
        A list of ``data(filename, object)`` namedtuples, one per group,
        where ``filename`` is the group key and ``object`` is the
        sub-dataframe returned by ``get_group`` for that key.
    """
    record = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(record(key, grouped.get_group(key)))
    return records


def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and its bounding boxes.

    Args:
        group: record with a ``filename`` attribute (image file name) and an
            ``object`` attribute (dataframe with 'xmin', 'ymin', 'xmax',
            'ymax' and 'class' columns, one row per box).
        path: directory the image file is read from.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, the image
        size read from the file itself, and the box coordinates divided by
        that width/height together with their class names and ids.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # decode the bytes only to learn the pixel size of the stored image
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = group.filename.encode('utf8')
    boxes = [row for _, row in group.object.iterrows()]
    labels = [row['class'] for row in boxes]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        # box coordinates are stored relative to the image size
        'image/object/bbox/xmin': dataset_util.float_list_feature(
            [row['xmin'] / width for row in boxes]),
        'image/object/bbox/xmax': dataset_util.float_list_feature(
            [row['xmax'] / width for row in boxes]),
        'image/object/bbox/ymin': dataset_util.float_list_feature(
            [row['ymin'] / height for row in boxes]),
        'image/object/bbox/ymax': dataset_util.float_list_feature(
            [row['ymax'] / height for row in boxes]),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [label.encode('utf8') for label in labels]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [class_text_to_int(label) for label in labels]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

In [9]:
def convert_to_tf_records(images_path, examples, dst_file):
    """Write one TFRecord file with an example per image in ``examples``.

    Args:
        images_path: directory containing the image files named by the
            'filename' key of ``examples``.
        examples: dataframe of box records that can be grouped by 'filename'.
        dst_file: path of the TFRecord file to create.
    """
    # The context manager closes the writer even when building or writing an
    # example raises, so the output file handle is never left open.
    with tf.python_io.TFRecordWriter(dst_file) as writer:
        for group in split(examples, 'filename'):
            tf_example = create_tf_example(group, images_path)
            writer.write(tf_example.SerializeToString())

In [10]:
# Produce train.record and eval.record from the matching crop folder and
# box dataframe.
for subset, subset_df in (('train', train_df), ('eval', eval_df)):
    convert_to_tf_records(
        f'{cropped_path}{subset}/',
        subset_df,
        f'{detector_data_path}{subset}.record',
    )

In [1]:
#python3 train.py --logtostderr --train_dir=pack_training/ --pipeline_config_path=pack_training/ssd_mobilenet_v2_coco.config
#~/work/models/research/object_detection/pack_detector/models/ssd_mobilenet_v1
#python3 train.py --logtostderr --train_dir=pack_detector/models/ssd_mobilenet_v1/train/ --pipeline_config_path=pack_detector/models/ssd_mobilenet_v1/ssd_mobilenet_v1_pack.config
#CUDA_VISIBLE_DEVICES="" python3 eval.py --logtostderr --checkpoint_dir=pack_detector/models/ssd_mobilenet_v1/train --pipeline_config_path=pack_detector/models/ssd_mobilenet_v1/ssd_mobilenet_v1_pack.config --eval_dir=pack_detector/models/ssd_mobilenet_v1/eval