In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("/home/bfortuner/workplace/VisionQuest")
from common import *

### Setup

In [None]:
# Untar files into data directory
# tar zxvf bbox_images.tar.gz
DATA_DIR = '/bigguy/data/volleyball/'
IMG_DIR = os.path.join(DATA_DIR, 'images_subset')
TF_RECORD_DIR = os.path.join(DATA_DIR, 'tf_records')
# makedirs with exist_ok keeps this cell re-runnable: a plain mkdir reports an
# error when the directory already exists or when a parent directory is missing.
os.makedirs(TF_RECORD_DIR, exist_ok=True)

In [None]:
# Remove 4 bad files
%rm {DATA_DIR}._* 

In [None]:
# Load the bounding-box annotations and attach the single class label plus the
# full path of each annotated image.
metadata_fpath = os.path.join(IMG_DIR, 'bbox_labels.csv')
metadata = pd.read_csv(metadata_fpath)
fnames = metadata['filename']
fpaths = [os.path.join(IMG_DIR, name) for name in fnames]
metadata = metadata.assign(label_name='ball', label_id=1, fpath=fpaths)

### Helpers

In [None]:
def plot_img(arr, fs=(6,6), title=None):
    """Show an image array in a new matplotlib figure.

    Args:
        arr: Image array; it is cast to ``uint8`` before being displayed.
        fs: Figure size, passed to matplotlib as ``figsize``.
        title: Optional title drawn above the image.
    """
    _, ax = plt.subplots(figsize=fs)
    ax.imshow(arr.astype('uint8'))
    ax.set_title(title)
    plt.show()
    
def load_img(fpath):
    """Read an image from disk and return it in RGB channel order.

    Args:
        fpath: Path of the image file to read.

    Returns:
        The image as returned by ``cv2.imread``, converted from BGR to RGB.

    Raises:
        OSError: If ``cv2.imread`` could not read the file (it returns ``None``
            instead of raising).
    """
    # Read the path passed in; the previous version read the notebook-level
    # `img_fpath` global and silently ignored its argument.
    img = cv2.imread(fpath)
    if img is None:
        raise OSError('Could not read image: {}'.format(fpath))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

### Explore

In [None]:
# Display one randomly chosen image together with its array shape.
# random.choice replaces the hand-rolled randint index arithmetic.
img_fpath = random.choice(fpaths)
img = load_img(img_fpath)
print(img.shape)
plot_img(img, fs=(20,10))

In [None]:
# Each metadata row is one bounding box; distinct filenames give the image count.
print("boxes", metadata.shape[0])
print("images", metadata['filename'].nunique())
metadata.head()

### TensorFlow API

In [None]:
import six.moves.urllib as urllib
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
import io

sys.path.append("/home/bfortuner/workplace/VisionQuest/utils/clients/tfmodels/research/")
import utils.clients.tfmodels.research.object_detection.utils.visualization_utils as vis_util
from utils.clients.tfmodels.research.object_detection.utils import label_map_util
from utils.clients.tfmodels.research.object_detection.utils import dataset_util

In [None]:
# Name of the pre-trained detector; the archive is fetched from DOWNLOAD_BASE.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = ''.join((MODEL_NAME, '.tar.gz'))
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Frozen graph path, relative to the working directory after extraction.
PATH_TO_CKPT = ''.join((MODEL_NAME, '/frozen_inference_graph.pb'))
# Label map file used to turn class ids into display names.
PATH_TO_LABELS = 'mscoco_label_map.pbtxt' #'volleyball_label_map.pbtxt'
NUM_CLASSES = 90

In [None]:
# Download model
# Skip the download when the frozen graph is already on disk, so re-running the
# notebook does not fetch the archive again.
if not os.path.exists(PATH_TO_CKPT):
    # urlretrieve replaces the deprecated URLopener().retrieve().
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(MODEL_FILE) as tar_file:
        for member in tar_file.getmembers():
            file_name = os.path.basename(member.name)
            # Only the frozen inference graph is needed from the archive.
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(member, os.getcwd())

In [None]:
# Create a dedicated graph and populate it from the frozen checkpoint file.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    # Parsing and importing only need the bytes, so they run once the file is closed.
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

In [None]:
# Build the class-id -> category lookup that is passed to the visualization call.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

In [None]:
def load_image_into_numpy_array(image):
    """Convert a PIL image into a ``(height, width, 3)`` uint8 array.

    Args:
        image: A PIL image (any mode that ``convert('RGB')`` accepts).

    Returns:
        A new ``numpy.ndarray`` of dtype ``uint8`` with three colour channels.
    """
    # convert('RGB') normalizes grayscale / palette / RGBA inputs to three
    # channels; the previous reshape to (h, w, 3) raised for anything else.
    rgb = image.convert('RGB')
    # np.array returns a fresh array; the detection cell hands it to the
    # visualization helper and then displays the same array, so it is
    # presumably drawn on in place and must be writable.
    return np.array(rgb, dtype=np.uint8)

In [None]:
# Directory of images to run detection on (the same directory as IMG_DIR).
# NOTE(review): not referenced by any other cell visible in this notebook.
PATH_TO_TEST_IMAGES_DIR = IMG_DIR
# Only the first two annotated image paths are run through the detector.
TEST_IMAGE_PATHS = fpaths[:2]
# Size, in inches, of the output images.
IMAGE_SIZE = (20, 14)

In [None]:

# Run the frozen detector on each test image and draw the detections on it.
# The outputs of the last iteration (boxes, scores, classes, num, image_np)
# stay bound at notebook level after the loop.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Define input and output tensors for detection_graph.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for one detected object.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in TEST_IMAGE_PATHS:
            # The context manager closes the image file once its pixels are
            # copied out; previously the handle was left open.
            with Image.open(image_path) as image:
                # The array-based representation of the image is used later to
                # prepare the result image with boxes and labels on it.
                image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
              [detection_boxes, detection_scores, detection_classes, num_detections],
              feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                min_score_thresh=0.25,
                use_normalized_coordinates=True,
                line_thickness=1)
            plt.figure(figsize=IMAGE_SIZE)
            plt.imshow(image_np)

In [None]:
# Peek at the first rows once the annotations are ordered by image filename.
metadata.sort_values('filename').head()

In [None]:
def make_boxes(meta):
    """Group annotation rows by image filename.

    Args:
        meta: DataFrame with a ``filename`` column; each row is one box.

    Returns:
        Dict mapping each filename to the list of its rows, where every row is
        a plain dict obtained by round-tripping the row through JSON.
    """
    grouped = {}
    for _, record in meta.iterrows():
        # The JSON round-trip turns the row into a dict of plain Python values.
        as_dict = json.loads(record.to_json())
        grouped.setdefault(record['filename'], []).append(as_dict)
    return grouped

def create_tf_example(img_fpath, boxes):
    """Build a ``tf.train.Example`` for one image and its bounding boxes.

    Args:
        img_fpath: Path of the image file; its raw bytes are stored unchanged.
        boxes: Iterable of dicts with keys ``x1``, ``x2``, ``y1``, ``y2``,
            ``label_name`` and ``label_id``. The coordinates are divided by the
            image width/height, so they are presumably absolute pixel values
            (TODO confirm against the annotation file).

    Returns:
        A ``tf.train.Example`` with the encoded image, its size, format and
        filename, plus per-box normalized coordinates and class labels.
    """
    filename = os.path.basename(img_fpath).encode('utf8')
    with tf.gfile.GFile(img_fpath, 'rb') as fid:
        encoded_img = fid.read()

    # Open the bytes just long enough to read size and container format.
    with Image.open(io.BytesIO(encoded_img)) as image:
        (width, height) = image.size
        detected_format = image.format
    # Record the format actually found in the file instead of always claiming
    # PNG; fall back to the previous 'png' value if PIL reports none.
    image_format = (detected_format or 'png').lower().encode('utf8')

    xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = [] # List of string class name of bounding box (1 per box)
    classes = [] # List of integer class id of bounding box (1 per box)

    for box in boxes:
        xmins.append(box['x1'] / width)
        xmaxs.append(box['x2'] / width)
        ymins.append(box['y1'] / height)
        ymaxs.append(box['y2'] / height)
        classes_text.append(box['label_name'].encode('utf8'))
        classes.append(box['label_id'])

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_img),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

def create_tf_records(metadata, writer):
    """Serialize one example per image in ``metadata`` and close the writer.

    Args:
        metadata: Annotation rows; they are grouped per image by ``make_boxes``.
        writer: Object with ``write(bytes)`` and ``close()``. It is always
            closed before this function returns or raises.
    """
    try:
        bounding_boxes = make_boxes(metadata)
        for fname, boxes in bounding_boxes.items():
            img_fpath = os.path.join(IMG_DIR, fname)
            tf_example = create_tf_example(img_fpath, boxes)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when an image fails, so the file handle is
        # released and already-written records are not left in an open file.
        writer.close()
    
def create_train_val(metadata, val_size):
    """Split ``metadata`` by row and write ``train.record`` / ``val.record``.

    The last ``val_size`` rows go to the validation file and the rest to the
    training file, both under ``TF_RECORD_DIR``.

    NOTE(review): the split is by row (one row per box), while records are
    grouped per filename afterwards; an image whose boxes straddle the split
    point will appear in both files. Confirm whether that is acceptable.

    Args:
        metadata: Annotation DataFrame, one row per bounding box.
        val_size: Number of trailing rows to use for validation (>= 0).

    Raises:
        ValueError: If ``val_size`` is negative.
    """
    if val_size < 0:
        raise ValueError('val_size must be non-negative, got {}'.format(val_size))
    # Use an explicit split index: `iloc[:-val_size]` with val_size == 0 is
    # `iloc[:0]`, which emptied the training set and sent every row to val.
    split_at = max(len(metadata) - val_size, 0)
    train_df = metadata.iloc[:split_at]
    val_df = metadata.iloc[split_at:]

    # create_tf_records closes the writer it is given, so each writer is opened
    # right before it is used.
    writer_train = tf.python_io.TFRecordWriter(
        os.path.join(TF_RECORD_DIR, 'train.record'))
    create_tf_records(train_df, writer_train)
    writer_val = tf.python_io.TFRecordWriter(
        os.path.join(TF_RECORD_DIR, 'val.record'))
    create_tf_records(val_df, writer_val)

In [None]:
# Build a single tf.train.Example for the first annotated image as a smoke test.
# iloc selects by position, so this does not depend on the index labels.
fname = metadata['filename'].iloc[0]
fpath = metadata['fpath'].iloc[0]
all_boxes = make_boxes(metadata)
# Use a separate name here: rebinding `boxes` overwrote the detection output
# that the Predict cells below read.
sample_boxes = all_boxes[fname]
print(sample_boxes)
tf_example = create_tf_example(fpath, sample_boxes)

In [None]:
# Write train.record / val.record under TF_RECORD_DIR; the last 300 metadata
# rows form the validation split.
create_train_val(metadata, val_size=300)

### Predict

In [None]:
# Strip singleton dimensions from the detector outputs left over from the
# detection cell, then inspect the entry at index 10 next to the label lookup.
boxes, scores = np.squeeze(boxes), np.squeeze(scores)
classes = np.squeeze(classes).astype(np.int32)
(boxes[10], scores[10], classes[10], category_index)

In [None]:
# IPython `??` introspection: shows the source of the visualization helper.
# Development aid only; nothing else in the notebook depends on this cell.
vis_util.visualize_boxes_and_labels_on_image_array??

In [None]:
# Re-draw the detections with a stricter score threshold (0.5 instead of 0.25).
# Relies on image_np, boxes, classes and scores left over from the last
# iteration of the detection loop and the squeeze cell above; image_np was
# already passed to this same helper in the detection loop.
# # Actual detection.
# (boxes, scores, classes, num) = sess.run(
#   [detection_boxes, detection_scores, detection_classes, num_detections],
#   feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    boxes,
    classes,
    scores,
    category_index,
    min_score_thresh=0.5,
    use_normalized_coordinates=True,
    line_thickness=1,
)
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)

In [None]:
# Train



### Links

* https://towardsdatascience.com/how-to-train-your-own-object-detector-with-tensorflows-object-detector-api-bec72ecfe1d9
* https://github.com/christopher5106/FastAnnotationTool
* http://androidkt.com/train-object-detection/
* https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
* https://towardsdatascience.com/building-a-real-time-object-recognition-app-with-tensorflow-and-opencv-b7a2b4ebdc32