diff --git a/examples/images/preview_tiles.jpg b/examples/images/preview_tiles.jpg new file mode 100644 index 0000000..72b44c8 Binary files /dev/null and b/examples/images/preview_tiles.jpg differ diff --git a/examples/images/tensorboard_sc.jpg b/examples/images/tensorboard_sc.jpg new file mode 100644 index 0000000..9591130 Binary files /dev/null and b/examples/images/tensorboard_sc.jpg differ diff --git a/examples/images/tf_od_result.jpg b/examples/images/tf_od_result.jpg new file mode 100644 index 0000000..8550301 Binary files /dev/null and b/examples/images/tf_od_result.jpg differ diff --git a/examples/images/tf_sc.jpg b/examples/images/tf_sc.jpg new file mode 100644 index 0000000..c6a4585 Binary files /dev/null and b/examples/images/tf_sc.jpg differ diff --git a/examples/images/tf_tiles.jpg b/examples/images/tf_tiles.jpg new file mode 100644 index 0000000..46f2d2e Binary files /dev/null and b/examples/images/tf_tiles.jpg differ diff --git a/examples/utils/building_od.pbtxt b/examples/utils/building_od.pbtxt new file mode 100644 index 0000000..e38d224 --- /dev/null +++ b/examples/utils/building_od.pbtxt @@ -0,0 +1,4 @@ +item { + id: 1 + name: 'building' +} diff --git a/examples/utils/ssd_inception_v2_coco.config b/examples/utils/ssd_inception_v2_coco.config new file mode 100644 index 0000000..a2d0d5f --- /dev/null +++ b/examples/utils/ssd_inception_v2_coco.config @@ -0,0 +1,191 @@ +# SSD with Inception v2 configuration for MSCOCO Dataset. +# Users should configure the fine_tune_checkpoint field in the train config as +# well as the label_map_path and input_path fields in the train_input_reader and +# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that +# should be configured. + +model { + ssd { + num_classes: 90 + box_coder { + faster_rcnn_box_coder { + y_scale: 10.0 + x_scale: 10.0 + height_scale: 5.0 + width_scale: 5.0 + } + } + matcher { + argmax_matcher { + matched_threshold: 0.5 + unmatched_threshold: 0.5 + ignore_thresholds: false + negatives_lower_than_unmatched: true + force_match_for_each_row: true + } + } + similarity_calculator { + iou_similarity { + } + } + anchor_generator { + ssd_anchor_generator { + num_layers: 6 + min_scale: 0.2 + max_scale: 0.95 + aspect_ratios: 1.0 + aspect_ratios: 2.0 + aspect_ratios: 0.5 + aspect_ratios: 3.0 + aspect_ratios: 0.3333 + reduce_boxes_in_lowest_layer: true + } + } + image_resizer { + fixed_shape_resizer { + height: 300 + width: 300 + } + } + box_predictor { + convolutional_box_predictor { + min_depth: 0 + max_depth: 0 + num_layers_before_predictor: 0 + use_dropout: false + dropout_keep_probability: 0.8 + kernel_size: 3 + box_code_size: 4 + apply_sigmoid_to_scores: false + conv_hyperparams { + activation: RELU_6, + regularizer { + l2_regularizer { + weight: 0.00004 + } + } + initializer { + truncated_normal_initializer { + stddev: 0.03 + mean: 0.0 + } + } + } + } + } + feature_extractor { + type: 'ssd_inception_v2' + min_depth: 16 + depth_multiplier: 1.0 + conv_hyperparams { + activation: RELU_6, + regularizer { + l2_regularizer { + weight: 0.00004 + } + } + initializer { + truncated_normal_initializer { + stddev: 0.03 + mean: 0.0 + } + } + batch_norm { + train: true, + scale: true, + center: true, + decay: 0.9997, + epsilon: 0.001, + } + } + } + loss { + classification_loss { + weighted_sigmoid { + anchorwise_output: true + } + } + localization_loss { + weighted_smooth_l1 { + anchorwise_output: true + } + } + hard_example_miner { + num_hard_examples: 3000 + iou_threshold: 0.99 + loss_type: CLASSIFICATION + 
      max_negatives_per_positive: 3
      min_negatives_per_image: 0
    }
    classification_weight: 1.0
    localization_weight: 1.0
  }
  normalize_loss_by_num_matches: true
  post_processing {
    batch_non_max_suppression {
      score_threshold: 1e-8
      iou_threshold: 0.6
      max_detections_per_class: 100
      max_total_detections: 100
    }
    score_converter: SIGMOID
  }
}
}

train_config: {
  batch_size: 24
  optimizer {
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.004
          decay_steps: 800720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  fine_tune_checkpoint: "ssd_inception_v2_coco_2017_11_17/model.ckpt"
  from_detection_checkpoint: true
  # Note: The line below limits training to 200k steps, which was empirically
  # found to be sufficient. It effectively bypasses the learning rate schedule
  # (the learning rate will never decay). Remove it to train indefinitely.
  num_steps: 200000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: "data/train_buildings.record"
  }
  label_map_path: "data/building_od.pbtxt"
}

eval_config: {
  num_examples: 8000
  # Note: The line below limits evaluation to 10 rounds. Remove it to
  # evaluate indefinitely.
  max_evals: 10
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "data/test_buildings.record"
  }
  label_map_path: "data/building_od.pbtxt"
  shuffle: false
  num_readers: 1
  num_epochs: 1
}
diff --git a/examples/utils/tf_od_predict.py b/examples/utils/tf_od_predict.py
new file mode 100644
index 0000000..2209245
--- /dev/null
+++ b/examples/utils/tf_od_predict.py
@@ -0,0 +1,113 @@
"""
Adapted from the TensorFlow Object Detection API
(https://github.com/tensorflow/models/tree/master/research/object_detection).
Save this script under the directory `models/research/object_detection/`.

To use, run:
python tf_od_predict.py --model_name=building_od_ssd \
    --path_to_label=data/building_od.pbtxt \
    --test_image_path=test_images
"""

import os
from os import path as op
import sys
import glob

import numpy as np
import tensorflow as tf
from PIL import Image

sys.path.append("..")

from utils import label_map_util
from utils import visualization_utils as vis_util

flags = tf.app.flags
flags.DEFINE_string('model_name', '', 'Path to frozen detection graph')
flags.DEFINE_string('path_to_label', '', 'Path to label file')
flags.DEFINE_string('test_image_path', '', 'Path to test images and output directory')
FLAGS = flags.FLAGS


def load_image_into_numpy_array(image):
    """Convert a PIL image into a (height, width, 3) uint8 numpy array."""
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)


def tf_od_pred():
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Define the input and output tensors for detection_graph.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the confidence level for a detected object.
            # Scores are shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            for image_path in test_imgs:
                image = Image.open(image_path)
                # The array-based representation of the image is used to
                # prepare the result image with boxes and labels on it.
                image_np = load_image_into_numpy_array(image)
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                vis_image = vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=1)
                # Count the detections that clear the default visualization
                # score threshold of 0.5.
                num_above_thresh = int(np.sum(np.squeeze(scores) > 0.5))
                print("{} boxes in {} image tile!".format(num_above_thresh, image_path))
                image_pil = Image.fromarray(np.uint8(vis_image)).convert('RGB')
                # Overwrite the test tile with the visualized detections; open the
                # file in binary mode and save as JPEG to match the .jpg extension.
                with tf.gfile.Open(image_path, 'wb') as fid:
                    image_pil.save(fid, 'JPEG')


if __name__ == '__main__':
    # Load your own trained model inference graph. The inference graph is
    # generated by export_inference_graph.py in `models/research/object_detection/`.
    model_name = op.join(os.getcwd(), FLAGS.model_name)
    # Path to the frozen detection graph.
    path_to_ckpt = op.join(model_name, 'frozen_inference_graph.pb')
    # Path to the label file.
    path_to_label = op.join(os.getcwd(), FLAGS.path_to_label)
    # We only trained on buildings.
    num_classes = 1
    # Directory of test images.
    test_image_path = op.join(os.getcwd(), FLAGS.test_image_path)
    test_imgs = glob.glob(test_image_path + "/*.jpg")

    ############
    # Load the frozen TensorFlow model
    ############
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(path_to_ckpt, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    ############
    # Load the label file
    ############
    label_map = label_map_util.load_labelmap(path_to_label)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    tf_od_pred()
diff --git a/examples/utils/tf_records_generation.py b/examples/utils/tf_records_generation.py
new file mode 100644
index 0000000..c9081aa
--- /dev/null
+++ b/examples/utils/tf_records_generation.py
@@ -0,0 +1,175 @@
"""
Modified from the TensorFlow Object Detection API documentation
(https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md).

This script plays a role similar to `label-maker package` when Label Maker is
used with the TensorFlow Object Detection API. To create a correct training
dataset for TensorFlow Object Detection, we recommend you:
1. After running `label-maker images`, do `git clone https://github.com/tensorflow/models.git`.
2. Install TensorFlow Object Detection by following
   https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md.
3. From your Label Maker project, copy the `tiles` folder, your `labels.npz`
   file, and this script `tf_records_generation.py` to the TensorFlow Object
   Detection directory.
4. From the directory `models/research/object_detection/` run:

python tf_records_generation.py --label_input=labels.npz \
    --train_rd_path=data/train_buildings.record \
    --test_rd_path=data/test_buildings.record
"""

import os
import io
import shutil
from os import makedirs, path as op
from collections import namedtuple

import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image

from utils import dataset_util

flags = tf.app.flags
flags.DEFINE_string('label_input', '', 'Path to the labels.npz input')
flags.DEFINE_string('train_rd_path', '', 'Path to the output training TFRecord')
flags.DEFINE_string('test_rd_path', '', 'Path to the output test TFRecord')
FLAGS = flags.FLAGS


def class_text_to_int(row_label):
    if row_label == 'building':
        return 1
    return None


def split(df, group):
    """Group the bounding-box dataframe rows by tile filename."""
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x))
            for filename, x in zip(gb.groups.keys(), gb.groups)]


def create_tf_example(group, path):
    """Creates a tf.Example proto from a building image tile.

    Args:
        group: A namedtuple holding the tile filename and a dataframe of its
            bounding boxes.
        path: Directory containing the image tile.

    Returns:
        example: The created tf.Example.
    """
    with tf.gfile.GFile(op.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size
    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for _, row in group.object.iterrows():
        # Normalize pixel coordinates to [0, 1], as the API expects.
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(_):
    labels = np.load(op.join(os.getcwd(), FLAGS.label_input))
    tile_names = [tile for tile in labels.files]
    tile_names.sort()
    tiles = np.array(tile_names)

    tf_tiles_info = []

    for tile in tiles:
        bboxes = labels[tile].tolist()
        width = 256
        height = 256
        if bboxes:
            for bbox in bboxes:
                if bbox[4] == 1:
                    cl_str = "building"
                    # Clamp the box corners to the 256 x 256 tile bounds.
                    bbox = [max(0, min(255, x)) for x in bbox[0:4]]
                    y = ["{}.jpg".format(tile), width, height, cl_str,
                         bbox[0], bbox[1], bbox[2], bbox[3]]
                    tf_tiles_info.append(y)

    # Note: the 80/20 split below is over individual bounding boxes, not tiles.
    split_index = int(len(tf_tiles_info) * 0.8)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    df = pd.DataFrame(tf_tiles_info, columns=column_name)
    # Shuffle the dataframe.
    df = df.sample(frac=1)
    train_df = df[:split_index]
    test_df = df[split_index:]
    print("You have {} training tiles and {} test tiles ready".format(
        len(set(train_df['filename'])), len(set(test_df['filename']))))

    tiles_dir = op.join(os.getcwd(), 'tiles')
    train_dir = op.join(os.getcwd(), 'images', 'train')
    test_dir = op.join(os.getcwd(), 'images', 'test')

    if not op.isdir(train_dir):
        makedirs(train_dir)
    if not op.isdir(test_dir):
        makedirs(test_dir)

    for tile in train_df['filename']:
        tile_dir = op.join(tiles_dir, tile)
        shutil.copy(tile_dir, train_dir)

    for tile in test_df['filename']:
        tile_dir = op.join(tiles_dir, tile)
        shutil.copy(tile_dir, test_dir)

    # Write the training TFRecord.
    writer = tf.python_io.TFRecordWriter(FLAGS.train_rd_path)
    grouped = split(train_df, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, train_dir)
        writer.write(tf_example.SerializeToString())
    writer.close()
    output_train = op.join(os.getcwd(), FLAGS.train_rd_path)
    print('Successfully created the TFRecords: {}'.format(output_train))

    # Write the test TFRecord.
    writer = tf.python_io.TFRecordWriter(FLAGS.test_rd_path)
    grouped = split(test_df, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, test_dir)
        writer.write(tf_example.SerializeToString())
    writer.close()
    output_test = op.join(os.getcwd(), FLAGS.test_rd_path)
    print('Successfully created the TFRecords: {}'.format(output_test))


def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
    """Scales logits before applying a score converter.

    Note: this helper is not called anywhere in this script; it mirrors a
    function in the Object Detection API's post_processing_builder and is
    kept for reference only.
    """
    def score_converter_fn(logits):
        scale = tf.constant([[logit_scale]], tf.float32)
        scaled_logits = tf.divide(logits, scale, name='scale_logits')
        return tf_score_converter_fn(scaled_logits, name='convert_scores')
    score_converter_fn.__name__ = '%s_with_logit_scale' % (
        tf_score_converter_fn.__name__)
    return score_converter_fn


if __name__ == '__main__':
    tf.app.run()
diff --git a/examples/walkthrough-tensorflow-object-detection.md b/examples/walkthrough-tensorflow-object-detection.md
new file mode 100644
index 0000000..e8921a1
--- /dev/null
+++ b/examples/walkthrough-tensorflow-object-detection.md
@@ -0,0 +1,159 @@
# Example Use: A building detector with TensorFlow API

The Google TensorFlow Object Detection API is an open-source framework built on top of TensorFlow that makes it easy to construct, train, and deploy object detection models. In this example, we use it to detect buildings in Vietnam. If you follow these steps, you'll be able to train a TensorFlow Object Detection model with the following results:
![Building detection results from the trained model](images/tf_od_result.jpg)
First install Label Maker (`pip install label-maker`), [tippecanoe](https://github.com/mapbox/tippecanoe), and pandas (`pip install pandas`).

## Create the training dataset

Vietnam has good imagery via the Mapbox Satellite layer, so we'll reuse the configuration file from [another walkthrough](walkthrough-classification-mxnet-sagemaker.md), where it was used to train a building classifier with MXNet and Amazon SageMaker.

Create `config.json` with the following contents:
```json
{
  "country": "vietnam",
  "bounding_box": [105.42,20.75,106.41,21.53],
  "zoom": 15,
  "classes": [
    { "name": "Buildings", "filter": ["has", "building"] }
  ],
  "imagery": "http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN",
  "background_ratio": 1,
  "ml_type": "object-detection"
}
```
If you're curious about these different options, check out [the README](../README.md#configuration) or the other examples. Now that we've configured our project, we'll start with the [CLI commands](../README.md#command-line-use):

```shell
$ label-maker download
$ label-maker labels
```
These commands will first download and retile the OpenStreetMap QA tiles, then create a label file, `data/labels.npz`, with the bounding box of each building. Finally, you can visualize the bounding boxes in the `data/labels` folder that Label Maker creates:
![Label tiles with building bounding boxes](images/tf_tiles.jpg)
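You can also inspect the labels programmatically. `labels.npz` is a compressed NumPy archive keyed by tile name; for object detection, each entry is an array of bounding boxes whose first four values are the box coordinates and whose fifth is the class id (this is the layout that `tf_records_generation.py`, used later in this walkthrough, relies on). A minimal sketch:

```python
import numpy as np

labels = np.load('data/labels.npz')
tile = labels.files[0]
# Each row is one bounding box: [xmin, ymin, xmax, ymax, class].
print(tile, labels[tile])
```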
You can preview how the building bounding boxes are drawn on top of the RGB image tiles with this command:

```shell
$ label-maker preview -n 10
```
This will download ten example images to the folder `data/examples/Buildings/`:
![Preview of building bounding boxes drawn over image tiles](images/preview_tiles.jpg)
You can tell from the image tiles above that some buildings in Vietnam haven't been mapped yet, which will impact our model's prediction accuracy. If you'd like to help improve the labeling accuracy, [start mapping on OpenStreetMap](https://www.openstreetmap.org/#map=10/20.9755/105.4118).

To download all the image tiles that contain buildings:
```shell
$ label-maker images
```
You will have 385 image tiles in your folder `data/tiles`. You don't need to run `label-maker package` for the TensorFlow Object Detection task; we'll use some custom code (included) to write the images and labels to a different format.

Now you are ready to set up the TensorFlow Object Detection API.

## Setup TensorFlow Object Detection API

### Install TensorFlow object detection
- Download the necessary scripts with `git clone https://github.com/tensorflow/models.git`.
- Install the TensorFlow Object Detection API by strictly following [these instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md). Once you've successfully run `python object_detection/builders/model_builder_test.py`, you are ready for the next step.
- To access the necessary utility scripts, you'll need to run all the following commands from the `models/research/object_detection` directory of the cloned repo. **From here on we will refer to the TensorFlow Object Detection directory `models/research/object_detection/` as the TOD directory.**

### Create TFRecords for model training
TensorFlow supports a variety of file formats; the TFRecord file format is a simple record-oriented binary format that many TensorFlow applications use. We provide a Python script [in this repo](../utils/tf_records_generation.py) that converts the `labels.npz` file to TFRecords.

Follow these steps to create the TFRecords.

- Copy `tf_records_generation.py` from [this repo](../utils/tf_records_generation.py) to the TOD directory.
- Copy your `labels.npz` file and `tiles` folder from `data` to the TOD directory.
- From the TOD directory, run:

```shell
python3 tf_records_generation.py --label_input=labels.npz \
    --train_rd_path=data/train_buildings.record \
    --test_rd_path=data/test_buildings.record
```
This will create `train_buildings.record` and `test_buildings.record` files in a folder called `data` in the TOD directory (you can sanity-check them with the snippet after the model setup steps below). It will also copy the images from your `tiles` folder into an `images` folder, split into separate `train` and `test` subfolders.

### Object detection model setup
Now we're ready to set up the model architecture. For this walkthrough, we'll download a pre-trained model from the [TensorFlow model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md). We'll demonstrate using [`ssd_inception_v2_coco`](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) (download link):
  - Download the model, unzip it, and move the folder to the TOD directory.
  - Create a new folder `training` in the TOD directory.
  - Copy a [model configuration file](utils/ssd_inception_v2_coco.config) to the `training` directory. If you aren't using `ssd_inception_v2_coco`, you'll need to update the configuration file to match your selected model.
  - Copy a [class definitions file](utils/building_od.pbtxt) to the `data` directory.
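With the records and model files in place, you can optionally sanity-check the TFRecords before training. A minimal sketch, assuming the TensorFlow 1.x APIs used throughout this walkthrough:

```python
import tensorflow as tf

# Print the tile name and box count for the first few training examples.
for i, record in enumerate(tf.python_io.tf_record_iterator('data/train_buildings.record')):
    example = tf.train.Example.FromString(record)
    feature = example.features.feature
    print(feature['image/filename'].bytes_list.value[0].decode('utf8'),
          len(feature['image/object/class/label'].int64_list.value), 'building(s)')
    if i >= 2:
        break
```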
Now your current directory should be `models/research/object_detection/`. In addition to the files originally included in that repo, your folder structure should look like this:

```
models/research/object_detection/
├── ssd_inception_v2_coco/
├── training/
│   └── ssd_inception_v2_coco.config
├── data/
│   ├── train_buildings.record
│   ├── test_buildings.record
│   └── building_od.pbtxt
└── images/
    ├── train/
    └── test/
```

## Train the TensorFlow object detection model
You are now ready to train the model. From the `models/research/object_detection` directory, run:

```shell
python train.py --logtostderr \
    --train_dir=training/ \
    --pipeline_config_path=training/ssd_inception_v2_coco.config
```
The model checkpoints and outputs for this task will be saved in the `training` folder. When the model is running successfully, you will see:
![TensorFlow training console output](images/tf_sc.jpg)
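While training runs (or after it finishes), you can evaluate checkpoints against the test records. A sketch, assuming the legacy `eval.py` script that ships next to `train.py` in the TOD directory:

```shell
python eval.py --logtostderr \
    --pipeline_config_path=training/ssd_inception_v2_coco.config \
    --checkpoint_dir=training/ \
    --eval_dir=eval/
```

Pointing TensorBoard at `eval/` will then show detection metrics alongside the training loss.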
We ran this model for about **10,000 steps**, which took **37 hours** on a local CPU. If you want a faster model, we recommend trying `ssd_mobilenet_v1_coco` from the TensorFlow model zoo, training on a GPU, or using Google's [Cloud Machine Learning Engine](https://cloud.google.com/ml-engine/).

## Visualize the Model
Create the building detection model inference graph with:

```shell
python export_inference_graph.py --input_type image_tensor \
    --pipeline_config_path training/ssd_inception_v2_coco.config \
    --trained_checkpoint_prefix training/model.ckpt-9575 \
    --output_directory building_od_ssd
```
We can visualize this graph using [`tensorboard`](https://github.com/tensorflow/tensorboard):

```shell
tensorboard --logdir='training'
```

Go to `http://127.0.0.1:6006/` in your web browser and you will see:
![TensorBoard graph view of the exported model](images/tensorboard_sc.jpg)
## Prediction
Now let's run the model over our test tiles to predict where buildings are. Copy [this script](utils/tf_od_predict.py) to the TOD directory, then run:

```shell
python tf_od_predict.py --model_name=building_od_ssd \
    --path_to_label=data/building_od.pbtxt \
    --test_image_path=images/test
```

This code will read through all the test images in the `images/test` folder and write the final predictions back into the same folder. You will see a final prediction like this:
![Final building detection over a test tile](images/tf_od_result.jpg)