James McClain committed Aug 1, 2018
1 parent 1d5e222 commit 79ab472
Showing 8 changed files with 209 additions and 62 deletions.
@@ -13,8 +13,7 @@
from rastervision.core.crs_transformer import CRSTransformer
from rastervision.core.box import Box
from rastervision.core.class_map import ClassMap, ClassItem
from rastervision.utils.files import (NotReadableError, NotWritableError)


class DoubleCRSTransformer(CRSTransformer):
143 changes: 94 additions & 49 deletions src/rastervision/ml_backends/tf_deeplab.py
@@ -1,6 +1,9 @@
import io
import os
import glob
import numpy as np
import shutil
import tarfile
import tempfile
import tensorflow as tf
import uuid
@@ -10,6 +13,7 @@
from subprocess import Popen
from tensorflow.core.example.example_pb2 import Example
from typing import (List, Tuple)
from urllib.parse import urlparse

from object_detection.utils import dataset_util
from rastervision.core.box import Box
@@ -18,9 +22,11 @@
from rastervision.core.scene import Scene
from rastervision.core.training_data import TrainingData
from rastervision.ml_backends.tf_object_detection_api import (
    write_tf_record, terminate_at_exit, TRAIN, VALIDATION)
from rastervision.utils.misc import save_img
from rastervision.utils.files import (get_local_path, upload_if_needed,
                                      make_dir, download_if_needed,
                                      sync_dir)


def numpy_to_png(array: np.ndarray) -> str:
@@ -57,23 +63,6 @@ def png_to_numpy(png: str, dtype=np.uint8) -> np.ndarray:
return np.array(im)


def write_tf_record(tf_examples: List[Example], output_path: str) -> None:
"""Write an array of TFRecords to the given output path.
Args:
tf_examples: An array of TFRecords; a
list(tensorflow.core.example.example_pb2.Example)
output_path: The path where the records should be stored.
Returns:
None
"""
with tf.python_io.TFRecordWriter(output_path) as writer:
for tf_example in tf_examples:
writer.write(tf_example.SerializeToString())


def make_tf_examples(training_data: TrainingData,
class_map: ClassMap) -> List[Example]:
"""Take training data and a class map and return a list of TFRecords.
@@ -227,16 +216,20 @@ def fn(n):
return tf.train.Example(features=features)


def get_record_uri(uri: str, split: str) -> str:
return join(uri, '{}-0.record'.format(split))
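
For concreteness, here is the record URI this helper produces, assuming TRAIN and VALIDATION are the strings 'train' and 'validation' (the bucket name is invented for illustration):

from os.path import join

# Mirrors get_record_uri above; os.path.join simply appends the segment.
assert join('s3://my-bucket/chips', '{}-0.record'.format('train')) == \
    's3://my-bucket/chips/train-0.record'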


class TFDeeplab(MLBackend):
"""MLBackend-derived type that implements the TensorFlow DeepLab
backend.
"""

    def __init__(self):
        """Constructor."""
        self.temp_dir_obj = tempfile.TemporaryDirectory()
        self.temp_dir = self.temp_dir_obj.name

def process_scene_data(self, scene: Scene, data: TrainingData,
class_map: ClassMap, options) -> str:
@@ -253,15 +246,17 @@ def process_scene_data(self, scene: Scene, data: TrainingData,
the config file.
Returns:
            The local path to the generated file.
"""
        tf_examples = make_tf_examples(data, class_map)

        base_uri = options.output_uri
        split = '{}-{}'.format(scene.id, uuid.uuid4())
        record_path = join(base_uri, '{}.record'.format(split))
        record_path = get_local_path(record_path, self.temp_dir)

        make_dir(record_path, use_dirname=True)
        write_tf_record(tf_examples, record_path)

return record_path
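
The local-path round trip above is the heart of this change: records are written under self.temp_dir and only uploaded when the target URI is remote. A minimal sketch of the mapping get_local_path is assumed to perform (get_local_path_sketch is a hypothetical stand-in; the real helper in rastervision.utils.files may differ in details):

from os.path import join
from urllib.parse import urlparse

def get_local_path_sketch(uri: str, temp_dir: str) -> str:
    # Local paths pass through unchanged; remote URIs are mirrored
    # under temp_dir so callers can write locally and upload later.
    parsed = urlparse(uri)
    if parsed.scheme == '':
        return uri
    return join(temp_dir, parsed.netloc, parsed.path.lstrip('/'))

# get_local_path_sketch('s3://bucket/chips/abc.record', '/tmp/rv')
#   -> '/tmp/rv/bucket/chips/abc.record'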
@@ -273,7 +268,6 @@ def process_sceneset_results(self, training_results: List[str],
(one for training data and one for validation data).
Args:
training_results: A list of paths to TFRecords containing
training data.
            validation_results: A list of paths to TFRecords
@@ -287,38 +281,84 @@
None
"""
        base_uri = options.output_uri

        training_record_path = get_record_uri(base_uri, TRAIN)
        training_record_path_local = get_local_path(training_record_path,
                                                    self.temp_dir)
        validation_record_path = get_record_uri(base_uri, VALIDATION)
        validation_record_path_local = get_local_path(validation_record_path,
                                                      self.temp_dir)

        make_dir(training_record_path_local, use_dirname=True)
        make_dir(validation_record_path_local, use_dirname=True)  # sic
        merge_tf_records(training_record_path_local, training_results)
        merge_tf_records(validation_record_path_local, validation_results)
        upload_if_needed(training_record_path_local, training_record_path)
        upload_if_needed(validation_record_path_local, validation_record_path)

if options.debug:
training_zip_path = join(base_uri, '{}'.format(TRAIN))
training_zip_path_local = get_local_path(training_zip_path,
self.temp_dir)
validation_zip_path = join(base_uri, '{}'.format(VALIDATION))
validation_zip_path_local = get_local_path(validation_zip_path,
self.temp_dir)

            with tempfile.TemporaryDirectory() as debug_dir:
                make_debug_images(training_record_path_local, debug_dir)
                shutil.make_archive(training_zip_path_local, 'zip', debug_dir)
            with tempfile.TemporaryDirectory() as debug_dir:
                make_debug_images(validation_record_path_local, debug_dir)
                shutil.make_archive(validation_zip_path_local, 'zip',
                                    debug_dir)
            upload_if_needed('{}.zip'.format(training_zip_path_local),
                             '{}.zip'.format(training_zip_path))
            upload_if_needed('{}.zip'.format(validation_zip_path_local),
                             '{}.zip'.format(validation_zip_path))
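
One subtlety worth noting: shutil.make_archive appends the format suffix itself, which is why the upload step formats '{}.zip' onto each archive base name. A self-contained illustration (paths invented):

import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as src_dir, \
        tempfile.TemporaryDirectory() as out_dir:
    open(os.path.join(src_dir, 'chip.png'), 'w').close()
    base = os.path.join(out_dir, 'train')
    archive = shutil.make_archive(base, 'zip', src_dir)
    # make_archive returns the path with the '.zip' suffix appended.
    assert archive == base + '.zip'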

    def train(self, class_map: ClassMap, options) -> None:
        """Train a DeepLab model using the options provided in the
        `segmentation_options` section of the workflow config file.

        Args:
            class_map: A mapping between integral and textual classes.
            options: Options provided in the `segmentation_options`
                section of the workflow configuration file.

        Returns:
            None
        """
        soptions = options.segmentation_options
        train_py = soptions.train_py

        # Set up local input and output directories.
        train_logdir = options.output_uri
        # XXX in spite of the prohibition, it might make sense to log
        # directly to s3 in the remote case.
        train_logdir_local = get_local_path(train_logdir, self.temp_dir)
        dataset_dir = options.training_data_uri
        dataset_dir_local = get_local_path(dataset_dir, self.temp_dir)
        make_dir(train_logdir_local)
        make_dir(dataset_dir_local)  # sic
        download_if_needed(get_record_uri(dataset_dir, TRAIN), self.temp_dir)

# Download and untar initial checkpoint.
tf_initial_checkpoints_uri = soptions.tf_initial_checkpoints_uri
make_dir(self.temp_dir)
download_if_needed(tf_initial_checkpoints_uri, self.temp_dir)
tfic_tarball = get_local_path(tf_initial_checkpoints_uri,
self.temp_dir)
tfic_dir = os.path.dirname(tfic_tarball)
with tarfile.open(tfic_tarball, 'r:gz') as tar:
tar.extractall(tfic_dir)
tfic_index = glob.glob('{}/*/*.index'.format(tfic_dir))[0]
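
The glob on the line above assumes the checkpoint tarball unpacks into a single subdirectory containing the checkpoint files. A sketch of the expected layout, as descriptive comments (file names are illustrative, patterned on the deeplabv3_mnv2_pascal_train_aug tarball used in the sample configs):

# After extraction, tfic_dir is expected to look roughly like:
#
#   <tfic_dir>/
#       deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz
#       deeplabv3_mnv2_pascal_train_aug/
#           model.ckpt-30000.index          <- matched by '{}/*/*.index'
#           model.ckpt-30000.data-00000-of-00001
#
# so tfic_index resolves to the .index file one level down. If the
# tarball contained more than one .index file, [0] would pick an
# arbitrary one.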

        # Build the array of arguments that will be used to run the
        # DeepLab training script.
        args = ['python', train_py]
        args.append('--train_logdir={}'.format(train_logdir_local))
        args.append('--tf_initial_checkpoint={}'.format(tfic_index))
        args.append('--dataset_dir={}'.format(dataset_dir_local))
        args.append('--training_number_of_steps={}'.format(
            soptions.training_number_of_steps))
if len(soptions.train_split) > 0:
@@ -336,12 +376,17 @@ def train(self, class_map, options):
if len(soptions.dataset):
args.append('--dataset="{}"'.format(soptions.dataset))

        # Train
        train_process = Popen(args)
        terminate_at_exit(train_process)
        if urlparse(train_logdir).scheme == 's3':
            sync_dir(train_logdir_local, train_logdir, delete=True)

        train_process.wait()
        # XXX tensorboard
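
For context, terminate_at_exit presumably registers the child process for cleanup so a dying parent doesn't orphan the trainer. A minimal sketch of that pattern (the real helper lives in tf_object_detection_api and may differ):

import atexit
from subprocess import Popen

def terminate_at_exit_sketch(process: Popen) -> None:
    # Ensure the child is terminated when the parent interpreter exits.
    def cleanup():
        if process.poll() is None:  # still running
            process.terminate()
    atexit.register(cleanup)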

def predict(self, chip, options):
import pdb
pdb.set_trace()
return 1
16 changes: 15 additions & 1 deletion src/rastervision/ml_backends/tf_object_detection_api.py
@@ -16,6 +16,9 @@
import numpy as np
from google.protobuf import text_format

from tensorflow.core.example.example_pb2 import Example
from typing import List

from object_detection.utils import dataset_util
from object_detection.protos.string_int_label_map_pb2 import (
StringIntLabelMap, StringIntLabelMapItem)
@@ -84,7 +87,18 @@ def create_tf_example(image, window, labels, class_map, chip_id=''):
return tf_example


def write_tf_record(tf_examples: List[Example], output_path: str) -> None:
    """Write an array of TFRecords to the given output path.

    Args:
        tf_examples: An array of TFRecords; a
            list(tensorflow.core.example.example_pb2.Example)
        output_path: The path where the records should be stored.

    Returns:
        None
    """
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for tf_example in tf_examples:
            writer.write(tf_example.SerializeToString())
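
As a quick usage sketch, here is a round trip through write_tf_record using the matching TF 1.x reader API, assuming write_tf_record above is in scope (the feature name and path are invented):

import tensorflow as tf
from tensorflow.core.example.example_pb2 import Example

# Build a trivial Example; the 'id' feature is illustrative only.
example = tf.train.Example(features=tf.train.Features(feature={
    'id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[b'chip-0']))
}))

path = '/tmp/demo-0.record'  # invented output path
write_tf_record([example], path)

# tf.python_io.tf_record_iterator yields the serialized records back.
for serialized in tf.python_io.tf_record_iterator(path):
    print(Example.FromString(serialized))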
2 changes: 0 additions & 2 deletions src/rastervision/ml_tasks/semantic_segmentation.py
@@ -39,8 +39,6 @@ def get_train_windows(self, scene: Scene, options) -> List[Box]:
return windows

def get_train_labels(self, window, scene, options):
import pdb
pdb.set_trace()
label_store = scene.ground_truth_label_store
chip = label_store.src._get_chip(window)
fn = label_store.fn
2 changes: 1 addition & 1 deletion src/rastervision/protos/train.proto
@@ -19,7 +19,7 @@ message TrainConfig {

message SegmentationOptions {
optional string train_py = 1 [default="/opt/tf-models/deeplab/train.py"];
  required string tf_initial_checkpoints_uri = 2;
optional int32 training_number_of_steps = 3 [default=1];
optional string train_split = 4;
optional string model_variant = 5;
@@ -0,0 +1,94 @@
{
"train_scenes": [
{
"id": "2-10",
"raster_source": {
"geotiff_files": {
"uris": [
"/opt/data/top_potsdam_2_10_RGBIR.tif"
]
}
},
"ground_truth_label_store": {
"segmentation_raster_file": {
"src": {
"geotiff_files": {
"uris": [
"/opt/data/top_potsdam_2_10_label_georeferenced.tif"
]
}
},
"src_classes": [ 6, 1 ],
"dst_classes": [ 1, 0 ]
}
}
}
],
"test_scenes": [
{
"id": "2-11",
"raster_source": {
"geotiff_files": {
"uris": [
"/opt/data/top_potsdam_2_11_RGBIR.tif"
]
}
},
"ground_truth_label_store": {
"segmentation_raster_file": {
"src": {
"geotiff_files": {
"uris": [
"/opt/data/top_potsdam_2_11_label_georeferenced.tif"
]
}
},
"src_classes": [ 6 ],
"dst_classes": [ 1 ]
}
}
}
],
"machine_learning": {
"task": "SEMANTIC_SEGMENTATION",
"backend": "TF_DEEPLAB",
"class_items": [
{
"id": 1,
"name": "car"
}
]
},
"make_training_chips_options": {
"segmentation_options": {
}
},
"train_options": {
"pretrained_model_uri": "/dev/null",
"backend_config_uri": "/dev/null",
"sync_interval": 600,
"segmentation_options": {
"tf_initial_checkpoints_uri": "{rv_root}/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz"
}
},
"predict_options": {
"segmentation_options": {
}
},
"eval_options": {
},
"debug": true,
"chip_size": 300,
"raster_transformer": {
"channel_order": [0, 1, 2]
},
"remote_uri_map": {
"rv_root": "s3://raster-vision-mcclain",
"raw": "s3://raster-vision-mcclain/raw-data"
},
"raw_dataset_key": "cowc-potsdam",
"dataset_key": "test",
"model_key": "deeplab",
"prediction_key": "test-set",
"eval_key": "default"
}
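
A note on the '{rv_root}' placeholder in tf_initial_checkpoints_uri above: it appears to be expanded against the keys of "remote_uri_map" before the URIs are used. A hedged sketch of that substitution (the actual mechanism lives in Raster Vision's workflow runner and may differ):

# Keys in "remote_uri_map" are substituted into '{key}' templates.
uri_map = {'rv_root': 's3://raster-vision-mcclain'}
template = '{rv_root}/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz'
print(template.format(**uri_map))
# s3://raster-vision-mcclain/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz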
@@ -68,7 +68,7 @@
"backend_config_uri": "/dev/null",
"sync_interval": 600,
"segmentation_options": {
"tf_initial_checkpoint": "/opt/data/deeplabv3_mnv2_pascal_train_aug/model.ckpt-30000.index"
"tf_initial_checkpoints_uri": "/opt/data/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz"
}
},
"predict_options": {
