Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallelization of commands (CHIP and PREDICT) #671

Merged
merged 41 commits into from Mar 25, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
af61e9f
Allow file existence to check for directories
lossyrob Jan 29, 2019
ff06499
Move TF Deeplab to use chip directory instead of individual chip files.
lossyrob Jan 29, 2019
c71f505
Split update_for_command and report_io.
lossyrob Jan 30, 2019
53e3a6d
Introduce command splitting, apply to predict and chip.
lossyrob Jan 30, 2019
d6f9eef
Don't create command configurations that don't need to run.
lossyrob Feb 7, 2019
f367a4c
Relax restriction in validation of chip command config.
lossyrob Feb 7, 2019
d764a01
Updated Keras chip classification backend to utilize split commands.
lossyrob Feb 17, 2019
d6eeae4
Added option for integration tests to use non-temp rv_root.
lossyrob Feb 17, 2019
39f9a94
Add utility for flipping a scene and its labels.
lossyrob Feb 17, 2019
d444263
Fix formatting issues.
lossyrob Feb 17, 2019
905928f
Fix integration test file placement for chip classification.
lossyrob Feb 17, 2019
fcbd012
Merge remote-tracking branch 'upstream/develop' into rde/feature/para…
lossyrob Feb 17, 2019
bd3c884
Fix group sizing issue for command splitting.
lossyrob Feb 17, 2019
5fb6aee
Work around the issue of max 20 parent jobs in AWS Batch.
lossyrob Feb 17, 2019
4e5dce2
Style fix.
lossyrob Feb 17, 2019
e75ad58
Merge remote-tracking branch 'upstream/develop' into rde/feature/para…
lossyrob Feb 27, 2019
f72b9be
Allow easier modification to integration test for debugging.
lossyrob Mar 1, 2019
705c2c8
Modify object detection integration test for 2 scenes.
lossyrob Mar 1, 2019
017fa77
Implement parallelization for object detection.
lossyrob Mar 1, 2019
c883ef3
Fix formatting.
lossyrob Mar 1, 2019
40c6761
Add docs for parallelization.
lossyrob Mar 1, 2019
37713f0
Use splits in mock test run.
lossyrob Mar 2, 2019
bc86b29
Have codecov post new comment to clarify coverage change on update
lossyrob Mar 3, 2019
187f2db
Increase test coverage.
lossyrob Mar 3, 2019
fef5b0b
Modify chip classification to use pretrained model.
lossyrob Mar 3, 2019
f733072
Modify integration test flip scene utility, flip semseg scene2
lossyrob Mar 3, 2019
09163a8
Avoid double building PRs that are branches on main repo.
lossyrob Mar 3, 2019
0a6275a
Format code.
lossyrob Mar 3, 2019
a6d81a4
Merge remote-tracking branch 'upstream/develop' into rde/feature/para…
lossyrob Mar 4, 2019
2efa736
Fix contributing documentation.
lossyrob Mar 7, 2019
94698c9
Document report_io and update_for_command
lossyrob Mar 7, 2019
cf540c4
Add parallelization and update_for_command refactor to changelog.
lossyrob Mar 7, 2019
3d98352
Merge remote-tracking branch 'upstream/develop' into rde/feature/para…
lossyrob Mar 9, 2019
98a6b2c
Merge remote-tracking branch 'upstream/develop' into rde/feature/para…
lossyrob Mar 21, 2019
cd1c7e7
Fix logic with training data in TF Object Detection.
lossyrob Mar 25, 2019
17632cb
Fix typos in docs.
lossyrob Mar 25, 2019
a87eaa9
Reword doc string.
lossyrob Mar 25, 2019
1675750
Use default dict.
lossyrob Mar 25, 2019
f21edcd
Use str_to_file
lossyrob Mar 25, 2019
3175f80
Fix formatting.
lossyrob Mar 25, 2019
2c6e560
Refactor grouped usage.
lossyrob Mar 25, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion rastervision/filesystem/filesystem.py
Expand Up @@ -33,7 +33,14 @@ def matches_uri(uri: str, mode: str) -> bool:

@staticmethod
@abstractmethod
def file_exists(uri: str) -> bool:
def file_exists(uri: str, include_dir: bool = True) -> bool:
"""Check if a file exists.
Args:
uri: The URI to check
include_dir: Include directories in check, if this filesystem
supports directory reads. Otherwise only
return true if a single file exists at the URI.
"""
pass # pragma: no cover

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion rastervision/filesystem/http_filesystem.py
Expand Up @@ -16,7 +16,7 @@ def matches_uri(uri: str, mode: str) -> bool:
return parsed_uri.scheme in ['http', 'https']

@staticmethod
def file_exists(uri: str) -> bool:
def file_exists(uri: str, include_dir: bool = True) -> bool:
try:
response = urllib.request.urlopen(uri)
if response.getcode() == 200:
Expand Down
4 changes: 2 additions & 2 deletions rastervision/filesystem/local_filesystem.py
Expand Up @@ -39,8 +39,8 @@ def matches_uri(uri: str, mode: str) -> bool:
return True

@staticmethod
def file_exists(uri: str) -> bool:
return os.path.isfile(uri)
def file_exists(uri: str, include_dir: bool = True) -> bool:
return (os.path.isfile(uri) or (include_dir and os.path.isdir(uri)))

@staticmethod
def read_str(file_uri: str) -> str:
Expand Down
12 changes: 8 additions & 4 deletions rastervision/filesystem/s3_filesystem.py
Expand Up @@ -78,19 +78,23 @@ def matches_uri(uri: str, mode: str) -> bool:
return parsed_uri.scheme == 's3'

@staticmethod
def file_exists(uri: str) -> bool:
def file_exists(uri: str, include_dir: bool = True) -> bool:
# Lazily load boto
import botocore

s3 = S3FileSystem.get_session().resource('s3')
s3 = S3FileSystem.get_session().client('s3')
parsed_uri = urlparse(uri)
bucket = parsed_uri.netloc
key = parsed_uri.path[1:]

try:
s3.Object(bucket, key).load()
response = s3.list_objects_v2(Bucket=bucket, Prefix=key, MaxKeys=1)
if response['KeyCount'] == 0:
return False
response_key = response['Contents'][0]['Key']
return (response_key == key) or include_dir
except botocore.exceptions.ClientError as e:
return False
return True

@staticmethod
def read_str(uri: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions rastervision/utils/files.py
Expand Up @@ -156,10 +156,10 @@ def download_or_copy(uri, target_dir, fs=None):
return local_path


def file_exists(uri, fs=None):
def file_exists(uri, fs=None, include_dir=True):
if not fs:
fs = FileSystem.get_file_system(uri, 'r')
return fs.file_exists(uri)
return fs.file_exists(uri, include_dir)


def list_paths(uri, ext='', fs=None):
Expand Down
32 changes: 32 additions & 0 deletions tests/utils/test_files.py
Expand Up @@ -337,6 +337,23 @@ def test_list_paths_s3(self):
list_paths(s3_directory)
self.assertEqual(len(list_paths(s3_directory)), 1)

def test_file_exists(self):
path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
directory = os.path.dirname(path)
make_dir(directory, check_empty=False)

with open(path, 'w+') as file:
file.write(self.lorem)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could use str_to_file instead.

upload_or_copy(path, s3_path)

self.assertTrue(file_exists(s3_directory, include_dir=True))
self.assertTrue(file_exists(s3_path, include_dir=False))
self.assertFalse(file_exists(s3_directory, include_dir=False))
self.assertFalse(file_exists(s3_directory + 'NOTPOSSIBLE', include_dir=False))



class TestLocalMisc(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -428,6 +445,21 @@ def test_last_modified(self):

self.assertTrue(isinstance(stamp, datetime.datetime))

def test_file_exists(self):
fs = FileSystem.get_file_system(self.temp_dir.name, 'r')

path1 = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
dir1 = os.path.dirname(path1)
make_dir(dir1, check_empty=False)

with open(path1, 'w+') as file:
file.write(self.lorem)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could use str_to_file instead.


self.assertTrue(fs.file_exists(dir1, include_dir=True))
self.assertTrue(fs.file_exists(path1, include_dir=False))
self.assertFalse(fs.file_exists(dir1, include_dir=False))
self.assertFalse(fs.file_exists(dir1 + 'NOTPOSSIBLE', include_dir=False))


class TestHttpMisc(unittest.TestCase):
def setUp(self):
Expand Down