Skip to content

Commit

Permalink
Subsets in dataset iterators
Browse files Browse the repository at this point in the history
Summary: For the new API, filtering iterators over sequences by subsets is quite helpful. The change is backwards compatible.

Reviewed By: bottler

Differential Revision: D42739669

fbshipit-source-id: d150a404aeaf42fd04a81304c63a4cba203f897d
  • Loading branch information
shapovalov authored and facebook-github-bot committed Jan 26, 2023
1 parent 54eb76d commit 11959e0
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 7 deletions.
18 changes: 12 additions & 6 deletions pytorch3d/implicitron/dataset/dataset_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def __len__(self) -> int:
raise NotImplementedError()

def get_frame_numbers_and_timestamps(
self, idxs: Sequence[int]
self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None
) -> List[Tuple[int, float]]:
"""
If the sequences in the dataset are videos rather than
Expand All @@ -251,7 +251,9 @@ def get_frame_numbers_and_timestamps(
frames.
Args:
idx: frame index in self
idxs: frame indices in self
subset_filter: If given, an index in idxs is ignored if the
corresponding frame is not in any of the named subsets.
Returns:
tuple of
Expand Down Expand Up @@ -291,7 +293,7 @@ def category_to_sequence_names(self) -> Dict[str, List[str]]:
return dict(c2seq)

def sequence_frames_in_order(
self, seq_name: str
self, seq_name: str, subset_filter: Optional[Sequence[str]] = None
) -> Iterator[Tuple[float, int, int]]:
"""Returns an iterator over the frame indices in a given sequence.
We attempt to first sort by timestamp (if they are available),
Expand All @@ -308,7 +310,9 @@ def sequence_frames_in_order(
"""
# pyre-ignore[16]
seq_frame_indices = self._seq_to_idx[seq_name]
nos_timestamps = self.get_frame_numbers_and_timestamps(seq_frame_indices)
nos_timestamps = self.get_frame_numbers_and_timestamps(
seq_frame_indices, subset_filter
)

yield from sorted(
[
Expand All @@ -317,11 +321,13 @@ def sequence_frames_in_order(
]
)

def sequence_indices_in_order(self, seq_name: str) -> Iterator[int]:
def sequence_indices_in_order(
self, seq_name: str, subset_filter: Optional[Sequence[str]] = None
) -> Iterator[int]:
"""Same as `sequence_frames_in_order` but returns the iterator over
only dataset indices.
"""
for _, _, idx in self.sequence_frames_in_order(seq_name):
for _, _, idx in self.sequence_frames_in_order(seq_name, subset_filter):
yield idx

# frame_data_type is the actual type of frames returned by the dataset.
Expand Down
8 changes: 7 additions & 1 deletion pytorch3d/implicitron/dataset/json_index_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,10 +888,16 @@ def _local_path(self, path: str) -> str:
return self.path_manager.get_local_path(path)

def get_frame_numbers_and_timestamps(
self, idxs: Sequence[int]
self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None
) -> List[Tuple[int, float]]:
out: List[Tuple[int, float]] = []
for idx in idxs:
if (
subset_filter is not None
and self.frame_annots[idx]["subset"] not in subset_filter
):
continue

# pyre-ignore[16]
frame_annotation = self.frame_annots[idx]["frame_annotation"]
out.append(
Expand Down
38 changes: 38 additions & 0 deletions tests/implicitron/test_data_json_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,41 @@ def test_loaders(self):
self.assertEqual(len(data_sets.train), 81)
self.assertEqual(len(data_sets.val), 102)
self.assertEqual(len(data_sets.test), 102)

def test_visitor_subsets(self):
    """Check the sequence iterators with and without a subset filter.

    Loads the test split of a single "skateboard" sequence and verifies that:
      * without a filter, `sequence_frames_in_order` yields all 102 frames,
        with non-decreasing timestamps and dataset indices 0, 1, 2, ...;
      * with the filter restricted to the "test_known" subset, both
        `sequence_frames_in_order` and `sequence_indices_in_order` yield
        81 entries, still with non-decreasing timestamps.
    """
    args = get_default_args(ImplicitronDataSource)
    args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
    dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
    dataset_args.category = "skateboard"
    dataset_args.dataset_root = "manifold://co3d/tree/extracted"
    dataset_args.test_restrict_sequence_id = 0
    dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 1

    data_source = ImplicitronDataSource(**args)
    datasets, _ = data_source.get_datasets_and_dataloaders()
    dataset = datasets.test

    # `subset_filter` is declared as Optional[Sequence[str]]. Passing a bare
    # string would turn the `subset not in subset_filter` check into a
    # substring test, so the subset name is always wrapped in a list.
    subset_filter = ["test_known"]

    sequences = list(dataset.sequence_names())
    self.assertEqual(len(sequences), 1)
    i = 0
    for seq in sequences:
        # Unfiltered iteration: every frame, ordered by timestamp.
        last_ts = float("-Inf")
        seq_frames = list(dataset.sequence_frames_in_order(seq))
        self.assertEqual(len(seq_frames), 102)
        for ts, _, idx in seq_frames:
            self.assertEqual(i, idx)
            i += 1
            self.assertGreaterEqual(ts, last_ts)
            last_ts = ts

        # Filtered iteration: only frames in the named subset, same ordering.
        last_ts = float("-Inf")
        known_frames = list(dataset.sequence_frames_in_order(seq, subset_filter))
        self.assertEqual(len(known_frames), 81)
        for ts, _, _ in known_frames:
            self.assertGreaterEqual(ts, last_ts)
            last_ts = ts

        known_indices = list(dataset.sequence_indices_in_order(seq, subset_filter))
        self.assertEqual(len(known_indices), 81)

        break  # testing only the first sequence

0 comments on commit 11959e0

Please sign in to comment.