Skip to content

Commit

Permalink
Merge pull request #2508 from activeloopai/fy_opt_dataloader
Browse files: browse the repository at this point in the history
[AL-2365] Optimize dataloader creation
  • Loading branch information
FayazRahman committed Jul 31, 2023
2 parents fe1a109 + bb372bf commit 5570876
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
25 changes: 16 additions & 9 deletions deeplake/core/index/index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -182,7 +182,10 @@ def __getitem__(self, item: IndexValue):
elif isinstance(item, slice):
return IndexEntry(merge_slices(self.value, item))
elif isinstance(item, (tuple, list)):
new_value = tuple(slice_at_int(self.value, idx) for idx in item)
if self.is_trivial():
new_value = tuple(item)
else:
new_value = tuple(slice_at_int(self.value, idx) for idx in item)
return IndexEntry(new_value)
elif isinstance(self.value, (tuple, list)):
if isinstance(item, int) or isinstance(item, slice):
Expand Down Expand Up @@ -252,17 +255,21 @@ def length(self, parent_length: int) -> int:

def validate(self, parent_length: int):
"""Checks that the index is not accessing values outside the range of the parent."""

# Slices are okay, as an out-of-range slice will just yield no samples
# Check each index of a tuple
if isinstance(self.value, tuple):
for idx in self.value:
IndexEntry(idx).validate(parent_length)
if isinstance(self.value, slice):
return

# Check ints that are too large (positive or negative)
if isinstance(self.value, int):
if self.value >= parent_length or self.value < -parent_length:
value_to_check = self.value
if isinstance(value_to_check, int):
value_to_check = (value_to_check,)

# Check each index of a tuple for ints that are too large (positive or negative)
if isinstance(value_to_check, tuple):
value_arr = np.array(value_to_check)
if np.any((value_arr >= parent_length) | (value_arr < -parent_length)):
raise IndexError(
f"Index {self.value} is out of range for tensors with length {parent_length}"
f"Index {value_to_check} is out of range for tensors with length {parent_length}"
)

def downsample(self, factor: int, length: int):
Expand Down
8 changes: 2 additions & 6 deletions deeplake/util/scheduling.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -50,12 +50,8 @@ def create_fetching_schedule(dataset, primary_tensor_name, shuffle_within_chunks
if isinstance(index_struct, set):
schedule = [int(idx) for idx in schedule if idx in index_struct]
elif isinstance(index_struct, dict):
nested_schedule = [
[int(idx)] * index_struct[idx] for idx in schedule if idx in index_struct
]
schedule = []
for indexes_list in nested_schedule:
schedule.extend(indexes_list)
idxs = filter(lambda idx: idx in index_struct, schedule)
schedule = [int(idx) for idx in idxs for _ in range(index_struct[idx])]
return schedule


Expand Down

0 comments on commit 5570876

Please sign in to comment.