Skip to content

Commit

Permalink
DynamoDB: Fix ScannedCount calculation for Limit-ed calls (#7085)
Browse files Browse the repository at this point in the history
  • Loading branch information
bblommers committed Dec 1, 2023
1 parent ad316dd commit bb23df4
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 39 deletions.
99 changes: 60 additions & 39 deletions moto/dynamodb/models/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from moto.dynamodb.models.utilities import dynamo_json_dump
from moto.moto_api._internal import mock_random

RESULT_SIZE_LIMIT = 1000000 # DynamoDB has a 1MB size limit


class SecondaryIndex(BaseModel):
def __init__(
Expand Down Expand Up @@ -813,7 +815,8 @@ def scan(
index_name: Optional[str] = None,
projection_expression: Optional[List[List[str]]] = None,
) -> Tuple[List[Item], int, Optional[Dict[str, Any]]]:
results = []
results: List[Item] = []
result_size = 0
scanned_count = 0

if index_name:
Expand All @@ -822,16 +825,32 @@ def scan(
else:
items = self.all_items()

last_evaluated_key = None
processing_previous_page = exclusive_start_key is not None
for item in items:
scanned_count += 1
# Cycle through the previous page of results
# When we encounter our start key, we know we've reached the end of the previous page
if processing_previous_page:
if self._item_equals_dct(item, exclusive_start_key):
processing_previous_page = False
continue

# Check wether we've reached the limit of our result set
# That can be either in number, or in size
reached_length_limit = len(results) == limit
reached_size_limit = (result_size + item.size()) > RESULT_SIZE_LIMIT
if reached_length_limit or reached_size_limit:
last_evaluated_key = self._get_last_evaluated_key(
results[-1], index_name
)
break

passes_all_conditions = True
for (
attribute_name,
(comparison_operator, comparison_objs),
) in filters.items():
for attribute_name in filters:
attribute = item.attrs.get(attribute_name)

if attribute:
(comparison_operator, comparison_objs) = filters[attribute_name]
# Attribute found
if not attribute.compare(comparison_operator, comparison_objs):
passes_all_conditions = False
Expand All @@ -846,17 +865,17 @@ def scan(
break

if passes_all_conditions:
results.append(item)

results = copy.deepcopy(results)
if index_name:
index = self.get_index(index_name)
results = [index.project(r) for r in results]
if index_name:
index = self.get_index(index_name)
results.append(index.project(copy.deepcopy(item)))
else:
results.append(copy.deepcopy(item))
result_size += item.size()

results, last_evaluated_key = self._trim_results(
results, limit, exclusive_start_key, scanned_index=index_name
)
scanned_count += 1

# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.FilterExpression
# the filter expression should be evaluated after the query.
if filter_expression is not None:
results = [item for item in results if filter_expression.expr(item)]

Expand All @@ -865,6 +884,13 @@ def scan(

return results, scanned_count, last_evaluated_key

def _item_equals_dct(self, item: Item, dct: Dict[str, Any]) -> bool:
hash_key = DynamoType(dct.get(self.hash_key_attr)) # type: ignore[arg-type]
range_key = dct.get(self.range_key_attr) if self.range_key_attr else None
if range_key is not None:
range_key = DynamoType(range_key)
return item.hash_key == hash_key and item.range_key == range_key

def _trim_results(
self,
results: List[Item],
Expand All @@ -873,45 +899,40 @@ def _trim_results(
scanned_index: Optional[str] = None,
) -> Tuple[List[Item], Optional[Dict[str, Any]]]:
if exclusive_start_key is not None:
hash_key = DynamoType(exclusive_start_key.get(self.hash_key_attr)) # type: ignore[arg-type]
range_key = (
exclusive_start_key.get(self.range_key_attr)
if self.range_key_attr
else None
)
if range_key is not None:
range_key = DynamoType(range_key)
for i in range(len(results)):
if (
results[i].hash_key == hash_key
and results[i].range_key == range_key
):
if self._item_equals_dct(results[i], exclusive_start_key):
results = results[i + 1 :]
break

last_evaluated_key = None
size_limit = 1000000 # DynamoDB has a 1MB size limit
item_size = sum(res.size() for res in results)
if item_size > size_limit:
if item_size > RESULT_SIZE_LIMIT:
item_size = idx = 0
while item_size + results[idx].size() < size_limit:
while item_size + results[idx].size() < RESULT_SIZE_LIMIT:
item_size += results[idx].size()
idx += 1
limit = min(limit, idx) if limit else idx
if limit and len(results) > limit:
results = results[:limit]
last_evaluated_key = {self.hash_key_attr: results[-1].hash_key}
if self.range_key_attr is not None and results[-1].range_key is not None:
last_evaluated_key[self.range_key_attr] = results[-1].range_key

if scanned_index:
index = self.get_index(scanned_index)
idx_col_list = [i["AttributeName"] for i in index.schema]
for col in idx_col_list:
last_evaluated_key[col] = results[-1].attrs[col]
last_evaluated_key = self._get_last_evaluated_key(
last_result=results[-1], index_name=scanned_index
)

return results, last_evaluated_key

def _get_last_evaluated_key(
self, last_result: Item, index_name: Optional[str]
) -> Dict[str, Any]:
last_evaluated_key = {self.hash_key_attr: last_result.hash_key}
if self.range_key_attr is not None and last_result.range_key is not None:
last_evaluated_key[self.range_key_attr] = last_result.range_key
if index_name:
index = self.get_index(index_name)
idx_col_list = [i["AttributeName"] for i in index.schema]
for col in idx_col_list:
last_evaluated_key[col] = last_result.attrs[col]
return last_evaluated_key

def delete(self, account_id: str, region_name: str) -> None:
from moto.dynamodb.models import dynamodb_backends

Expand Down
18 changes: 18 additions & 0 deletions tests/test_dynamodb/test_dynamodb_table_with_range_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,24 +1170,42 @@ def test_scan_by_index():
assert res["Count"] == 3
assert len(res["Items"]) == 3

res = dynamodb.scan(TableName="test", Limit=1)
assert res["Count"] == 1
assert res["ScannedCount"] == 1

res = dynamodb.scan(TableName="test", ExclusiveStartKey=res["LastEvaluatedKey"])
assert res["Count"] == 2
assert res["ScannedCount"] == 2

res = dynamodb.scan(TableName="test", IndexName="test_gsi")
assert res["Count"] == 2
assert res["ScannedCount"] == 2
assert len(res["Items"]) == 2

res = dynamodb.scan(TableName="test", IndexName="test_gsi", Limit=1)
assert res["Count"] == 1
assert res["ScannedCount"] == 1
assert len(res["Items"]) == 1
last_eval_key = res["LastEvaluatedKey"]
assert last_eval_key["id"]["S"] == "1"
assert last_eval_key["gsi_col"]["S"] == "1"
assert last_eval_key["gsi_range_key"]["S"] == "1"

res = dynamodb.scan(
TableName="test", IndexName="test_gsi", ExclusiveStartKey=last_eval_key
)
assert res["Count"] == 1
assert res["ScannedCount"] == 1

res = dynamodb.scan(TableName="test", IndexName="test_lsi")
assert res["Count"] == 2
assert res["ScannedCount"] == 2
assert len(res["Items"]) == 2

res = dynamodb.scan(TableName="test", IndexName="test_lsi", Limit=1)
assert res["Count"] == 1
assert res["ScannedCount"] == 1
assert len(res["Items"]) == 1
last_eval_key = res["LastEvaluatedKey"]
assert last_eval_key["id"]["S"] == "1"
Expand Down
4 changes: 4 additions & 0 deletions tests/test_dynamodb/test_dynamodb_table_without_range_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,11 +523,15 @@ def test_scan_pagination():
page1 = table.scan(Limit=6)
assert page1["Count"] == 6
assert len(page1["Items"]) == 6
page1_results = set([r["username"] for r in page1["Items"]])
assert page1_results == {"user0", "user3", "user1", "user2", "user5", "user4"}

page2 = table.scan(Limit=6, ExclusiveStartKey=page1["LastEvaluatedKey"])
assert page2["Count"] == 4
assert len(page2["Items"]) == 4
assert "LastEvaluatedKey" not in page2
page2_results = set([r["username"] for r in page2["Items"]])
assert page2_results == {"user6", "user7", "user8", "user9"}

results = page1["Items"] + page2["Items"]
usernames = set([r["username"] for r in results])
Expand Down

0 comments on commit bb23df4

Please sign in to comment.