Skip to content

Commit

Permalink
fixes vllm-project#1556 double free
Browse files Browse the repository at this point in the history
  • Loading branch information
br3no committed Mar 12, 2024
1 parent 654865e commit dfd3019
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 2 deletions.
45 changes: 45 additions & 0 deletions tests/core/test_block_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,48 @@ def test_reset():
# Resetting block manager frees all allocated blocks.
block_manager.reset()
assert block_manager.get_num_free_gpu_blocks() == original_blocks

def test_sliding_window_multi_seq():
    """Regression test: freeing forked sequences under a sliding window.

    A parent sequence is allocated with ``sliding_window=2``, forked into a
    child, and each sequence is then extended by one token. Freeing both
    sequences afterwards must not raise a "double free" error.
    """
    block_size = 1
    num_cpu_blocks = num_gpu_blocks = 8
    manager = BlockSpaceManager(
        block_size,
        num_cpu_blocks,
        num_gpu_blocks,
        sliding_window=2,
        watermark=0,
    )

    parent = Sequence(1, "one two three", [0, 1, 2], block_size)
    group = SequenceGroup("1", [parent], SamplingParams(), time.time(), None)
    manager.allocate(group)

    # Fork the prompt; the child starts with a copy of the parent's table.
    child = parent.fork(2)
    manager.fork(parent, child)

    # Right after the fork, both sequences must report the same block table.
    table_before_parent = manager.get_block_table(parent)
    table_before_child = manager.get_block_table(child)
    assert table_before_parent == table_before_child

    # Extend the child first, then the parent, each with its own new token.
    # The last block is shared at this point, so the original test expects
    # copy-on-write to kick in on append.
    for seq, new_token in ((child, 4), (parent, 5)):
        seq.append_token_id(new_token, {new_token: Logprob(0.0)})
        manager.append_slot(seq)

    table_parent = manager.get_block_table(parent)
    table_child = manager.get_block_table(child)

    # The tables have diverged after the appends...
    assert table_parent != table_child

    # ...but both sequences still share the second-to-last block.
    assert table_parent[-2] == table_child[-2]

    # Freeing both sequences must not trigger a "double free" error.
    manager.free(parent)
    manager.free(child)

9 changes: 7 additions & 2 deletions vllm/core/block_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
# Thus, it is always safe from OOM.
src_block_table = self.block_tables[parent_seq.seq_id]
self.block_tables[child_seq.seq_id] = src_block_table.copy()
for block in src_block_table:
for block in set(src_block_table):
block.ref_count += 1

def _get_physical_blocks(
Expand Down Expand Up @@ -393,7 +393,12 @@ def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
return block_number_mapping

def _free_block_table(self, block_table: BlockTable) -> None:
for block in set(block_table):
blocks_to_free = (
block_table[-self.block_sliding_window :]
if self.block_sliding_window is not None
else block_table
)
for block in set(blocks_to_free):
if block.device == Device.GPU:
self.gpu_allocator.free(block)
else:
Expand Down

0 comments on commit dfd3019

Please sign in to comment.