Skip to content

Commit

Permalink
Fix dummy cache allocation (#574)
Browse files Browse the repository at this point in the history
* Fix dummy cache allocation

* Try MPS device selection

* Rechain reloc
  • Loading branch information
artek0chumak committed Apr 16, 2024
1 parent d6f4f80 commit 30f522d
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion src/petals/server/throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def measure_compute_rps(
block = block.to(dtype)
block = convert_block(block, 0, config, tensor_parallel_devices, device, quant_type=quant_type, freeze=True)

cache = (DUMMY_KEY_PAST.to(dtype), DUMMY_KEY_PAST.to(dtype))
cache = (DUMMY_KEY_PAST.to(dtype=dtype, device=device), DUMMY_KEY_PAST.to(dtype=dtype, device=device))
elapsed = 0
dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype)

Expand Down

0 comments on commit 30f522d

Please sign in to comment.