Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gemma/gemma_args.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ struct RuntimeConfig {

// These defaults are overridden by InferenceArgs::CopyTo(*this):
// Max tokens per batch during prefill.
- size_t prefill_tbatch_size = 256;
+ size_t prefill_tbatch_size = kMaxBatchSize;
// Max queries per batch (one token from each) during decode.
size_t decode_qbatch_size = 16;

Expand Down Expand Up @@ -225,7 +225,7 @@ struct InferenceArgs : public ArgsBase<InferenceArgs> {
visitor(max_generated_tokens, "max_generated_tokens", size_t{4096},
"Maximum number of tokens to generate.");

- visitor(prefill_tbatch_size, "prefill_tbatch", size_t{256},
+ visitor(prefill_tbatch_size, "prefill_tbatch", size_t{kMaxBatchSize},
"Prefill: max tokens per batch.");
visitor(decode_qbatch_size, "decode_qbatch", size_t{16},
"Decode: max queries per batch.");
Expand Down
6 changes: 3 additions & 3 deletions ops/matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ HWY_INLINE_VAR constexpr size_t kNR = 4;
HWY_INLINE_VAR constexpr size_t kMaxMR = 4;

// For `MMTilesC`.
- HWY_INLINE_VAR constexpr size_t kMaxMC = 512;
- HWY_INLINE_VAR constexpr size_t kMaxNC = 16384;
+ HWY_INLINE_VAR constexpr size_t kMaxMC = 256;
+ HWY_INLINE_VAR constexpr size_t kMaxNC = 6 * 1024;

// Upper bound for per-worker B storage on the stack. Chosen such that one row
// of BF16 A and B fit in 32 KiB L1, but there may be `kMaxMR` and `kNR`.
- HWY_INLINE_VAR constexpr size_t kMaxKC = 8 * 1024;
+ HWY_INLINE_VAR constexpr size_t kMaxKC = 6 * 1024;

// Policy classes for parallelism, implementing some of `Parallelism`.

Expand Down
Loading