diff --git a/csrc/src/flash_bwd_launch_template.h b/csrc/src/flash_bwd_launch_template.h index 1bf12db..b16a957 100644 --- a/csrc/src/flash_bwd_launch_template.h +++ b/csrc/src/flash_bwd_launch_template.h @@ -183,7 +183,6 @@ void run_mha_bwd_hdim96(Flash_bwd_params &params, cudaStream_t stream) { if (status_ != cudaSuccess) { C10_CUDA_CHECK(status_); } - // printf("max_smem_per_block = %d\n", max_smem_per_block); if (max_smem_per_block >= 116 * 1024) { // H100 and A100 // 116KB, 1 CTAs in A100, 1 CTAs in H100. run_flash_bwd, Is_causal>(params, stream);