From baf9913fc15c6882fd4a5676b311abc07301cca6 Mon Sep 17 00:00:00 2001 From: Loser Cheems Date: Tue, 1 Jul 2025 21:38:56 +0800 Subject: [PATCH] Improves code formatting consistency in comments Aligns inline comment spacing to use consistent indentation across kernel traits definitions for better readability --- csrc/src/kernel_traits.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/csrc/src/kernel_traits.h b/csrc/src/kernel_traits.h index 69473ef..19dc319 100644 --- a/csrc/src/kernel_traits.h +++ b/csrc/src/kernel_traits.h @@ -236,7 +236,7 @@ struct Flash_bwd_kernel_traits : public Base { using TiledMmadQ = TiledMMA< typename Base::MMA_Atom_Arch, - Layout, Int, _1>>, // 2x4x1 or 4x2x1 thread group + Layout, Int, _1>>, // 2x4x1 or 4x2x1 thread group Tile, Int<16 * kNWarps / AtomLayoutMdQ>, _16>>; using SmemLayoutAtomQdO = decltype( @@ -340,36 +340,36 @@ struct Flash_bwd_kernel_traits : public Base { using GmemTiledCopyQKV = decltype( make_tiled_copy(Copy_Atom{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per read + Layout>{})); // Val layout, 8 vals per read using GmemTiledCopydO = decltype( make_tiled_copy(Copy_Atom, elem_type>{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store + Layout>{})); // Val layout, 8 vals per store using GmemTiledCopydKV = decltype( make_tiled_copy(Copy_Atom, elem_type>{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store + Layout>{})); // Val layout, 8 vals per store using GmemTiledCopydQ = decltype( make_tiled_copy(Copy_Atom, elem_type>{}, GmemLayoutAtom{}, - Layout>{})); // Val layout, 8 vals per store + Layout>{})); // Val layout, 8 vals per store using GmemLayoutAtomdQaccum = std::conditional_t< kBlockKSmem == 32, - Layout, // Thread layout, 8 threads per row + Layout, // Thread layout, 8 threads per row Stride< _8, _1>>, - Layout, // Thread layout, 16 threads per row + Layout, // Thread layout, 16 threads per row Stride< _16, _1>> >; using GmemTiledCopydQaccum = decltype( make_tiled_copy(Copy_Atom, ElementAccum>{}, GmemLayoutAtomdQaccum{}, - Layout>{})); // Val layout, 4 vals per store + Layout>{})); // Val layout, 4 vals per store using GmemTiledCopydQaccumAtomicAdd = decltype( make_tiled_copy(Copy_Atom, ElementAccum>{}, - Layout, // Thread layout, 8 threads per row + Layout, // Thread layout, 8 threads per row Stride<_32, _1>>{}, - Layout>{})); // Val layout, 1 val per store + Layout>{})); // Val layout, 1 val per store }; //////////////////////////////////////////////////////////////////////////////////////////////////// \ No newline at end of file