From 54d11f8cf5bf389e7a79151a72c6881b966ad2bb Mon Sep 17 00:00:00 2001 From: Loser Cheems Date: Mon, 7 Jul 2025 10:58:28 +0800 Subject: [PATCH] Reorganizes flash attention files into instantiations directory Moves all flash forward pass implementation files to a dedicated instantiations subdirectory to improve code organization and maintainability. Affects 48 CUDA files covering various head dimensions (32, 64, 96, 128, 192, 256), data types (fp16, bf16), causal variants, and split variants for SM80 architecture. --- .../{ => instantiations}/flash_fwd_hdim128_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim128_bf16_sm80.cu | 0 .../{ => instantiations}/flash_fwd_hdim128_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim128_fp16_sm80.cu | 0 .../{ => instantiations}/flash_fwd_hdim192_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim192_bf16_sm80.cu | 0 .../{ => instantiations}/flash_fwd_hdim192_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim192_fp16_sm80.cu | 0 .../{ => instantiations}/flash_fwd_hdim256_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim256_bf16_sm80.cu | 0 .../{ => instantiations}/flash_fwd_hdim256_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim256_fp16_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_hdim32_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim32_bf16_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_hdim32_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim32_fp16_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_hdim64_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim64_bf16_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_hdim64_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim64_fp16_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_hdim96_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim96_bf16_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_hdim96_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_hdim96_fp16_sm80.cu | 0 .../flash_fwd_split_hdim128_bf16_causal_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_split_hdim128_bf16_sm80.cu | 0 .../flash_fwd_split_hdim128_fp16_causal_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_split_hdim128_fp16_sm80.cu | 0 .../flash_fwd_split_hdim192_bf16_causal_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_split_hdim192_bf16_sm80.cu | 0 .../flash_fwd_split_hdim192_fp16_causal_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_split_hdim192_fp16_sm80.cu | 0 .../flash_fwd_split_hdim256_bf16_causal_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_split_hdim256_bf16_sm80.cu | 0 .../flash_fwd_split_hdim256_fp16_causal_sm80.cu | 0 .../src/{ => instantiations}/flash_fwd_split_hdim256_fp16_sm80.cu | 0 .../flash_fwd_split_hdim32_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_split_hdim32_bf16_sm80.cu | 0 .../flash_fwd_split_hdim32_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_split_hdim32_fp16_sm80.cu | 0 .../flash_fwd_split_hdim64_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_split_hdim64_bf16_sm80.cu | 0 .../flash_fwd_split_hdim64_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_split_hdim64_fp16_sm80.cu | 0 .../flash_fwd_split_hdim96_bf16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_split_hdim96_bf16_sm80.cu | 0 .../flash_fwd_split_hdim96_fp16_causal_sm80.cu | 0 csrc/src/{ => instantiations}/flash_fwd_split_hdim96_fp16_sm80.cu | 0 48 files changed, 0 insertions(+), 0 deletions(-) rename csrc/src/{ => instantiations}/flash_fwd_hdim128_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim128_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim128_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim128_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim192_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim192_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim192_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim192_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim256_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim256_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim256_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim256_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim32_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim32_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim32_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim32_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim64_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim64_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim64_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim64_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim96_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim96_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim96_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_hdim96_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim128_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim128_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim128_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim128_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim192_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim192_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim192_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim192_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim256_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim256_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim256_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim256_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim32_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim32_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim32_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim32_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim64_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim64_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim64_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim64_fp16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim96_bf16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim96_bf16_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim96_fp16_causal_sm80.cu (100%) rename csrc/src/{ => instantiations}/flash_fwd_split_hdim96_fp16_sm80.cu (100%) diff --git a/csrc/src/flash_fwd_hdim128_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim128_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim128_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim128_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_fp16_sm80.cu