diff --git a/csrc/src/flash_fwd_hdim128_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim128_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim128_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim128_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim128_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim128_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim128_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim192_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim192_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim192_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim192_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim256_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim256_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim256_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim256_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim32_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim32_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim32_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim32_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim64_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim64_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim64_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim64_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_hdim96_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_hdim96_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_hdim96_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_hdim96_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim128_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim128_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim128_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim128_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim192_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim192_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim192_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim192_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim256_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim256_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim256_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim256_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim32_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim32_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim32_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim32_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim64_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim64_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim64_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim64_fp16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_bf16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_bf16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_bf16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_bf16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_bf16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_bf16_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_fp16_causal_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_fp16_causal_sm80.cu diff --git a/csrc/src/flash_fwd_split_hdim96_fp16_sm80.cu b/csrc/src/instantiations/flash_fwd_split_hdim96_fp16_sm80.cu similarity index 100% rename from csrc/src/flash_fwd_split_hdim96_fp16_sm80.cu rename to csrc/src/instantiations/flash_fwd_split_hdim96_fp16_sm80.cu