Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions ggml/src/ggml-vulkan/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ if (Vulkan_FOUND)
../../include/ggml-vulkan.h
)

# Probe glslc for GL_NV_cooperative_matrix2 by compiling a minimal test shader.
# The compiled output is written to stdout ("-o -") and captured in glslc_output,
# which is not inspected here; only the text glslc writes to stderr decides the result.
# If stderr reports the extension as unsupported, the coopmat2 shaders are skipped;
# otherwise GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT is defined so they get compiled.
execute_process(
    COMMAND ${Vulkan_GLSLC_EXECUTABLE} -o - -fshader-stage=compute --target-env=vulkan1.3 "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp"
    OUTPUT_VARIABLE glslc_output
    ERROR_VARIABLE glslc_error)

# Quote the expansion so MATCHES always receives exactly one left-hand operand,
# even when stderr is empty (the usual success case) or contains ';' characters
# that an unquoted expansion would split into several arguments.
if ("${glslc_error}" MATCHES ".*extension not supported: GL_NV_cooperative_matrix2.*")
    message(STATUS "GL_NV_cooperative_matrix2 not supported by glslc")
else()
    message(STATUS "GL_NV_cooperative_matrix2 supported by glslc")
    add_compile_definitions(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
endif()

target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

Expand Down
19 changes: 13 additions & 6 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1513,7 +1513,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
compiles.push_back(std::async(ggml_vk_create_pipeline_func, std::ref(device), std::ref(pipeline), name, spv_size, spv_data, entrypoint, parameter_count, push_constant_size, wg_denoms, specialization_constants, align, disable_robustness));
};

#if defined(VK_NV_cooperative_matrix2)
#if defined(VK_NV_cooperative_matrix2) && defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
if (device->coopmat2) {

auto const &fa_wg_denoms = [&](uint32_t D, uint32_t clamp, ggml_type type, bool small_rows) -> std::array<uint32_t, 3> {
Expand Down Expand Up @@ -1611,7 +1611,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
#undef CREATE_MM
#undef CREATE_MM2
} else
#endif // defined(VK_NV_cooperative_matrix2)
#endif // defined(VK_NV_cooperative_matrix2) && defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
if (device->coopmat_support) {
// Create 6 variants, {s,m,l}x{unaligned,aligned}
#define CREATE_MM(PIPELINE_NAME, NAMELC, F16ACC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT, ID) \
Expand Down Expand Up @@ -2153,7 +2153,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
device->coopmat_support = device->coopmat_support && coopmat_features.cooperativeMatrix;

if (coopmat2_support) {
#if defined(VK_NV_cooperative_matrix2)
#if defined(VK_NV_cooperative_matrix2) && defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
if (coopmat2_features.cooperativeMatrixWorkgroupScope &&
coopmat2_features.cooperativeMatrixFlexibleDimensions &&
coopmat2_features.cooperativeMatrixReductions &&
Expand Down Expand Up @@ -2414,14 +2414,19 @@ static void ggml_vk_print_gpu_info(size_t idx) {
bool fp16_storage = false;
bool fp16_compute = false;
bool coopmat_support = false;
bool coopmat2_support = false;

for (auto properties : ext_props) {
if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) {
fp16_storage = true;
} else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) {
fp16_compute = true;
} else if (strcmp("VK_KHR_cooperative_matrix", properties.extensionName) == 0) {
} else if (strcmp("VK_KHR_cooperative_matrix", properties.extensionName) == 0 &&
!getenv("GGML_VK_DISABLE_COOPMAT")) {
coopmat_support = true;
} else if (strcmp("VK_NV_cooperative_matrix2", properties.extensionName) == 0 &&
!getenv("GGML_VK_DISABLE_COOPMAT2")) {
coopmat2_support = true;
}
}

Expand Down Expand Up @@ -2472,9 +2477,11 @@ static void ggml_vk_print_gpu_info(size_t idx) {

coopmat_support = coopmat_support && coopmat_features.cooperativeMatrix;

std::string matrix_cores = coopmat2_support ? "NV_coopmat2" : coopmat_support ? "KHR_coopmat" : "none";

std::string device_name = props2.properties.deviceName.data();
GGML_LOG_DEBUG("ggml_vulkan: %zu = %s (%s) | uma: %d | fp16: %d | warp size: %zu | matrix cores: %d\n",
idx, device_name.c_str(), driver_props.driverName.data(), uma, fp16, subgroup_size, coopmat_support);
GGML_LOG_DEBUG("ggml_vulkan: %zu = %s (%s) | uma: %d | fp16: %d | warp size: %zu | matrix cores: %s\n",
idx, device_name.c_str(), driver_props.driverName.data(), uma, fp16, subgroup_size, matrix_cores.c_str());

if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) {
GGML_LOG_DEBUG("ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want.\n");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#version 460

// Requiring the extension makes compilation fail on a GLSL compiler that does
// not recognise GL_NV_cooperative_matrix2.
// NOTE(review): presumably this is the test_coopmat2_support.comp probe used by
// the build system - confirm against the file name.
#extension GL_NV_cooperative_matrix2 : require

// The entry point body is intentionally empty.
void main() {}
4 changes: 2 additions & 2 deletions ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,14 +342,14 @@ void process_shaders() {
matmul_shaders(true, matmul_id, true, false, false);
matmul_shaders(true, matmul_id, true, false, true);

#if defined(VK_NV_cooperative_matrix2)
#if defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
// Coopmat2, fp32acc and fp16acc
matmul_shaders(true, matmul_id, false, true, false);
matmul_shaders(true, matmul_id, false, true, true);
#endif
}

#if defined(VK_NV_cooperative_matrix2)
#if defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
// flash attention
for (const auto& f16acc : {false, true}) {
std::string acctype = f16acc ? "float16_t" : "float";
Expand Down
Loading