Skip to content

Commit

Permalink
Add VkFFT support and profiler
Browse files Browse the repository at this point in the history
  • Loading branch information
srcejon committed Aug 13, 2023
1 parent b7de8b8 commit 5e71da4
Show file tree
Hide file tree
Showing 37 changed files with 2,417 additions and 35 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Expand Up @@ -30,6 +30,8 @@ option(BUNDLE "Enable distribution bundle" OFF)
set(ARCH_OPT "native" CACHE STRING "Specify instruction set to use. Will be passed directly as `-march` or `/arch:` argument on supported compilers. \
'native' option will figure out host machine compatibilities and set flags accordingly (even with MSVC).")
option(ENABLE_QT6 "Build with Qt6 rather than Qt5" OFF)
option(ENABLE_PROFILER "Enable runtime profiler" OFF)
set(VKFFT_BACKEND 1 CACHE STRING "vkFFT Backend: 0 - Vulkan, 1 - CUDA")

# Sampling devices enablers
option(ENABLE_AIRSPY "Enable AirSpy support" ON)
Expand Down Expand Up @@ -574,6 +576,10 @@ else()
message(STATUS "Compiling for 16 bit Rx DSP chain")
endif()

if (ENABLE_PROFILER)
add_compile_definitions(ENABLE_PROFILER)
endif()

# Set compiler options based on target architecture and selected extensions
include(CompilerOptions)

Expand Down
14 changes: 14 additions & 0 deletions external/CMakeLists.txt
Expand Up @@ -832,6 +832,20 @@ if(ENABLE_FEATURE_SATELLITETRACKER OR ENABLE_CHANNELRX_DEMODAPT)
endif ()
endif ()

# VkFFT (header only library)
ExternalProject_Add(vkfft
GIT_REPOSITORY https://github.com/DTolm/VkFFT.git
GIT_TAG v1.3.1
PREFIX "${EXTERNAL_BUILD_LIBRARIES}/vkfft"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
BUILD_BYPRODUCTS ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
ExternalProject_Get_Property(vkfft source_dir)
set(VKFFT_INCLUDE_DIR "${source_dir}" CACHE INTERNAL "")

# requirements needed by many packages on windows
if (WIN32)
ExternalProject_Add(pthreads4w
Expand Down
134 changes: 126 additions & 8 deletions sdrbase/CMakeLists.txt
Expand Up @@ -19,18 +19,54 @@ if(FFTW3F_FOUND)
add_definitions(-DUSE_FFTW)
include_directories(${FFTW3F_INCLUDE_DIRS})
set(sdrbase_FFTW3F_LIB ${FFTW3F_LIBRARIES})
else(FFTW3F_FOUND)
endif(FFTW3F_FOUND)

# Kiss FFT is always available
set(sdrbase_SOURCES
${sdrbase_SOURCES}
dsp/kissengine.cpp
dsp/kissfft.h
)
set(sdrbase_HEADERS
${sdrbase_HEADERS}
dsp/kissengine.h
)
add_definitions(-DUSE_KISSFFT)

# Vulkan SDK: https://vulkan.lunarg.com/
# Windows Vulkan SDK is missing glslang_c_interface.h
# See bug: https://vulkan.lunarg.com/issue/view/63d158a85df11200d569b2ab
# Copy it from Linux SDK
find_package(Vulkan)
if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0))
set(sdrbase_SOURCES
${sdrbase_SOURCES}
dsp/kissengine.cpp
dsp/kissfft.h
dsp/vulkanvkfftengine.cpp
dsp/vulkanvkfftengine.h
)
set(sdrbase_HEADERS
${sdrbase_HEADERS}
dsp/kissengine.h
endif()

# CUDA Toolkit: https://developer.nvidia.com/cuda-downloads
find_package(CUDA 9.0)
if(CUDA_FOUND AND (${VKFFT_BACKEND} EQUAL 1))
enable_language(CUDA)
set(sdrbase_SOURCES
${sdrbase_SOURCES}
dsp/cudavkfftengine.cpp
dsp/cudavkfftengine.h
)
add_definitions(-DUSE_KISSFFT)
endif(FFTW3F_FOUND)
endif()

if(Vulkan_FOUND OR CUDA_FOUND)
set(sdrbase_SOURCES
${sdrbase_SOURCES}
dsp/vkfftengine.cpp
dsp/vkfftengine.h
dsp/vkfftutils.cpp
dsp/vkfftutils.h
)
include_directories(${VKFFT_INCLUDE_DIR})
endif()

if (LIBSIGMF_FOUND)
set(sdrbase_SOURCES
Expand Down Expand Up @@ -207,6 +243,7 @@ set(sdrbase_SOURCES
util/planespotters.cpp
util/png.cpp
util/prettyprint.cpp
util/profiler.cpp
util/radiosonde.cpp
util/rtpsink.cpp
util/syncmessenger.cpp
Expand Down Expand Up @@ -442,6 +479,7 @@ set(sdrbase_HEADERS
util/planespotters.h
util/png.h
util/prettyprint.h
util/profiler.h
util/radiosonde.h
util/rtpsink.h
util/syncmessenger.h
Expand Down Expand Up @@ -507,6 +545,86 @@ if(DEFINED LIBSIGMF_DEPENDS)
add_dependencies(sdrbase "${LIBSIGMF_DEPENDS}")
endif()

if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0))
target_compile_definitions(sdrbase PUBLIC -DVK_API_VERSION=11)
target_include_directories(sdrbase PUBLIC ${Vulkan_INCLUDE_DIR} ${Vulkan_INCLUDE_DIR}/glslang/Include)
add_compile_definitions(sdrbase VKFFT_BACKEND=0)

find_library(VULKAN_SPIRV_LIB SPIRV HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPVREMAPPER_LIB SPVRemapper HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPIRV_TOOLS_LIB SPIRV-Tools HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPIRV_TOOLS_OPT_LIB SPIRV-Tools-Opt HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPIRV_TOOLS_SHARED_LIB SPIRV-Tools-Shared HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_HLSL_LIB HLSL HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_OGLCOMPILER_LIB OGLCompiler HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_OSDEPENDENT_LIB OSDependent HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_GLSLANG_LIB glslang HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_GLSLANG_RES_LIB glslang-default-resource-limits HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_MACHINEINDEPENDENT_LIB MachineIndependent HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_GENERICCODEGEN_LIB GenericCodeGen HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)

find_library(VULKAN_SPIRVD_LIB SPIRVd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPVREMAPPERD_LIB SPVRemapperd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPIRV_TOOLSD_LIB SPIRV-Toolsd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPIRV_TOOLS_OPTD_LIB SPIRV-Tools-Optd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_SPIRV_TOOLS_SHAREDD_LIB SPIRV-Tools-Sharedd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_HLSLD_LIB HLSLd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_OGLCOMPILERD_LIB OGLCompilerd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_OSDEPENDENTD_LIB OSDependentd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_GLSLANGD_LIB glslangd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_GLSLANG_RESD_LIB glslang-default-resource-limitsd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_MACHINEINDEPENDENTD_LIB MachineIndependentd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)
find_library(VULKAN_GENERICCODEGEND_LIB GenericCodeGend HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED)

target_link_libraries(sdrbase
optimized ${VULKAN_SPIRV_LIB}
optimized ${VULKAN_SPVREMAPPER_LIB}
optimized ${VULKAN_SPIRV_TOOLS_LIB}
optimized ${VULKAN_SPIRV_TOOLS_OPT_LIB}
optimized ${VULKAN_SPIRV_TOOLS_SHARED_LIB}
optimized ${VULKAN_HLSL_LIB}
optimized ${VULKAN_OGLCOMPILER_LIB}
optimized ${VULKAN_OSDEPENDENT_LIB}
optimized ${VULKAN_GLSLANG_LIB}
optimized ${VULKAN_GLSLANG_RES_LIB}
optimized ${VULKAN_MACHINEINDEPENDENT_LIB}
optimized ${VULKAN_GENERICCODEGEN_LIB}
optimized Vulkan::Vulkan
debug ${VULKAN_SPIRVD_LIB}
debug ${VULKAN_SPVREMAPPERD_LIB}
debug ${VULKAN_SPIRV_TOOLSD_LIB}
debug ${VULKAN_SPIRV_TOOLS_OPTD_LIB}
debug ${VULKAN_SPIRV_TOOLS_SHAREDD_LIB}
debug ${VULKAN_HLSLD_LIB}
debug ${VULKAN_OGLCOMPILERD_LIB}
debug ${VULKAN_OSDEPENDENTD_LIB}
debug ${VULKAN_GLSLANGD_LIB}
debug ${VULKAN_GLSLANG_RESD_LIB}
debug ${VULKAN_MACHINEINDEPENDENTD_LIB}
debug ${VULKAN_GENERICCODEGEND_LIB}
Vulkan::Vulkan
)
endif()

if(CUDA_FOUND AND (${VKFFT_BACKEND} EQUAL 1))
set_property(TARGET sdrbase PROPERTY CUDA_ARCHITECTURES 60 70 75 80 86)
add_compile_definitions(sdrbase VKFFT_BACKEND=1)
target_compile_options(sdrbase PUBLIC
"$<$<COMPILE_LANGUAGE:CUDA>:SHELL:
-DVKFFT_BACKEND=1
-gencode arch=compute_60,code=compute_60
-gencode arch=compute_70,code=compute_70
-gencode arch=compute_75,code=compute_75
-gencode arch=compute_80,code=compute_80
-gencode arch=compute_86,code=compute_86>")
set_target_properties(sdrbase PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(sdrbase PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64 REQUIRED)
find_library(CUDA_LIB cuda HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64 REQUIRED)
target_link_libraries(sdrbase ${CUDA_LIBRARIES} ${CUDA_LIB} ${CUDA_NVRTC_LIB})
target_include_directories(sdrbase PUBLIC ${CUDA_INCLUDE_DIRS})
endif()

target_link_libraries(sdrbase
${OPUS_LIBRARIES}
${sdrbase_FFTW3F_LIB}
Expand Down
154 changes: 154 additions & 0 deletions sdrbase/dsp/cudavkfftengine.cpp
@@ -0,0 +1,154 @@
///////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2023 Jon Beniston, M7RCE //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation as version 3 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License V3 for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
///////////////////////////////////////////////////////////////////////////////////

#include <QDebug>

#include "dsp/cudavkfftengine.h"

CUDAvkFFTEngine::CUDAvkFFTEngine()
{
VkFFTResult resFFT;
resFFT = gpuInit();
if (resFFT != VKFFT_SUCCESS)
{
qDebug() << "CUDAvkFFTEngine::CUDAvkFFTEngine: Failed to initialise GPU" << getVkFFTErrorString(resFFT);
delete vkGPU;
vkGPU = nullptr;
}
}

CUDAvkFFTEngine::~CUDAvkFFTEngine()
{
if (vkGPU)
{
freeAll();
cuCtxDestroy(vkGPU->context);
}
}

const QString CUDAvkFFTEngine::m_name = "vkFFT (CUDA)";

QString CUDAvkFFTEngine::getName() const
{
return m_name;
}

VkFFTResult CUDAvkFFTEngine::gpuInit()
{
CUresult res = CUDA_SUCCESS;
cudaError_t res2 = cudaSuccess;
res = cuInit(0);
if (res != CUDA_SUCCESS) {
return VKFFT_ERROR_FAILED_TO_INITIALIZE;
}
res2 = cudaSetDevice((int)vkGPU->device_id);
if (res2 != cudaSuccess) {
return VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID;
}
res = cuDeviceGet(&vkGPU->device, (int)vkGPU->device_id);
if (res != CUDA_SUCCESS) {
return VKFFT_ERROR_FAILED_TO_GET_DEVICE;
}
res = cuDevicePrimaryCtxRetain(&vkGPU->context, (int)vkGPU->device);
if (res != CUDA_SUCCESS) {
return VKFFT_ERROR_FAILED_TO_CREATE_CONTEXT;
}
return VKFFT_SUCCESS;
}

VkFFTResult CUDAvkFFTEngine::gpuAllocateBuffers()
{
cudaError_t res;
CUDAPlan *plan = reinterpret_cast<CUDAPlan *>(m_currentPlan);

// Allocate DMA accessible pinned memory, which may be faster than malloc'ed memory
res = cudaHostAlloc(&plan->m_in, sizeof(Complex) * plan->n, cudaHostAllocMapped);
if (res != cudaSuccess) {
return VKFFT_ERROR_FAILED_TO_ALLOCATE;
}
res = cudaHostAlloc(&plan->m_out, sizeof(Complex) * plan->n, cudaHostAllocMapped);
if (res != cudaSuccess) {
return VKFFT_ERROR_FAILED_TO_ALLOCATE;
}

// Allocate GPU memory
res = cudaMalloc((void**)&plan->m_buffer, sizeof(cuFloatComplex) * plan->n * 2);
if (res != cudaSuccess) {
return VKFFT_ERROR_FAILED_TO_ALLOCATE;
}

plan->m_configuration->buffer = (void**)&plan->m_buffer;

return VKFFT_SUCCESS;
}

VkFFTResult CUDAvkFFTEngine::gpuConfigure()
{
return VKFFT_SUCCESS;
}

void CUDAvkFFTEngine::transform()
{
if (m_currentPlan)
{
CUDAPlan *plan = reinterpret_cast<CUDAPlan *>(m_currentPlan);
cudaError_t res = cudaSuccess;
void* buffer = ((void**)&plan->m_buffer)[0];

// Transfer input from CPU to GPU memory
PROFILER_START()
res = cudaMemcpy(buffer, plan->m_in, plan->m_bufferSize, cudaMemcpyHostToDevice);
PROFILER_STOP(QString("%1 TX %2").arg(getName()).arg(m_currentPlan->n))
if (res != cudaSuccess) {
qDebug() << "CUDAvkFFTEngine::transform: cudaMemcpy host to device failed";
}

// Perform FFT
PROFILER_RESTART()
VkFFTLaunchParams launchParams = {};
VkFFTResult resFFT = VkFFTAppend(plan->m_app, plan->m_inverse ? 1 : -1, &launchParams);
PROFILER_STOP(QString("%1 FFT %2").arg(getName()).arg(m_currentPlan->n))
if (resFFT != VKFFT_SUCCESS) {
qDebug() << "CUDAvkFFTEngine::transform: VkFFTAppend failed:" << getVkFFTErrorString(resFFT);
}

// Transfer result from GPU to CPU memory
PROFILER_RESTART()
res = cudaMemcpy(plan->m_out, buffer, plan->m_bufferSize, cudaMemcpyDeviceToHost);
PROFILER_STOP(QString("%1 RX %2").arg(getName()).arg(m_currentPlan->n))
if (res != cudaSuccess) {
qDebug() << "CUDAvkFFTEngine::transform: cudaMemcpy device to host failed";
}
}

}

vkFFTEngine::Plan *CUDAvkFFTEngine::gpuAllocatePlan()
{
return new CUDAPlan();
}

void CUDAvkFFTEngine::gpuDeallocatePlan(Plan *p)
{
CUDAPlan *plan = reinterpret_cast<CUDAPlan *>(p);

cudaFree(plan->m_in);
plan->m_in = nullptr;
cudaFree(plan->m_out);
plan->m_out = nullptr;
cudaFree(plan->m_buffer);
}

0 comments on commit 5e71da4

Please sign in to comment.