diff --git a/Core/versioning.h b/Core/versioning.h index 84f0534..63c9556 100644 --- a/Core/versioning.h +++ b/Core/versioning.h @@ -2,5 +2,5 @@ #define MAJOR_VERSION "1" #define MINOR_VERSION "0" #define REVISION_VERSION "2" -#define BUILD_NUMBER "1" -#define FULL_VERSION MAJOR_VERSION "." MINOR_VERSION "." REVISION_VERSION "." BUILD_NUMBER \ No newline at end of file +#define BUILD_NUMBER "2" +#define FULL_VERSION MAJOR_VERSION "." MINOR_VERSION "." REVISION_VERSION "." BUILD_NUMBER diff --git a/Cuda/AddBitmapWithAlignment.cu b/Cuda/AddBitmapWithAlignment.cu index 6398235..6f53ff9 100644 --- a/Cuda/AddBitmapWithAlignment.cu +++ b/Cuda/AddBitmapWithAlignment.cu @@ -1,4 +1,5 @@ #include "AddBitmapWithAlignment.cuh" +#include "CudaUtils.cuh" #include ACMB_CUDA_NAMESPACE_BEGIN diff --git a/Cuda/AddBitmapWithAlignment.cuh b/Cuda/AddBitmapWithAlignment.cuh index 95e5ed8..2b30dec 100644 --- a/Cuda/AddBitmapWithAlignment.cuh +++ b/Cuda/AddBitmapWithAlignment.cuh @@ -1,6 +1,5 @@ #pragma once #include "CudaBasic.h" -#include "CudaUtils.cuh" ACMB_CUDA_NAMESPACE_BEGIN diff --git a/Cuda/CudaStacker.cpp b/Cuda/CudaStacker.cpp index c7544d0..cfcc440 100644 --- a/Cuda/CudaStacker.cpp +++ b/Cuda/CudaStacker.cpp @@ -4,6 +4,7 @@ #include "CudaBasic.h" #include "./../Core/bitmap.h" +static constexpr bool CUDA_SYNCHRONIZE = false; ACMB_CUDA_NAMESPACE_BEGIN struct StackData @@ -56,10 +57,9 @@ void Stacker::Init() void Stacker::CallAddBitmapHelper( IBitmapPtr pBitmap ) { -#ifdef CUDA_SYNCHRONIZE - if ( cudaDeviceSynchronize() != cudaSuccess ) - throw std::runtime_error( "error in CUDA kernel occured" ); -#endif + if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\ + throw std::runtime_error( "error in CUDA kernel occured" );\ + const size_t size = _width * _height * ChannelCount( _pixelFormat ); #define TRY_ADD_BITMAP( format ) \ @@ -67,7 +67,10 @@ if (_pixelFormat == format) { \ using DynamicArrayT = typename std::conditional_t::bytesPerChannel == 1, DynamicArrayU8, DynamicArrayU16>;\ auto& bitmap = std::get( _stackData->_cudaBitmap );\ bitmap.fromVector( std::static_pointer_cast< Bitmap >( pBitmap )->GetData() );\ - return AddBitmapHelper( bitmap.data(), _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data(), size ); } + AddBitmapHelper( bitmap.data(), _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data(), size );\ + if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\ + throw std::runtime_error( "error in CUDA kernel occured" );\ + return;} TRY_ADD_BITMAP( PixelFormat::Gray8 ); TRY_ADD_BITMAP( PixelFormat::Gray16 ); @@ -76,18 +79,13 @@ if (_pixelFormat == format) { \ TRY_ADD_BITMAP( PixelFormat::Bayer16 ); throw std::runtime_error( "pixel format must be known" ); -#ifdef CUDA_SYNCHRONIZE - if ( cudaDeviceSynchronize() != cudaSuccess ) - throw std::runtime_error( "error in CUDA kernel occured" ); -#endif + } void Stacker::CallAddBitmapWithAlignmentHelper( IBitmapPtr pBitmap ) { -#ifdef CUDA_SYNCHRONIZE - if ( cudaDeviceSynchronize() != cudaSuccess ) - throw std::runtime_error( "error in CUDA kernel occured" ); -#endif + if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess ) + throw std::runtime_error( "error in CUDA kernel occured" ); #define TRY_ADD_BITMAP_WITH_ALIGNMENT( format ) \ if (_pixelFormat == format) { \ using DynamicArrayT = typename std::conditional_t::bytesPerChannel == 1, DynamicArrayU8, DynamicArrayU16>;\ @@ -95,7 +93,10 @@ if (_pixelFormat == format) { \ auto& bitmap = std::get( _stackData->_cudaBitmap );\ auto& helper = std::get( _helper );\ bitmap.fromVector( std::static_pointer_cast< Bitmap >( pBitmap )->GetData() );\ - return helper.Run( bitmap.data(), _width, _height, PixelFormatTraits::channelCount, _grid, _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data() ); } + helper.Run( bitmap.data(), _width, _height, PixelFormatTraits::channelCount, _grid, _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data() );\ + if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\ + throw std::runtime_error( "error in CUDA kernel occured" );\ + return;} TRY_ADD_BITMAP_WITH_ALIGNMENT( PixelFormat::Gray8 ); TRY_ADD_BITMAP_WITH_ALIGNMENT( PixelFormat::Gray16 ); @@ -104,24 +105,21 @@ if (_pixelFormat == format) { \ TRY_ADD_BITMAP_WITH_ALIGNMENT( PixelFormat::Bayer16 ); throw std::runtime_error( "pixel format must be known" ); -#ifdef CUDA_SYNCHRONIZE - if ( cudaDeviceSynchronize() != cudaSuccess ) - throw std::runtime_error( "error in CUDA kernel occured" ); -#endif } IBitmapPtr Stacker::CallGeneratingResultHelper() { -#ifdef CUDA_SYNCHRONIZE - if ( cudaDeviceSynchronize() != cudaSuccess ) + if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess ) throw std::runtime_error( "error in CUDA kernel occured" ); -#endif + const size_t size = _width * _height * ChannelCount( _pixelFormat ); #define TRY_GENERATE_RESULT( format ) \ if (_pixelFormat == format ) { \ using DynamicArrayT = typename std::conditional_t::bytesPerChannel == 1, DynamicArrayU8, DynamicArrayU16>;\ auto& bitmap = std::get( _stackData->_cudaBitmap );\ GeneratingResultKernel(_stackData->_means.data(), bitmap.data(), size );\ + if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\ + throw std::runtime_error( "error in CUDA kernel occured" );\ IBitmapPtr res = IBitmap::Create( _width, _height, _pixelFormat );\ bitmap.toVector(std::static_pointer_cast>(res)->GetData());\ return res; } \ @@ -133,10 +131,6 @@ if (_pixelFormat == format ) { \ TRY_GENERATE_RESULT( PixelFormat::Bayer16 ); throw std::runtime_error( "pixel format must be known" ); -#ifdef CUDA_SYNCHRONIZE - if ( cudaDeviceSynchronize() != cudaSuccess ) - throw std::runtime_error( "error in CUDA kernel occured" ); -#endif } ACMB_CUDA_NAMESPACE_END diff --git a/Cuda/acmb-cuda.pro b/Cuda/acmb-cuda.pro index e0e25c4..3ad91a5 100644 --- a/Cuda/acmb-cuda.pro +++ b/Cuda/acmb-cuda.pro @@ -17,10 +17,12 @@ DEPENDPATH += /usr/local/include HEADERS += \ AddBitmap.h \ + AddBitmapWithAlignment.cuh \ AddBitmapWithAlignment.h \ CudaBasic.h \ CudaBasic.hpp \ CudaStacker.h \ + CudaUtils.cuh \ GenerateResult.h\ CudaInfo.h @@ -37,9 +39,9 @@ CUDA_SOURCES += \ CUDA_DIR = /usr/lib/cuda # GPU architecture #https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#virtual-architecture-feature-list -CUDA_VARCH = compute_87 +CUDA_VARCH = compute_80 #https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list-dc -rdc=true -CUDA_GPU_ARCH = sm_87 +CUDA_GPU_ARCH = sm_80 # nvcc flags (ptxas option verbose is always useful) NVCCFLAGS = --compiler-options -use-fast-math --Wno-deprecated-gpu-targets @@ -77,7 +79,7 @@ cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -g -G -gencode arch=$$CUDA_VARCH,code=$ 2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2 } else { -cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O3 -gencode arch=$$CUDA_VARCH,code=$$CUDA_GPU_ARCH -c $$NVCCFLAGS \ +cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O2 -gencode arch=$$CUDA_VARCH,code=$$CUDA_GPU_ARCH -c $$NVCCFLAGS \ $$CUDA_INC $$LIBS ${QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT} \ 2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2 } @@ -91,7 +93,7 @@ CONFIG(debug, debug|release) { cuda.depend_command = $$CUDA_DIR/bin/nvcc -g -G -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME} | sed \"s/^.*: //\" } else { - cuda.depend_command = $$CUDA_DIR/bin/nvcc -O3 -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME} | sed \"s/^.*: //\" + cuda.depend_command = $$CUDA_DIR/bin/nvcc -O2 -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME} | sed \"s/^.*: //\" } # Tell Qt that we want add more stuff to the Makefile diff --git a/GUI/main.cpp b/GUI/main.cpp index 95aef6e..9d6a21a 100644 --- a/GUI/main.cpp +++ b/GUI/main.cpp @@ -393,7 +393,7 @@ int main(int, char**) _wsetlocale( LC_ALL, L".UTF8" ); // Create application window //ImGui_ImplWin32_EnableDpiAwareness(); - WNDCLASSEXW wc = { sizeof(wc), CS_CLASSDC, WndProc, 0L, 0L, GetModuleHandle(nullptr), nullptr, nullptr, nullptr, nulglfwSetWindowIcon(window, 1, images); lptr, L"ACMB", nullptr }; + WNDCLASSEXW wc = { sizeof(wc), CS_CLASSDC, WndProc, 0L, 0L, GetModuleHandle(nullptr), nullptr, nullptr, nullptr, nullptr, L"ACMB", nullptr }; ::RegisterClassExW(&wc); CRect rcDesktop; diff --git a/Tests/TestCudaStacker.cpp b/Tests/TestCudaStacker.cpp new file mode 100644 index 0000000..f2ada1a --- /dev/null +++ b/Tests/TestCudaStacker.cpp @@ -0,0 +1,26 @@ +#include "test.h" +#include "testtools.h" +#include "../Core/bitmap.h" +#include "../Core/pipeline.h" +#include "../Cuda/CudaStacker.h" +#include "../Registrator/stacker.h" + +ACMB_TESTS_NAMESPACE_BEGIN + +BEGIN_SUITE( CudaStacker ) + +BEGIN_TEST( TwoBitmaps ) + +std::vector pipelines; +pipelines.emplace_back( std::make_shared>( 10, 10, MakeRGB24( 254, 0, 0 ) ) ); +pipelines.emplace_back( std::make_shared>( 10, 10, MakeRGB24( 0, 0, 254 ) ) ); + +Stacker stacker( pipelines, StackMode::DarkOrFlat ); +cuda::Stacker cudaStacker( pipelines, StackMode::DarkOrFlat ); +EXPECT_TRUE( BitmapsAreEqual( stacker.Stack(), cudaStacker.Stack() ) ); + +END_TEST + +END_SUITE + +ACMB_TESTS_NAMESPACE_END diff --git a/Tests/TestStacker.cpp b/Tests/TestStacker.cpp index 2436c78..fdd5469 100644 --- a/Tests/TestStacker.cpp +++ b/Tests/TestStacker.cpp @@ -3,7 +3,6 @@ #include "../Core/pipeline.h" #include "../Codecs/Raw/RawDecoder.h" #include "../Registrator/stacker.h" -#include "../Transforms/converter.h" #include "../Transforms/BitmapSubtractor.h" #include "../Transforms/BitmapDivisor.h" #include diff --git a/Tests/acmb-tests.pro b/Tests/acmb-tests.pro index f619b25..c17fcdd 100644 --- a/Tests/acmb-tests.pro +++ b/Tests/acmb-tests.pro @@ -10,6 +10,7 @@ LIBS += -ltbb LIBS += -L/usr/local/lib/ -lCCfits LIBS += -L/usr/local/lib/ -lcfitsio LIBS += -lx265 +LIBS += -lcudart HEADERS += \ TestRunner.h \ @@ -24,6 +25,7 @@ SOURCES += \ TestChannelEqualizer.cpp \ TestConverter.cpp \ TestCropTransform.cpp \ + TestCudaStacker.cpp \ TestDeaberrateTransform.cpp \ TestDebayerTransform.cpp \ TestFastAligner.cpp \ @@ -53,11 +55,13 @@ SOURCES += \ testtools.cpp LIBS += -L$$OUT_PWD/../ -lacmb-lib +LIBS += -L$$OUT_PWD/../Cuda/ -lacmb-cuda INCLUDEPATH += $$PWD/../ DEPENDPATH += $$PWD/../ PRE_TARGETDEPS += $$OUT_PWD/../libacmb-lib.a +PRE_TARGETDEPS += $$OUT_PWD/../Cuda/libacmb-cuda.a INCLUDEPATH += /usr/local/include DEPENDPATH += /usr/local/include diff --git a/Tests/acmb-tests.vcxproj b/Tests/acmb-tests.vcxproj index 0ff3a39..89238ca 100644 --- a/Tests/acmb-tests.vcxproj +++ b/Tests/acmb-tests.vcxproj @@ -105,16 +105,16 @@ - %(AdditionalDependencies);acmb-cuda.lib;tbb12.lib; - %(AdditionalLibraryDirectories);./../x64/Release;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_release/; + %(AdditionalDependencies);acmb-cuda.lib;tbb12.lib;cudart_static.lib; + %(AdditionalLibraryDirectories);./../x64/Release;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_release/;$(CUDA_PATH_V12_0)/lib/x64 Console /LTCG %(AdditionalOptions) - %(AdditionalLibraryDirectories);./../x64/Debug;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_debug/; - %(AdditionalDependencies);acmb-cuda.lib;tbb12_debug.lib; + %(AdditionalLibraryDirectories);./../x64/Debug;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_debug/;$(CUDA_PATH_V12_0)/lib/x64 + %(AdditionalDependencies);acmb-cuda.lib;tbb12_debug.lib;cudart_static.lib; Console @@ -128,6 +128,7 @@ + diff --git a/Tests/acmb-tests.vcxproj.filters b/Tests/acmb-tests.vcxproj.filters index c2fdaaa..a4b81d5 100644 --- a/Tests/acmb-tests.vcxproj.filters +++ b/Tests/acmb-tests.vcxproj.filters @@ -120,6 +120,9 @@ Source Files + + Source Files +