Skip to content

Commit

Permalink
Merge branch 'fix_linux_cuda'
Browse files Browse the repository at this point in the history
  • Loading branch information
astrowander committed Dec 26, 2023
2 parents cfc02a1 + 14a151f commit 652238c
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 38 deletions.
4 changes: 2 additions & 2 deletions Core/versioning.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
#define MAJOR_VERSION "1"
#define MINOR_VERSION "0"
#define REVISION_VERSION "2"
#define BUILD_NUMBER "1"
#define FULL_VERSION MAJOR_VERSION "." MINOR_VERSION "." REVISION_VERSION "." BUILD_NUMBER
#define BUILD_NUMBER "2"
#define FULL_VERSION MAJOR_VERSION "." MINOR_VERSION "." REVISION_VERSION "." BUILD_NUMBER
1 change: 1 addition & 0 deletions Cuda/AddBitmapWithAlignment.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "AddBitmapWithAlignment.cuh"
#include "CudaUtils.cuh"
#include <float.h>
ACMB_CUDA_NAMESPACE_BEGIN

Expand Down
1 change: 0 additions & 1 deletion Cuda/AddBitmapWithAlignment.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once
#include "CudaBasic.h"
#include "CudaUtils.cuh"

ACMB_CUDA_NAMESPACE_BEGIN

Expand Down
44 changes: 19 additions & 25 deletions Cuda/CudaStacker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "CudaBasic.h"
#include "./../Core/bitmap.h"

static constexpr bool CUDA_SYNCHRONIZE = false;
ACMB_CUDA_NAMESPACE_BEGIN

struct StackData
Expand Down Expand Up @@ -56,18 +57,20 @@ void Stacker::Init()

void Stacker::CallAddBitmapHelper( IBitmapPtr pBitmap )
{
#ifdef CUDA_SYNCHRONIZE
if ( cudaDeviceSynchronize() != cudaSuccess )
throw std::runtime_error( "error in CUDA kernel occured" );
#endif
if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\
throw std::runtime_error( "error in CUDA kernel occured" );\

const size_t size = _width * _height * ChannelCount( _pixelFormat );

#define TRY_ADD_BITMAP( format ) \
if (_pixelFormat == format) { \
using DynamicArrayT = typename std::conditional_t<PixelFormatTraits<format>::bytesPerChannel == 1, DynamicArrayU8, DynamicArrayU16>;\
auto& bitmap = std::get<DynamicArrayT>( _stackData->_cudaBitmap );\
bitmap.fromVector( std::static_pointer_cast< Bitmap<format> >( pBitmap )->GetData() );\
return AddBitmapHelper( bitmap.data(), _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data(), size ); }
AddBitmapHelper( bitmap.data(), _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data(), size );\
if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\
throw std::runtime_error( "error in CUDA kernel occured" );\
return;}

TRY_ADD_BITMAP( PixelFormat::Gray8 );
TRY_ADD_BITMAP( PixelFormat::Gray16 );
Expand All @@ -76,26 +79,24 @@ if (_pixelFormat == format) { \
TRY_ADD_BITMAP( PixelFormat::Bayer16 );

throw std::runtime_error( "pixel format must be known" );
#ifdef CUDA_SYNCHRONIZE
if ( cudaDeviceSynchronize() != cudaSuccess )
throw std::runtime_error( "error in CUDA kernel occured" );
#endif

}

void Stacker::CallAddBitmapWithAlignmentHelper( IBitmapPtr pBitmap )
{
#ifdef CUDA_SYNCHRONIZE
if ( cudaDeviceSynchronize() != cudaSuccess )
throw std::runtime_error( "error in CUDA kernel occured" );
#endif
if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )
throw std::runtime_error( "error in CUDA kernel occured" );
#define TRY_ADD_BITMAP_WITH_ALIGNMENT( format ) \
if (_pixelFormat == format) { \
using DynamicArrayT = typename std::conditional_t<PixelFormatTraits<format>::bytesPerChannel == 1, DynamicArrayU8, DynamicArrayU16>;\
using HelperT = typename std::conditional_t<PixelFormatTraits<format>::bytesPerChannel == 1, AddBitmapWithAlignmentHelperU8, AddBitmapWithAlignmentHelperU16>;\
auto& bitmap = std::get<DynamicArrayT>( _stackData->_cudaBitmap );\
auto& helper = std::get<HelperT>( _helper );\
bitmap.fromVector( std::static_pointer_cast< Bitmap<format> >( pBitmap )->GetData() );\
return helper.Run( bitmap.data(), _width, _height, PixelFormatTraits<format>::channelCount, _grid, _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data() ); }
helper.Run( bitmap.data(), _width, _height, PixelFormatTraits<format>::channelCount, _grid, _stackData->_means.data(), _stackData->_devs.data(), _stackData->_counts.data() );\
if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\
throw std::runtime_error( "error in CUDA kernel occured" );\
return;}

TRY_ADD_BITMAP_WITH_ALIGNMENT( PixelFormat::Gray8 );
TRY_ADD_BITMAP_WITH_ALIGNMENT( PixelFormat::Gray16 );
Expand All @@ -104,24 +105,21 @@ if (_pixelFormat == format) { \
TRY_ADD_BITMAP_WITH_ALIGNMENT( PixelFormat::Bayer16 );

throw std::runtime_error( "pixel format must be known" );
#ifdef CUDA_SYNCHRONIZE
if ( cudaDeviceSynchronize() != cudaSuccess )
throw std::runtime_error( "error in CUDA kernel occured" );
#endif
}

IBitmapPtr Stacker::CallGeneratingResultHelper()
{
#ifdef CUDA_SYNCHRONIZE
if ( cudaDeviceSynchronize() != cudaSuccess )
if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )
throw std::runtime_error( "error in CUDA kernel occured" );
#endif

const size_t size = _width * _height * ChannelCount( _pixelFormat );
#define TRY_GENERATE_RESULT( format ) \
if (_pixelFormat == format ) { \
using DynamicArrayT = typename std::conditional_t<PixelFormatTraits<format>::bytesPerChannel == 1, DynamicArrayU8, DynamicArrayU16>;\
auto& bitmap = std::get<DynamicArrayT>( _stackData->_cudaBitmap );\
GeneratingResultKernel(_stackData->_means.data(), bitmap.data(), size );\
if ( CUDA_SYNCHRONIZE && cudaDeviceSynchronize() != cudaSuccess )\
throw std::runtime_error( "error in CUDA kernel occured" );\
IBitmapPtr res = IBitmap::Create( _width, _height, _pixelFormat );\
bitmap.toVector(std::static_pointer_cast<Bitmap<format>>(res)->GetData());\
return res; } \
Expand All @@ -133,10 +131,6 @@ if (_pixelFormat == format ) { \
TRY_GENERATE_RESULT( PixelFormat::Bayer16 );

throw std::runtime_error( "pixel format must be known" );
#ifdef CUDA_SYNCHRONIZE
if ( cudaDeviceSynchronize() != cudaSuccess )
throw std::runtime_error( "error in CUDA kernel occured" );
#endif
}

ACMB_CUDA_NAMESPACE_END
10 changes: 6 additions & 4 deletions Cuda/acmb-cuda.pro
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ DEPENDPATH += /usr/local/include

HEADERS += \
AddBitmap.h \
AddBitmapWithAlignment.cuh \
AddBitmapWithAlignment.h \
CudaBasic.h \
CudaBasic.hpp \
CudaStacker.h \
CudaUtils.cuh \
GenerateResult.h\
CudaInfo.h

Expand All @@ -37,9 +39,9 @@ CUDA_SOURCES += \
CUDA_DIR = /usr/lib/cuda
# GPU architecture
#https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#virtual-architecture-feature-list
CUDA_VARCH = compute_87
CUDA_VARCH = compute_80
#https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list-dc -rdc=true
CUDA_GPU_ARCH = sm_87
CUDA_GPU_ARCH = sm_80

# nvcc flags (ptxas option verbose is always useful)
NVCCFLAGS = --compiler-options -use-fast-math --Wno-deprecated-gpu-targets
Expand Down Expand Up @@ -77,7 +79,7 @@ cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -g -G -gencode arch=$$CUDA_VARCH,code=$
2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
}
else {
cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O3 -gencode arch=$$CUDA_VARCH,code=$$CUDA_GPU_ARCH -c $$NVCCFLAGS \
cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O2 -gencode arch=$$CUDA_VARCH,code=$$CUDA_GPU_ARCH -c $$NVCCFLAGS \
$$CUDA_INC $$LIBS ${QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT} \
2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
}
Expand All @@ -91,7 +93,7 @@ CONFIG(debug, debug|release) {
cuda.depend_command = $$CUDA_DIR/bin/nvcc -g -G -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME} | sed \"s/^.*: //\"
}
else {
cuda.depend_command = $$CUDA_DIR/bin/nvcc -O3 -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME} | sed \"s/^.*: //\"
cuda.depend_command = $$CUDA_DIR/bin/nvcc -O2 -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME} | sed \"s/^.*: //\"
}

# Tell Qt that we want add more stuff to the Makefile
Expand Down
2 changes: 1 addition & 1 deletion GUI/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ int main(int, char**)
_wsetlocale( LC_ALL, L".UTF8" );
// Create application window
//ImGui_ImplWin32_EnableDpiAwareness();
WNDCLASSEXW wc = { sizeof(wc), CS_CLASSDC, WndProc, 0L, 0L, GetModuleHandle(nullptr), nullptr, nullptr, nullptr, nulglfwSetWindowIcon(window, 1, images); lptr, L"ACMB", nullptr };
WNDCLASSEXW wc = { sizeof(wc), CS_CLASSDC, WndProc, 0L, 0L, GetModuleHandle(nullptr), nullptr, nullptr, nullptr, nullptr, L"ACMB", nullptr };
::RegisterClassExW(&wc);

CRect rcDesktop;
Expand Down
26 changes: 26 additions & 0 deletions Tests/TestCudaStacker.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include "test.h"
#include "testtools.h"
#include "../Core/bitmap.h"
#include "../Core/pipeline.h"
#include "../Cuda/CudaStacker.h"
#include "../Registrator/stacker.h"

ACMB_TESTS_NAMESPACE_BEGIN

BEGIN_SUITE( CudaStacker )

BEGIN_TEST( TwoBitmaps )

std::vector<Pipeline> pipelines;
pipelines.emplace_back( std::make_shared<Bitmap<PixelFormat::RGB24>>( 10, 10, MakeRGB24( 254, 0, 0 ) ) );
pipelines.emplace_back( std::make_shared<Bitmap<PixelFormat::RGB24>>( 10, 10, MakeRGB24( 0, 0, 254 ) ) );

Stacker stacker( pipelines, StackMode::DarkOrFlat );
cuda::Stacker cudaStacker( pipelines, StackMode::DarkOrFlat );
EXPECT_TRUE( BitmapsAreEqual( stacker.Stack(), cudaStacker.Stack() ) );

END_TEST

END_SUITE

ACMB_TESTS_NAMESPACE_END
1 change: 0 additions & 1 deletion Tests/TestStacker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include "../Core/pipeline.h"
#include "../Codecs/Raw/RawDecoder.h"
#include "../Registrator/stacker.h"
#include "../Transforms/converter.h"
#include "../Transforms/BitmapSubtractor.h"
#include "../Transforms/BitmapDivisor.h"
#include <filesystem>
Expand Down
4 changes: 4 additions & 0 deletions Tests/acmb-tests.pro
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ LIBS += -ltbb
LIBS += -L/usr/local/lib/ -lCCfits
LIBS += -L/usr/local/lib/ -lcfitsio
LIBS += -lx265
LIBS += -lcudart

HEADERS += \
TestRunner.h \
Expand All @@ -24,6 +25,7 @@ SOURCES += \
TestChannelEqualizer.cpp \
TestConverter.cpp \
TestCropTransform.cpp \
TestCudaStacker.cpp \
TestDeaberrateTransform.cpp \
TestDebayerTransform.cpp \
TestFastAligner.cpp \
Expand Down Expand Up @@ -53,11 +55,13 @@ SOURCES += \
testtools.cpp

LIBS += -L$$OUT_PWD/../ -lacmb-lib
LIBS += -L$$OUT_PWD/../Cuda/ -lacmb-cuda

INCLUDEPATH += $$PWD/../
DEPENDPATH += $$PWD/../

PRE_TARGETDEPS += $$OUT_PWD/../libacmb-lib.a
PRE_TARGETDEPS += $$OUT_PWD/../Cuda/libacmb-cuda.a

INCLUDEPATH += /usr/local/include
DEPENDPATH += /usr/local/include
9 changes: 5 additions & 4 deletions Tests/acmb-tests.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,16 @@
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>%(AdditionalDependencies);acmb-cuda.lib;tbb12.lib;</AdditionalDependencies>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);./../x64/Release;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_release/;</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies);acmb-cuda.lib;tbb12.lib;cudart_static.lib;</AdditionalDependencies>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);./../x64/Release;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_release/;$(CUDA_PATH_V12_0)/lib/x64</AdditionalLibraryDirectories>
<SubSystem>Console</SubSystem>
<AdditionalOptions>/LTCG %(AdditionalOptions)</AdditionalOptions>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);./../x64/Debug;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_debug/;</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies);acmb-cuda.lib;tbb12_debug.lib;</AdditionalDependencies>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);./../x64/Debug;./../Libs/oneTBB/build/msvc_19.32_cxx_64_md_debug/;$(CUDA_PATH_V12_0)/lib/x64</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies);acmb-cuda.lib;tbb12_debug.lib;cudart_static.lib;</AdditionalDependencies>
<SubSystem>Console</SubSystem>
</Link>
<ClCompile>
Expand All @@ -128,6 +128,7 @@
<ClCompile Include="main.cpp" />
<ClCompile Include="TestBitmapDivisor.cpp" />
<ClCompile Include="TestCropTransform.cpp" />
<ClCompile Include="TestCudaStacker.cpp" />
<ClCompile Include="TestDebayerTransform.cpp" />
<ClCompile Include="TestFitsDecoder.cpp" />
<ClCompile Include="TestFitsEncoder.cpp" />
Expand Down
3 changes: 3 additions & 0 deletions Tests/acmb-tests.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@
<ClCompile Include="TestH265Encoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="TestCudaStacker.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="test.h">
Expand Down

0 comments on commit 652238c

Please sign in to comment.