diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b40e553c..6febafd4b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,6 +63,11 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Setup cmake + uses: jwlawson/actions-setup-cmake@v2 + with: + cmake-version: '3.31.6' + # Enable tmate debugging of manually-triggered workflows if the input option was provided - name: Setup tmate session uses: mxschmitt/action-tmate@v3 @@ -203,6 +208,11 @@ jobs: with: name: dependencies-${{ matrix.os }}-${{ matrix.cxx }}-${{ matrix.mpi }}-${{ matrix.omp }} + - name: Setup cmake + uses: jwlawson/actions-setup-cmake@v2 + with: + cmake-version: '3.31.6' + - name: Unpack dependencies run: tar xfv dependencies.tar @@ -284,6 +294,11 @@ jobs: with: name: dependencies-${{ matrix.os }}-${{ matrix.cxx }}-${{ matrix.mpi }}-${{ matrix.omp }} + - name: Setup cmake + uses: jwlawson/actions-setup-cmake@v2 + with: + cmake-version: '3.31.6' + - name: Unpack dependencies run: tar xfv dependencies.tar - name: Install Dependencies on Ubunutu @@ -312,7 +327,7 @@ jobs: cmake .. --fresh -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -Donnxrt=ON -Dhdf5=ON -Ddocasa=OFF -Ddompi=${{matrix.mpi}} -Dopenmp=${{matrix.omp}} -Dtests=OFF -Dexamples=ON -Dbenchmarks=ON make -j$(nproc --ignore 1) install - + doc: needs: dependencies @@ -341,6 +356,11 @@ jobs: with: name: dependencies-${{ matrix.os }}-${{ matrix.cxx }}-${{ matrix.mpi }}-${{ matrix.omp }} + - name: Setup cmake + uses: jwlawson/actions-setup-cmake@v2 + with: + cmake-version: '3.31.6' + - name: Unpack dependencies run: tar xfv dependencies.tar diff --git a/cmake_files/dependencies.cmake b/cmake_files/dependencies.cmake index 6694f1b3b..ba6ee4b9b 100644 --- a/cmake_files/dependencies.cmake +++ b/cmake_files/dependencies.cmake @@ -18,14 +18,13 @@ else() endif() find_package(CFitsIO REQUIRED) +find_package(yaml-cpp REQUIRED) if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.30.0") cmake_policy(SET CMP0167 NEW) endif() find_package(Boost COMPONENTS system filesystem REQUIRED) -find_package(yaml-cpp REQUIRED) - find_package(sopt REQUIRED) set(PURIFY_ONNXRT FALSE) if (onnxrt) diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc index badc12160..15a137385 100644 --- a/cpp/benchmarks/algorithms_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -187,8 +187,36 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &stat m_fb = factory::fb_factory>( factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data, - m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, - false, 1e-3, 1e-2, 50, tf_model_path, nondiff_func_type::Denoiser); + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3), true, true, false, + 1e-3, 1e-2, 50, tf_model_path, nondiff_func_type::Denoiser); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeGrid)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + + // TODO: Wavelets are constructed but not used in the factory method + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + std::string tf_model_path = purify::models_directory() + "/snr_15_model_dynamic.onnx"; + + m_fb = factory::fb_factory>( + factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, m_uv_data, + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3), true, true, false, + 1e-3, 1e-2, 50, tf_model_path, nondiff_func_type::Denoiser); // Benchmark the application of the algorithm while (state.KeepRunning()) { @@ -205,14 +233,31 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeImage) ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) - ->Args({1024, static_cast(1e8), 4, 10, 1}) - ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->Args({2048, static_cast(1e6), 4, 10, 1}) + ->Args({2048, static_cast(1e7), 4, 10, 1}) + ->Args({4096, static_cast(1e6), 4, 10, 1}) + ->Args({4096, static_cast(1e7), 4, 10, 1}) ->UseManualTime() ->MinTime(60.0) ->MinWarmUpTime(10.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); +BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeGrid) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 2}) + ->Args({1024, static_cast(1e7), 4, 10, 2}) + ->Args({2048, static_cast(1e6), 4, 10, 2}) + ->Args({2048, static_cast(1e7), 4, 10, 2}) + ->Args({4096, static_cast(1e6), 4, 10, 2}) + ->Args({4096, static_cast(1e7), 4, 10, 2}) + ->UseManualTime() + ->MinTime(9.0) + ->MinWarmUpTime(1.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + #endif BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeImage) @@ -220,8 +265,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeImage) ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) - ->Args({1024, static_cast(1e8), 4, 10, 1}) - ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->Args({2048, static_cast(1e6), 4, 10, 1}) + ->Args({2048, static_cast(1e7), 4, 10, 1}) + ->Args({4096, static_cast(1e6), 4, 10, 1}) + ->Args({4096, static_cast(1e7), 4, 10, 1}) ->UseManualTime() ->MinTime(60.0) ->MinWarmUpTime(10.0) @@ -233,8 +280,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeGrid) ->Args({128, 10000, 4, 10, 2}) ->Args({1024, static_cast(1e6), 4, 10, 2}) ->Args({1024, static_cast(1e7), 4, 10, 2}) - ->Args({1024, static_cast(1e8), 4, 10, 2}) - ->Args({1024, static_cast(1e9), 4, 10, 2}) + ->Args({2048, static_cast(1e6), 4, 10, 2}) + ->Args({2048, static_cast(1e7), 4, 10, 2}) + ->Args({4096, static_cast(1e6), 4, 10, 2}) + ->Args({4096, static_cast(1e7), 4, 10, 2}) ->UseManualTime() ->MinTime(60.0) ->MinWarmUpTime(10.0) @@ -246,8 +295,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeImage) ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) - ->Args({1024, static_cast(1e8), 4, 10, 1}) - ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->Args({2048, static_cast(1e6), 4, 10, 1}) + ->Args({2048, static_cast(1e7), 4, 10, 1}) + ->Args({4096, static_cast(1e6), 4, 10, 1}) + ->Args({4096, static_cast(1e7), 4, 10, 1}) ->UseManualTime() ->MinTime(120.0) ->MinWarmUpTime(10.0) @@ -259,8 +310,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeGrid) ->Args({128, 10000, 4, 10, 2}) ->Args({1024, static_cast(1e6), 4, 10, 2}) ->Args({1024, static_cast(1e7), 4, 10, 2}) - ->Args({1024, static_cast(1e8), 4, 10, 2}) - ->Args({1024, static_cast(1e9), 4, 10, 2}) + ->Args({2048, static_cast(1e6), 4, 10, 2}) + ->Args({2048, static_cast(1e7), 4, 10, 2}) + ->Args({4096, static_cast(1e6), 4, 10, 2}) + ->Args({4096, static_cast(1e7), 4, 10, 2}) ->UseManualTime() ->MinTime(120.0) ->MinWarmUpTime(10.0) diff --git a/cpp/benchmarks/utilities.cc b/cpp/benchmarks/utilities.cc index e43c8898d..437c2d1a0 100644 --- a/cpp/benchmarks/utilities.cc +++ b/cpp/benchmarks/utilities.cc @@ -93,24 +93,28 @@ std::tuple dirty_measurements( return std::make_tuple(uv_data, sigma); } -utilities::vis_params random_measurements(t_int size, const t_real max_w, const t_int id) { +utilities::vis_params random_measurements(t_int size, const t_real max_w, const t_int id, + const bool cache_visibilities) { + utilities::vis_params uv_data; + std::stringstream filename; filename << "random_" << size << "_"; filename << std::to_string(id) << ".vis"; std::string const vis_file = visibility_filename(filename.str()); std::ifstream vis_file_str(vis_file); - utilities::vis_params uv_data; - if (vis_file_str.good()) { + if (cache_visibilities and vis_file_str.good()) { PURIFY_INFO("Reading random visibilities from file {}", vis_file); uv_data = utilities::read_visibility(vis_file, true); uv_data.units = utilities::vis_units::radians; } else { - PURIFY_INFO("Generating random visibilities and writing to {}", vis_file); + PURIFY_INFO("Generating random visibilities"); t_real const sigma_m = constant::pi / 3; uv_data = utilities::random_sample_density(size, 0, sigma_m, max_w); uv_data.units = utilities::vis_units::radians; - utilities::write_visibility(uv_data, vis_file, true); + if (cache_visibilities) { + utilities::write_visibility(uv_data, vis_file, true); + } } return uv_data; } diff --git a/cpp/benchmarks/utilities.h b/cpp/benchmarks/utilities.h index 4f204cb5f..531b8bfb7 100644 --- a/cpp/benchmarks/utilities.h +++ b/cpp/benchmarks/utilities.h @@ -28,7 +28,8 @@ std::tuple dirty_measurements( Image const& ground_truth_image, t_uint number_of_vis, t_real snr, const t_real& cellsize); -utilities::vis_params random_measurements(t_int size, const t_real max_w = 100, const t_int id = 0); +utilities::vis_params random_measurements(t_int size, const t_real max_w = 100, const t_int id = 0, + const bool cache_visibilities = false); #ifdef PURIFY_MPI double duration(std::chrono::high_resolution_clock::time_point start, std::chrono::high_resolution_clock::time_point end,