diff --git a/example/hessian.cpp b/example/hessian.cpp new file mode 100644 index 0000000000..aca22f4ce6 --- /dev/null +++ b/example/hessian.cpp @@ -0,0 +1,264 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace gil = boost::gil; + +// some images might produce artifacts +// when converted to grayscale, +// which was previously observed on +// canny edge detector for test input +// used for this example. +// the algorithm here follows sRGB gamma definition +// taken from here (luminance calculation): +// https://en.wikipedia.org/wiki/Grayscale +gil::gray8_image_t to_grayscale(gil::rgb8_view_t original) +{ + gil::gray8_image_t output_image(original.dimensions()); + auto output = gil::view(output_image); + constexpr double max_channel_intensity = (std::numeric_limits::max)(); + for (long int y = 0; y < original.height(); ++y) + { + for (long int x = 0; x < original.width(); ++x) + { + // scale the values into range [0, 1] and calculate linear intensity + auto& p = original(x, y); + double red_intensity = p.at(std::integral_constant{}) + / max_channel_intensity; + double green_intensity = p.at(std::integral_constant{}) + / max_channel_intensity; + double blue_intensity = p.at(std::integral_constant{}) + / max_channel_intensity; + auto linear_luminosity = 0.2126 * red_intensity + + 0.7152 * green_intensity + + 0.0722 * blue_intensity; + + // perform gamma adjustment + double gamma_compressed_luminosity = 0; + if (linear_luminosity < 0.0031308) + { + gamma_compressed_luminosity = linear_luminosity * 12.92; + } else + { + gamma_compressed_luminosity = 1.055 * std::pow(linear_luminosity, 1 / 2.4) - 0.055; + } + + // since now it is scaled, descale it back + output(x, y) = gamma_compressed_luminosity * max_channel_intensity; + } + } + + return output_image; +} + +void apply_gaussian_blur(gil::gray8_view_t input_view, gil::gray8_view_t output_view) +{ + constexpr static auto filter_height = 5ull; + constexpr static auto filter_width = 5ull; + constexpr static double filter[filter_height][filter_width] = + { + 2, 4, 6, 4, 2, + 4, 9, 12, 9, 4, + 5, 12, 15, 12, 5, + 4, 9, 12, 9, 4, + 2, 4, 5, 4, 2, + }; + constexpr double factor = 1.0 / 159; + constexpr double bias = 0.0; + + const auto height = input_view.height(); + const auto width = input_view.width(); + for (std::ptrdiff_t x = 0; x < width; ++x) + { + for (std::ptrdiff_t y = 0; y < height; ++y) + { + double intensity = 0.0; + for (std::ptrdiff_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (std::ptrdiff_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + int image_x = x - filter_width / 2 + filter_x; + int image_y = y - filter_height / 2 + filter_y; + if (image_x >= input_view.width() || image_x < 0 || + image_y >= input_view.height() || image_y < 0) + { + continue; + } + const auto& pixel = input_view(image_x, image_y); + intensity += pixel.at(std::integral_constant{}) + * filter[filter_y][filter_x]; + } + } + auto& pixel = output_view(gil::point_t(x, y)); + pixel = (std::min)((std::max)(int(factor * intensity + bias), 0), 255); + } + + } +} + +void calculate_gradients(gil::gray8_view_t input, + gil::gray16_view_t x_gradient, + gil::gray16_view_t y_gradient) +{ + using x_coord_t = gil::gray16_view_t::x_coord_t; + using y_coord_t = gil::gray16_view_t::y_coord_t; + using pixel_t = std::remove_reference::type; + using channel_t = typename std::remove_reference< + decltype( + std::declval().at( + std::integral_constant{} + ) + ) + >::type; + + constexpr double x_kernel[3][3] = + { + {1, 0, -1}, + {2, 0, -2}, + {1, 0, -1} + }; + constexpr double y_kernel[3][3] = + { + {1, 2, 1}, + {0, 0, 0}, + {-1, -2, -1} + }; + constexpr auto chosen_channel = std::integral_constant{}; + for (y_coord_t y = 1; y < input.height() - 1; ++y) + { + for (x_coord_t x = 1; x < input.width() - 1; ++x) + { + gil::gray16_pixel_t x_result; + boost::gil::static_transform(x_result, x_result, + [](channel_t) { return static_cast(0); }); + gil::gray16_pixel_t y_result; + boost::gil::static_transform(y_result, y_result, + [](channel_t) { return static_cast(0); }); + for (y_coord_t y_filter = 0; y_filter < 2; ++y_filter) + { + for (x_coord_t x_filter = 0; x_filter < 2; ++x_filter) + { + auto adjusted_y = y + y_filter - 1; + auto adjusted_x = x + x_filter - 1; + x_result.at(std::integral_constant{}) += + input(adjusted_x, adjusted_y).at(chosen_channel) + * x_kernel[y_filter][x_filter]; + y_result.at(std::integral_constant{}) += + input(adjusted_x, adjusted_y).at(chosen_channel) + * y_kernel[y_filter][x_filter]; + } + } + x_gradient(x, y) = static_cast(x_result.at(chosen_channel)); + y_gradient(x, y) = static_cast(y_result.at(chosen_channel)); + } + } +} + +std::vector suppress( + gil::gray32f_view_t harris_response, + double harris_response_threshold) +{ + std::vector corner_points; + for (gil::gray32f_view_t::coord_t y = 1; y < harris_response.height() - 1; ++y) + { + for (gil::gray32f_view_t::coord_t x = 1; x < harris_response.width() - 1; ++x) + { + auto value = [](gil::gray32f_pixel_t pixel) { + return pixel.at(std::integral_constant{}); + }; + double values[9] = { + value(harris_response(x - 1, y - 1)), + value(harris_response(x, y - 1)), + value(harris_response(x + 1, y - 1)), + value(harris_response(x - 1, y)), + value(harris_response(x, y)), + value(harris_response(x + 1, y)), + value(harris_response(x - 1, y + 1)), + value(harris_response(x, y + 1)), + value(harris_response(x + 1, y + 1)) + }; + + auto maxima = *std::max_element( + values, + values + 9, + [](double lhs, double rhs) + { + return lhs < rhs; + } + ); + + if (maxima == value(harris_response(x, y)) + && std::count(values, values + 9, maxima) == 1 + && maxima >= harris_response_threshold) + { + corner_points.emplace_back(x, y); + } + } + } + + return corner_points; +} + +int main(int argc, char* argv[]) { + if (argc != 5) + { + std::cout << "usage: " << argv[0] << " " + " \n"; + return -1; + } + + long int window_size = std::stoi(argv[2]); + long hessian_determinant_threshold = std::stol(argv[3]); + + gil::rgb8_image_t input_image; + + gil::read_image(argv[1], input_image, gil::png_tag{}); + + auto input_view = gil::view(input_image); + auto grayscaled = to_grayscale(input_view); + gil::gray8_image_t smoothed_image(grayscaled.dimensions()); + auto smoothed = gil::view(smoothed_image); + apply_gaussian_blur(gil::view(grayscaled), smoothed); + gil::gray16_image_t x_gradient_image(grayscaled.dimensions()); + gil::gray16_image_t y_gradient_image(grayscaled.dimensions()); + + auto x_gradient = gil::view(x_gradient_image); + auto y_gradient = gil::view(y_gradient_image); + calculate_gradients(smoothed, x_gradient, y_gradient); + + gil::gray32f_image_t m11(x_gradient.dimensions()); + gil::gray32f_image_t m12_21(x_gradient.dimensions()); + gil::gray32f_image_t m22(x_gradient.dimensions()); + gil::compute_hessian_entries( + x_gradient, + y_gradient, + gil::view(m11), + gil::view(m12_21), + gil::view(m22) + ); + + gil::gray32f_image_t hessian_response(x_gradient.dimensions()); + gil::gray32f_image_t gaussian_kernel(gil::point_t(5, 5)); + gil::generate_gaussian_kernel(gil::view(gaussian_kernel), 0.84089642); + gil::compute_hessian_responses( + gil::view(m11), + gil::view(m12_21), + gil::view(m22), + gil::view(gaussian_kernel), + gil::view(hessian_response) + ); + + auto corner_points = suppress(gil::view(hessian_response), hessian_determinant_threshold); + for (auto point: corner_points) { + input_view(point) = gil::rgb8_pixel_t(0, 0, 0); + input_view(point).at(std::integral_constant{}) = 255; + } + gil::write_view(argv[4], input_view, gil::png_tag{}); +} diff --git a/include/boost/gil/image_processing/hessian.hpp b/include/boost/gil/image_processing/hessian.hpp new file mode 100644 index 0000000000..5d36599f3d --- /dev/null +++ b/include/boost/gil/image_processing/hessian.hpp @@ -0,0 +1,76 @@ +#ifndef BOOST_GIL_IMAGE_PROCESSING_HESSIAN_HPP +#define BOOST_GIL_IMAGE_PROCESSING_HESSIAN_HPP + +#include +#include +#include + +namespace boost { namespace gil { + +/// \brief Computes Hessian response +/// +/// Computes Hessian response based on computed entries of Hessian matrix, e.g. second order +/// derivates in x and y, and derivatives in both x, y. +/// d stands for derivative, and x or y stand for derivative direction. For example, +/// ddxx means taking two derivatives (gradients) in horizontal direction. +/// Weights change perception of surroinding pixels. +/// Additional filtering is strongly advised. +template +inline void compute_hessian_responses( + GradientView ddxx, + GradientView dxdy, + GradientView ddyy, + Weights weights, + OutputView dst) +{ + if (ddxx.dimensions() != ddyy.dimensions() + || ddyy.dimensions() != dxdy.dimensions() + || dxdy.dimensions() != dst.dimensions() + || weights.width() != weights.height() + || weights.width() % 2 != 1) + { + throw std::invalid_argument("dimensions of views are not the same" + " or weights don't have equal width and height" + " or weights' dimensions are not odd"); + } + // Use pixel type of output, as values will be written to output + using pixel_t = typename std::remove_reference()(0, 0))>::type; + + using channel_t = typename std::remove_reference + < + decltype(std::declval().at(std::integral_constant{})) + >::type; + + auto half_size = weights.width() / 2; + for (auto y = half_size; y < dst.height() - half_size; ++y) + { + for (auto x = half_size; x < dst.width() - half_size; ++x) + { + auto ddxx_i = channel_t(); + auto ddyy_i = channel_t(); + auto dxdy_i = channel_t(); + constexpr std::integral_constant chosen_channel{}; + for (typename OutputView::coord_t w_y = 0; w_y < weights.height(); ++w_y) + { + for (typename OutputView::coord_t w_x = 0; w_x < weights.width(); ++w_x) + { + ddxx_i += ddxx(x + w_x - half_size, y + w_y - half_size) + .at(std::integral_constant{}) + * weights(w_x, w_y).at(std::integral_constant{}); + ddyy_i += ddyy(x + w_x - half_size, y + w_y - half_size) + .at(std::integral_constant{}) + * weights(w_x, w_y).at(std::integral_constant{}); + dxdy_i += dxdy(x + w_x - half_size, y + w_y - half_size) + .at(std::integral_constant{}) + * weights(w_x, w_y).at(std::integral_constant{}); + } + } + auto determinant = ddxx_i * ddyy_i - dxdy_i * dxdy_i; + dst(x, y).at(std::integral_constant{}) = determinant; + } + } +} + +}} // namespace boost::gil + +#endif diff --git a/include/boost/gil/image_processing/numeric.hpp b/include/boost/gil/image_processing/numeric.hpp index 912de8584f..d0ae0ac020 100644 --- a/include/boost/gil/image_processing/numeric.hpp +++ b/include/boost/gil/image_processing/numeric.hpp @@ -68,6 +68,84 @@ inline void compute_tensor_entries( } } +/// \brief Compute xy gradient, and second order x and y gradients +/// \ingroup ImageProcessingMath +/// +/// Hessian matrix is defined as a matrix of partial derivates +/// for 2d case, it is [[ddxx, dxdy], [dxdy, ddyy]. +/// d stands for derivative, and x or y stand for direction. +/// For example, dx stands for derivative (gradient) in horizontal +/// direction, and ddxx means second order derivative in horizon direction +/// https://en.wikipedia.org/wiki/Hessian_matrix +template +inline void compute_hessian_entries( + GradientView dx, + GradientView dy, + OutputView ddxx, + OutputView dxdy, + OutputView ddyy) +{ + using x_coord_t = typename OutputView::x_coord_t; + using y_coord_t = typename OutputView::y_coord_t; + using pixel_t = typename std::remove_reference::type; + using channel_t = typename std::remove_reference< + decltype( + std::declval().at( + std::integral_constant{} + ) + ) + >::type; + + constexpr double x_kernel[3][3] = + { + {1, 0, -1}, + {2, 0, -2}, + {1, 0, -1} + }; + constexpr double y_kernel[3][3] = + { + {1, 2, 1}, + {0, 0, 0}, + {-1, -2, -1} + }; + constexpr auto chosen_channel = std::integral_constant{}; + for (y_coord_t y = 1; y < ddxx.height() - 1; ++y) + { + for (x_coord_t x = 1; x < ddxx.width() - 1; ++x) + { + pixel_t ddxx_i; + static_transform(ddxx_i, ddxx_i, + [](channel_t) { return static_cast(0); }); + pixel_t dxdy_i; + static_transform(dxdy_i, dxdy_i, + [](channel_t) { return static_cast(0); }); + pixel_t ddyy_i; + static_transform(ddyy_i, ddyy_i, + [](channel_t) { return static_cast(0); }); + for (y_coord_t y_filter = 0; y_filter < 2; ++y_filter) + { + for (x_coord_t x_filter = 0; x_filter < 2; ++x_filter) + { + auto adjusted_y = y + y_filter - 1; + auto adjusted_x = x + x_filter - 1; + ddxx_i.at(std::integral_constant{}) += + dx(adjusted_x, adjusted_y).at(chosen_channel) + * x_kernel[y_filter][x_filter]; + dxdy_i.at(std::integral_constant{}) += + dx(adjusted_x, adjusted_y).at(chosen_channel) + * y_kernel[y_filter][x_filter]; + ddyy_i.at(std::integral_constant{}) += + dy(adjusted_x, adjusted_y).at(chosen_channel) + * y_kernel[y_filter][x_filter]; + } + } + ddxx(x, y) = ddxx_i; + dxdy(x, y) = dxdy_i; + ddyy(x, y) = ddyy_i; + } + } +} + /// \brief Generate mean kernel /// \ingroup ImageProcessingMath /// diff --git a/test/core/image_processing/CMakeLists.txt b/test/core/image_processing/CMakeLists.txt index 25eda96536..9e75ae4d2e 100644 --- a/test/core/image_processing/CMakeLists.txt +++ b/test/core/image_processing/CMakeLists.txt @@ -30,7 +30,8 @@ endforeach() foreach(_name lanczos_scaling simple_kernels - harris) + harris + hessian) set(_test t_core_image_processing_${_name}) set(_target test_core_image_processing_${_name}) diff --git a/test/core/image_processing/Jamfile b/test/core/image_processing/Jamfile index 19e2348c62..f1c32fc0ac 100644 --- a/test/core/image_processing/Jamfile +++ b/test/core/image_processing/Jamfile @@ -20,3 +20,4 @@ run threshold_otsu.cpp ; run lanczos_scaling.cpp ; run simple_kernels.cpp ; run harris.cpp ; +run hessian.cpp ; diff --git a/test/core/image_processing/hessian.cpp b/test/core/image_processing/hessian.cpp new file mode 100644 index 0000000000..dd96e409f8 --- /dev/null +++ b/test/core/image_processing/hessian.cpp @@ -0,0 +1,72 @@ +// +// Copyright 2019 Olzhas Zhumabek +// +// Use, modification and distribution are subject to the Boost Software License, +// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +#include +#include +#include +#include +#include + +namespace gil = boost::gil; + +bool are_equal(gil::gray32f_view_t expected, gil::gray32f_view_t actual) { + if (expected.dimensions() != actual.dimensions()) + return false; + + for (long int y = 0; y < expected.height(); ++y) + { + for (long int x = 0; x < expected.width(); ++x) + { + if (expected(x, y) != actual(x, y)) + { + return false; + } + } + } + + return true; +} + +void test_blank_image() +{ + const gil::point_t dimensions(20, 20); + gil::gray16_image_t dx(dimensions, gil::gray16_pixel_t(0), 0); + gil::gray16_image_t dy(dimensions, gil::gray16_pixel_t(0), 0); + + gil::gray32f_image_t m11(dimensions); + gil::gray32f_image_t m12_21(dimensions); + gil::gray32f_image_t m22(dimensions); + gil::gray32f_image_t expected(dimensions, gil::gray32f_pixel_t(0), 0); + gil::compute_hessian_entries( + gil::view(dx), + gil::view(dy), + gil::view(m11), + gil::view(m12_21), + gil::view(m22) + ); + BOOST_TEST(are_equal(gil::view(expected), gil::view(m11))); + BOOST_TEST(are_equal(gil::view(expected), gil::view(m12_21))); + BOOST_TEST(are_equal(gil::view(expected), gil::view(m22))); + + gil::gray32f_image_t hessian_response(dimensions, gil::gray32f_pixel_t(0), 0); + gil::gray32f_image_t unnormalized_mean(gil::point_t(5, 5)); + gil::generate_unnormalized_mean(gil::view(unnormalized_mean)); + gil::compute_hessian_responses( + gil::view(m11), + gil::view(m12_21), + gil::view(m22), + gil::view(unnormalized_mean), + gil::view(hessian_response) + ); + BOOST_TEST(are_equal(gil::view(expected), gil::view(hessian_response))); +} + +int main(int argc, char* argv[]) +{ + test_blank_image(); + return boost::report_errors(); +}