Skip to content

Commit

Permalink
Merge pull request opencv#21530 from anna-khakimova:ak/simd_divrc
Browse files Browse the repository at this point in the history
* GAPI Fluid: SIMD for DivRC kernel.

* Fluid: Div kernel's SIMD refactoring

* SIMD for DivRC 3 channel case

* Applied comments
  • Loading branch information
Anna Khakimova authored and Sajjad Ali committed Mar 27, 2023
1 parent a093d8b commit 6caf3f0
Show file tree
Hide file tree
Showing 6 changed files with 732 additions and 256 deletions.
4 changes: 4 additions & 0 deletions modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,10 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance)

// FIXIT Unstable input data for divide
initMatsRandU(type, sz, dtype, false);
// This condition is needed as a workaround for a bug in OpenCV.
// It reinitializes the divisor matrix without zero values for the CV_16S DST type.
if (dtype == CV_16S || (type == CV_16S && dtype == -1))
cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));

// OpenCV code ///////////////////////////////////////////////////////////
cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype);
Expand Down
4 changes: 2 additions & 2 deletions modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest,
INSTANTIATE_TEST_CASE_P(DivRCPerfTestFluid, DivRCPerfTest,
Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
Values(szSmall128, szVGA, sz720p, sz1080p),
Values(CV_8UC1, CV_8UC3, CV_32FC1),
Values(-1, CV_8U, CV_32F),
Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
Values(1.0),
Values(cv::compile_args(CORE_FLUID))));

Expand Down
77 changes: 55 additions & 22 deletions modules/gapi/src/backends/fluid/gfluidcore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,8 +936,8 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca
}

template<typename DST, typename SRC>
static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,
float scale=1)
CV_ALWAYS_INLINE void run_arithm_rs(Buffer &dst, const View &src, const float scalar[],
Arithm arithm, float scale=1)
{
const auto *in = src.InLine<SRC>(0);
auto *out = dst.OutLine<DST>();
Expand All @@ -955,15 +955,23 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A
w = subrc_simd(scalar, in, out, length, chan);
#endif
for (; w < length; ++w)
{
out[w] = subr<DST>(in[w], scalar[w % chan]);
}
break;
}
// TODO: optimize division
case ARITHM_DIVIDE:
for (int w=0; w < width; w++)
for (int c=0; c < chan; c++)
out[chan*w + c] = div<DST>(scalar[c], in[chan*w + c], scale);
{
int w = 0;
#if CV_SIMD
w = divrc_simd(scalar, in, out, length, chan, scale);
#endif
for (; w < length; ++w)
{
out[w] = div<DST>(scalar[w % chan], in[w], scale);
}
break;
}
default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
}
}
Expand Down Expand Up @@ -1319,7 +1327,9 @@ CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch,
#endif

for (; w < length; ++w)
{
out[w] = div<DST>(in[w], scalar[w % chan], scale);
}
}

GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
Expand Down Expand Up @@ -1402,32 +1412,55 @@ GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
}
};

GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false)
GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, true)
{
static const int Window = 1;

static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/,
Buffer &dst)
static void run(const cv::Scalar& _scalar, const View& src, double _scale, int /*dtype*/,
Buffer& dst, Buffer& scratch)
{
const float scalar[4] = {
static_cast<float>(_scalar[0]),
static_cast<float>(_scalar[1]),
static_cast<float>(_scalar[2]),
static_cast<float>(_scalar[3])
};
GAPI_Assert(src.meta().chan <= 4);

if (dst.y() == 0)
{
const int chan = src.meta().chan;
float* _scratch = scratch.OutLine<float>();

scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
}

const float* scalar = scratch.OutLine<float>();
const float scale = static_cast<float>(_scale);

// DST SRC OP __VA_ARGS__
UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);

CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
}

static void initScratch(const GScalarDesc&, const GMatDesc&, double, int, Buffer& scratch)
{
initScratchBuffer(scratch);
}

static void resetScratch(Buffer& /*scratch*/)
{
}
};

//-------------------
Expand Down
27 changes: 27 additions & 0 deletions modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,33 @@ ABSDIFFC_SIMD(float)

#undef ABSDIFFC_SIMD

// Dispatching definitions of divrc_simd() ("scalar divided by matrix" kernel helper).
//
// For every (SRC, DST) pair listed below, DIVRC_SIMD(SRC, DST) defines an overload
//     int divrc_simd(const float scalar[], const SRC in[], DST out[],
//                    const int length, const int chan, const float scale)
// whose whole body forwards the arguments unchanged through CV_CPU_DISPATCH
// with CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): presumably CV_CPU_DISPATCH picks the CPU-specific variant at run time
// and returns its result -- confirm against the macro's definition.
//
// The caller in gfluidcore.cpp (run_arithm_rs, ARITHM_DIVIDE case) stores the returned
// int in `w` and finishes the row with a scalar loop over [w, length) computing
// out[w] = div<DST>(scalar[w % chan], in[w], scale).
#define DIVRC_SIMD(SRC, DST) \
int divrc_simd(const float scalar[], const SRC in[], DST out[], \
const int length, const int chan, const float scale) \
{ \
CV_CPU_DISPATCH(divrc_simd, (scalar, in, out, length, chan, scale), \
CV_CPU_DISPATCH_MODES_ALL); \
}

// One overload per (source type, destination type) combination; the first macro
// argument is the input element type, the second is the output element type.
DIVRC_SIMD(uchar, uchar)
DIVRC_SIMD(ushort, uchar)
DIVRC_SIMD(short, uchar)
DIVRC_SIMD(float, uchar)
DIVRC_SIMD(short, short)
DIVRC_SIMD(ushort, short)
DIVRC_SIMD(uchar, short)
DIVRC_SIMD(float, short)
DIVRC_SIMD(ushort, ushort)
DIVRC_SIMD(uchar, ushort)
DIVRC_SIMD(short, ushort)
DIVRC_SIMD(float, ushort)
DIVRC_SIMD(uchar, float)
DIVRC_SIMD(ushort, float)
DIVRC_SIMD(short, float)
DIVRC_SIMD(float, float)

// The helper macro is only needed for the list above, so it is removed right after it.
#undef DIVRC_SIMD

int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width)
{
Expand Down
23 changes: 23 additions & 0 deletions modules/gapi/src/backends/fluid/gfluidcore_func.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,29 @@ ABSDIFFC_SIMD(float)

#undef ABSDIFFC_SIMD

// Declarations of the divrc_simd() overloads.
//
// DIVRC_SIMD(SRC, DST) expands to the prototype
//     int divrc_simd(const float scalar[], const SRC in[], DST out[],
//                    const int length, const int chan, const float scale);
// where SRC is the input element type and DST is the output element type.
//
// The caller in gfluidcore.cpp (run_arithm_rs, ARITHM_DIVIDE case) stores the returned
// int in `w` and processes the remaining elements [w, length) with a scalar loop:
// out[w] = div<DST>(scalar[w % chan], in[w], scale).
// NOTE(review): so the return value looks like the number of elements already
// handled by the SIMD code -- confirm against the implementation.
#define DIVRC_SIMD(SRC, DST) \
int divrc_simd(const float scalar[], const SRC in[], DST out[], \
const int length, const int chan, const float scale);

// One prototype per (source type, destination type) combination.
DIVRC_SIMD(uchar, uchar)
DIVRC_SIMD(ushort, uchar)
DIVRC_SIMD(short, uchar)
DIVRC_SIMD(float, uchar)
DIVRC_SIMD(short, short)
DIVRC_SIMD(ushort, short)
DIVRC_SIMD(uchar, short)
DIVRC_SIMD(float, short)
DIVRC_SIMD(ushort, ushort)
DIVRC_SIMD(uchar, ushort)
DIVRC_SIMD(short, ushort)
DIVRC_SIMD(float, ushort)
DIVRC_SIMD(uchar, float)
DIVRC_SIMD(ushort, float)
DIVRC_SIMD(short, float)
DIVRC_SIMD(float, float)

// The helper macro is only needed for the list above, so it is removed right after it.
#undef DIVRC_SIMD

int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width);

Expand Down
Loading

0 comments on commit 6caf3f0

Please sign in to comment.