Skip to content

Commit

Permalink
vapoursynth: refactor code.
Browse files Browse the repository at this point in the history
  • Loading branch information
chikuzen committed May 27, 2016
1 parent 9e1a8d5 commit d8d711b
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 199 deletions.
156 changes: 78 additions & 78 deletions vapoursynth/readme.md
Original file line number Diff line number Diff line change
@@ -1,78 +1,78 @@
# yadifmod2
## Yet Another Deinterlacing Filter mod for VapourSynth

yadifmod2 = yadif + yadifmod

### Info:

version 0.0.0

### Requirement:
- VapourSynth r30 or later.

### Syntax:

ym2.yadifmod2(clip clip[, int order, int field, int mode, clip edeint, int opt])

#### clip -

Constant format only.
All formats except half precision are supported.

#### order -

Set the field order.

0 = bff
1(default) = tff

#### field -

Controls which field to keep when using same rate output.

-1(default) = set equal to order
0 = keep bottom field
1 = keep top field

This parameter doesn't do anything when using double rate output.

#### mode -

Controls double rate vs same rate output, and whether or not the spatial interlacing check is performed.

0(default) = same rate, do spatial check
1 = double rate, do spatial check
2 = same rate, no spatial check
3 = double rate, no spatial check

#### edeint -

Clip from which to take spatial predictions.

If this is not set, yadifmod2 will generate spatial predictions itself, in the same way as yadif.

This clip must be the same width, height, and format as the input clip.
If using same rate output, this clip should have the same number of frames as the input.
If using double rate output, this clip should have twice as many frames as the input.

#### opt -

Controls which cpu optimizations are used.

0 = Use C++ routine.
1 = Use SSE2 + SSE routine if possible. When SSE2 can't be used, fallback to 0.
2 = Use SSSE3 + SSE2 + SSE routine if possible. When SSSE3 can't be used, fallback to 1.
3 = Use SSE4.1 + SSSE3 + SSE2 + SSE routine if possible. When SSE4.1 can't be used, fallback to 2.
4 = Use SSE4.1 + SSSE3 + SSE2 + AVX routine if possible. When AVX can't be used, fallback to 3.
others(default) = Use AVX2 + AVX routine if possible. When AVX2 can't be used, fall back to 4.

### Changelog:

0.0.0(20160515)
initial release


### Source code:

https://github.com/chikuzen/yadifmod2/

# yadifmod2
## Yet Another Deinterlacing Filter mod for VapourSynth

yadifmod2 = yadif + yadifmod

### Info:

version 0.0.0

### Requirement:
- VapourSynth r30 or later.

### Syntax:

ym2.yadifmod2(clip clip[, int order, int field, int mode, clip edeint, int opt])

#### clip -

Constant format only.
All formats except half precision are supported.

#### order -

Set the field order.

0 = bff
1 = tff(default)

#### field -

Controls which field to keep when using same rate output.

-1 = set equal to order(default)
0 = keep bottom field
1 = keep top field

This parameter doesn't do anything when using double rate output.

#### mode -

Controls double rate vs same rate output, and whether or not the spatial interlacing check is performed.

0 = same rate, do spatial check(default)
1 = double rate, do spatial check
2 = same rate, no spatial check
3 = double rate, no spatial check

#### edeint -

Clip from which to take spatial predictions.

If this is not set, yadifmod2 will generate spatial predictions itself, in the same way as yadif.

This clip must be the same width, height, and format as the input clip.
If using same rate output, this clip should have the same number of frames as the input.
If using double rate output, this clip should have twice as many frames as the input.

#### opt -

Controls which cpu optimizations are used.

0 = Use C++ routine.
1 = Use SSE2 + SSE routine if possible. When SSE2 can't be used, fallback to 0.
2 = Use SSSE3 + SSE2 + SSE routine if possible. When SSSE3 can't be used, fallback to 1.
3 = Use SSE4.1 + SSSE3 + SSE2 + SSE routine if possible. When SSE4.1 can't be used, fallback to 2.
4 = Use SSE4.1 + SSSE3 + SSE2 + AVX routine if possible. When AVX can't be used, fallback to 3.
others = Use AVX2 + AVX routine if possible. When AVX2 can't be used, fall back to 4. (default)

### Changelog:

0.0.0(20160515)
initial release


### Source code:

https://github.com/chikuzen/yadifmod2/

5 changes: 3 additions & 2 deletions vapoursynth/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ VapourSynthPluginInit(VSConfigPlugin conf, VSRegisterFunction reg, VSPlugin* p)
VAPOURSYNTH_API_VERSION, 1, p);
reg("yadifmod2",
"clip:clip;order:int:opt;field:int:opt;mode:int:opt;edeint:clip:opt;"
"opt:int:opt",
"opt:int:opt;",
create_filter, nullptr, p);
}
}

131 changes: 127 additions & 4 deletions vapoursynth/src/proc_filter.h → vapoursynth/src/proc_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,49 +26,58 @@
*/


#ifndef YADIFMOD2_PROC_FILTER_H
#define YADIFMOD2_PROC_FILTER_H

#include <algorithm>
#include <map>
#include <tuple>
#include "arch.h"
#include "yadifmod2.h"



// Integer mean of two values, biased upward by adding 1 before the
// truncating division by two.
static F_INLINE int average(const int x, const int y) noexcept
{
    const int biased_sum = x + y + 1;
    return biased_sum / 2;
}


// Mean of two float values.
// Declared `static` like every other helper in this translation unit so that
// it has internal linkage and cannot collide with a same-named inline
// function defined in another source file.
static F_INLINE float average(const float x, const float y) noexcept
{
    return (x + y) * 0.5f;
}


// Integer mean of two values using plain truncating division
// (no +1 bias, unlike average()).
static F_INLINE int average2(const int x, const int y) noexcept
{
    const int sum = x + y;
    return sum / 2;
}


// Float overload of average2(): half of the sum of the two values.
static F_INLINE float average2(const float x, const float y) noexcept
{
    const float sum = x + y;
    return sum * 0.5f;
}


// Absolute difference |x - y| of two ints, computed by subtracting the
// smaller operand from the larger one.
static F_INLINE int absdiff(const int x, const int y) noexcept
{
    if (x > y) {
        return x - y;
    }
    return y - x;
}


// Absolute difference |x - y| of two floats, computed by subtracting the
// smaller operand from the larger one.
static F_INLINE float absdiff(const float x, const float y) noexcept
{
    if (x > y) {
        return x - y;
    }
    return y - x;
}


// Limits `val` to the range [minimum, maximum] in the working type T1 and
// converts the result to the output type T0.
template <typename T0, typename T1>
static F_INLINE T0 clamp(const T1 val, const T1 minimum, const T1 maximum) noexcept
{
    // Lower bound first, then upper bound; the comparisons are written the
    // same way std::max / std::min evaluate them.
    const T1 raised = (val < minimum) ? minimum : val;
    const T1 bounded = (maximum < raised) ? maximum : raised;
    return static_cast<T0>(bounded);
}


template <typename T>
static void
interpolate(uint8_t* dstp, const uint8_t* srcp, int stride, size_t width) noexcept
Expand All @@ -81,13 +90,15 @@ interpolate(uint8_t* dstp, const uint8_t* srcp, int stride, size_t width) noexce
}
}


// Sums the absolute differences of three neighbouring sample pairs.
// Samples are read from `ct` at offsets n-1, n, n+1 and from `cb` at the
// mirrored offsets -n-1, -n, -n+1; the total is returned as T1.
// NOTE(review): ct/cb presumably point into the lines above and below the
// pixel being interpolated — confirm against calc_spatial_pred().
template <typename T0, typename T1>
static F_INLINE T1 calc_score(const T0* ct, const T0* cb, int n) noexcept
{
    const auto left = absdiff(ct[-1 + n], cb[-1 - n]);
    const auto center = absdiff(ct[n], cb[-n]);
    const auto right = absdiff(ct[1 + n], cb[1 - n]);
    return left + center + right;
}


template <typename T0, typename T1>
static inline T1 calc_spatial_pred(const T0* ct, const T0* cb) noexcept
{
Expand Down Expand Up @@ -198,6 +209,7 @@ proc_cpp(const uint8_t* currp, const uint8_t* prevp, const uint8_t* nextp,
}
}


#if defined(__SSE2__)

#include "simd.h"
Expand Down Expand Up @@ -249,7 +261,6 @@ static F_INLINE V calc_spatial_pred(const uint8_t* ct, const uint8_t* cb)




template <typename V, typename T, size_t STEP, arch_t ARCH, bool SP_CHECK, bool HAS_EDEINT>
static void
proc_simd(const uint8_t* currp, const uint8_t* prevp, const uint8_t* nextp,
Expand Down Expand Up @@ -337,4 +348,116 @@ proc_simd(const uint8_t* currp, const uint8_t* prevp, const uint8_t* nextp,
}
#endif // __SSE2__

#endif //YADIFMOD2_PROC_FILTER_H

// Selects the filter kernel for a given sample format and feature set.
//
//   bps     - lookup key for the sample format; the table uses 8, 10, 16
//             and 32.
//   spcheck - selects the SP_CHECK = true/false instantiation.
//   edeint  - selects the HAS_EDEINT = true/false instantiation.
//   arch    - instruction-set level of the kernel (NO_SIMD, USE_SSE2, ...).
//
// Returns the matching kernel. If no entry exists for the requested key,
// std::map::operator[] default-inserts and returns a value-initialized
// proc_filter_t (presumably a null function pointer - TODO confirm the
// typedef).
//
// The table has no 10-bit entry for NO_SIMD and no 32-bit entry for
// USE_SSSE3 / USE_SSE41 / USE_AVX2.
// NOTE(review): callers presumably remap bps/arch before calling - confirm.
//
// Fix: every SIMD row keyed (bps, false, false, arch) used to be bound to the
// <..., false, true> instantiation, so a call without an edeint clip got the
// HAS_EDEINT kernel. Those rows now use <..., false, false>, matching the
// NO_SIMD rows.
proc_filter_t
get_main_proc(int bps, bool spcheck, bool edeint, arch_t arch)
{
    using std::make_tuple;

    std::map<std::tuple<int, int, bool, arch_t>, proc_filter_t> table;

    table[make_tuple( 8, true, true, NO_SIMD)] = proc_cpp<uint8_t, int, true, true>;
    table[make_tuple( 8, true, false, NO_SIMD)] = proc_cpp<uint8_t, int, true, false>;
    table[make_tuple( 8, false, true, NO_SIMD)] = proc_cpp<uint8_t, int, false, true>;
    table[make_tuple( 8, false, false, NO_SIMD)] = proc_cpp<uint8_t, int, false, false>;

    table[make_tuple(16, true, true, NO_SIMD)] = proc_cpp<uint16_t, int, true, true>;
    table[make_tuple(16, true, false, NO_SIMD)] = proc_cpp<uint16_t, int, true, false>;
    table[make_tuple(16, false, true, NO_SIMD)] = proc_cpp<uint16_t, int, false, true>;
    table[make_tuple(16, false, false, NO_SIMD)] = proc_cpp<uint16_t, int, false, false>;

    table[make_tuple(32, true, true, NO_SIMD)] = proc_cpp<float, float, true, true>;
    table[make_tuple(32, true, false, NO_SIMD)] = proc_cpp<float, float, true, false>;
    table[make_tuple(32, false, true, NO_SIMD)] = proc_cpp<float, float, false, true>;
    table[make_tuple(32, false, false, NO_SIMD)] = proc_cpp<float, float, false, false>;
#if defined(__SSE2__)
    table[make_tuple(8, true, true, USE_SSE2)] = proc_simd<__m128i, uint8_t, 8, USE_SSE2, true, true>;
    table[make_tuple(8, true, false, USE_SSE2)] = proc_simd<__m128i, uint8_t, 8, USE_SSE2, true, false>;
    table[make_tuple(8, false, true, USE_SSE2)] = proc_simd<__m128i, uint8_t, 8, USE_SSE2, false, true>;
    table[make_tuple(8, false, false, USE_SSE2)] = proc_simd<__m128i, uint8_t, 8, USE_SSE2, false, false>;

    table[make_tuple(10, true, true, USE_SSE2)] = proc_simd<__m128i, int16_t, 16, USE_SSE2, true, true>;
    table[make_tuple(10, true, false, USE_SSE2)] = proc_simd<__m128i, int16_t, 16, USE_SSE2, true, false>;
    table[make_tuple(10, false, true, USE_SSE2)] = proc_simd<__m128i, int16_t, 16, USE_SSE2, false, true>;
    table[make_tuple(10, false, false, USE_SSE2)] = proc_simd<__m128i, int16_t, 16, USE_SSE2, false, false>;

    table[make_tuple(16, true, true, USE_SSE2)] = proc_simd<__m128i, uint16_t, 8, USE_SSE2, true, true>;
    table[make_tuple(16, true, false, USE_SSE2)] = proc_simd<__m128i, uint16_t, 8, USE_SSE2, true, false>;
    table[make_tuple(16, false, true, USE_SSE2)] = proc_simd<__m128i, uint16_t, 8, USE_SSE2, false, true>;
    table[make_tuple(16, false, false, USE_SSE2)] = proc_simd<__m128i, uint16_t, 8, USE_SSE2, false, false>;

    table[make_tuple(32, true, true, USE_SSE2)] = proc_simd<__m128, float, 16, USE_SSE2, true, true>;
    table[make_tuple(32, true, false, USE_SSE2)] = proc_simd<__m128, float, 16, USE_SSE2, true, false>;
    table[make_tuple(32, false, true, USE_SSE2)] = proc_simd<__m128, float, 16, USE_SSE2, false, true>;
    table[make_tuple(32, false, false, USE_SSE2)] = proc_simd<__m128, float, 16, USE_SSE2, false, false>;
#if defined(__SSSE3__)
    table[make_tuple(8, true, true, USE_SSSE3)] = proc_simd<__m128i, uint8_t, 8, USE_SSSE3, true, true>;
    table[make_tuple(8, true, false, USE_SSSE3)] = proc_simd<__m128i, uint8_t, 8, USE_SSSE3, true, false>;
    table[make_tuple(8, false, true, USE_SSSE3)] = proc_simd<__m128i, uint8_t, 8, USE_SSSE3, false, true>;
    table[make_tuple(8, false, false, USE_SSSE3)] = proc_simd<__m128i, uint8_t, 8, USE_SSSE3, false, false>;

    table[make_tuple(10, true, true, USE_SSSE3)] = proc_simd<__m128i, int16_t, 16, USE_SSSE3, true, true>;
    table[make_tuple(10, true, false, USE_SSSE3)] = proc_simd<__m128i, int16_t, 16, USE_SSSE3, true, false>;
    table[make_tuple(10, false, true, USE_SSSE3)] = proc_simd<__m128i, int16_t, 16, USE_SSSE3, false, true>;
    table[make_tuple(10, false, false, USE_SSSE3)] = proc_simd<__m128i, int16_t, 16, USE_SSSE3, false, false>;

    table[make_tuple(16, true, true, USE_SSSE3)] = proc_simd<__m128i, uint16_t, 8, USE_SSSE3, true, true>;
    table[make_tuple(16, true, false, USE_SSSE3)] = proc_simd<__m128i, uint16_t, 8, USE_SSSE3, true, false>;
    table[make_tuple(16, false, true, USE_SSSE3)] = proc_simd<__m128i, uint16_t, 8, USE_SSSE3, false, true>;
    table[make_tuple(16, false, false, USE_SSSE3)] = proc_simd<__m128i, uint16_t, 8, USE_SSSE3, false, false>;
#if defined(__SSE4_1__)
    table[make_tuple(8, true, true, USE_SSE41)] = proc_simd<__m128i, uint8_t, 8, USE_SSE41, true, true>;
    table[make_tuple(8, true, false, USE_SSE41)] = proc_simd<__m128i, uint8_t, 8, USE_SSE41, true, false>;
    table[make_tuple(8, false, true, USE_SSE41)] = proc_simd<__m128i, uint8_t, 8, USE_SSE41, false, true>;
    table[make_tuple(8, false, false, USE_SSE41)] = proc_simd<__m128i, uint8_t, 8, USE_SSE41, false, false>;

    table[make_tuple(10, true, true, USE_SSE41)] = proc_simd<__m128i, int16_t, 16, USE_SSE41, true, true>;
    table[make_tuple(10, true, false, USE_SSE41)] = proc_simd<__m128i, int16_t, 16, USE_SSE41, true, false>;
    table[make_tuple(10, false, true, USE_SSE41)] = proc_simd<__m128i, int16_t, 16, USE_SSE41, false, true>;
    table[make_tuple(10, false, false, USE_SSE41)] = proc_simd<__m128i, int16_t, 16, USE_SSE41, false, false>;

    table[make_tuple(16, true, true, USE_SSE41)] = proc_simd<__m128i, uint16_t, 8, USE_SSE41, true, true>;
    table[make_tuple(16, true, false, USE_SSE41)] = proc_simd<__m128i, uint16_t, 8, USE_SSE41, true, false>;
    table[make_tuple(16, false, true, USE_SSE41)] = proc_simd<__m128i, uint16_t, 8, USE_SSE41, false, true>;
    table[make_tuple(16, false, false, USE_SSE41)] = proc_simd<__m128i, uint16_t, 8, USE_SSE41, false, false>;
#if defined(__AVX__)
    table[make_tuple(32, true, true, USE_AVX)] = proc_simd<__m256, float, 32, USE_AVX, true, true>;
    table[make_tuple(32, true, false, USE_AVX)] = proc_simd<__m256, float, 32, USE_AVX, true, false>;
    table[make_tuple(32, false, true, USE_AVX)] = proc_simd<__m256, float, 32, USE_AVX, false, true>;
    table[make_tuple(32, false, false, USE_AVX)] = proc_simd<__m256, float, 32, USE_AVX, false, false>;
#if defined(__AVX2__)
    table[make_tuple(8, true, true, USE_AVX2)] = proc_simd<__m256i, uint8_t, 16, USE_AVX2, true, true>;
    table[make_tuple(8, true, false, USE_AVX2)] = proc_simd<__m256i, uint8_t, 16, USE_AVX2, true, false>;
    table[make_tuple(8, false, true, USE_AVX2)] = proc_simd<__m256i, uint8_t, 16, USE_AVX2, false, true>;
    table[make_tuple(8, false, false, USE_AVX2)] = proc_simd<__m256i, uint8_t, 16, USE_AVX2, false, false>;

    table[make_tuple(10, true, true, USE_AVX2)] = proc_simd<__m256i, int16_t, 32, USE_AVX2, true, true>;
    table[make_tuple(10, true, false, USE_AVX2)] = proc_simd<__m256i, int16_t, 32, USE_AVX2, true, false>;
    table[make_tuple(10, false, true, USE_AVX2)] = proc_simd<__m256i, int16_t, 32, USE_AVX2, false, true>;
    table[make_tuple(10, false, false, USE_AVX2)] = proc_simd<__m256i, int16_t, 32, USE_AVX2, false, false>;

    table[make_tuple(16, true, true, USE_AVX2)] = proc_simd<__m256i, uint16_t, 16, USE_AVX2, true, true>;
    table[make_tuple(16, true, false, USE_AVX2)] = proc_simd<__m256i, uint16_t, 16, USE_AVX2, true, false>;
    table[make_tuple(16, false, true, USE_AVX2)] = proc_simd<__m256i, uint16_t, 16, USE_AVX2, false, true>;
    table[make_tuple(16, false, false, USE_AVX2)] = proc_simd<__m256i, uint16_t, 16, USE_AVX2, false, false>;
#endif // __AVX2__
#endif // __AVX__
#endif // __SSE4_1__
#endif // __SSSE3__
#endif // __SSE2__

    // operator[] (not find) on purpose: a missing key yields a
    // value-initialized proc_filter_t rather than an exception.
    return table[make_tuple(bps, spcheck, edeint, arch)];
}



// Picks the interpolate<> instantiation for a sample size.
// `bps` of 1 selects uint8_t, 2 selects uint16_t, and any other value
// selects float.
// NOTE(review): here bps looks like bytes per sample, whereas
// get_main_proc() is keyed on 8/10/16/32 - confirm with the caller.
interpolate_t get_interp(int bps)
{
    switch (bps) {
    case 1:
        return interpolate<uint8_t>;
    case 2:
        return interpolate<uint16_t>;
    default:
        return interpolate<float>;
    }
}
Loading

0 comments on commit d8d711b

Please sign in to comment.