Skip to content

Commit

Permalink
arm/aarch64: vp9lpf: Calculate !hev directly
Browse files Browse the repository at this point in the history
Previously we first calculated hev, and then negated it.

Since we were able to schedule the negation in the middle
of another calculation, we don't see a gain in every case.

Before:                     Cortex A7      A8      A9     A53  A53/AArch64
vp9_loop_filter_v_4_8_neon:     147.0   129.0   115.8    89.0         88.7
vp9_loop_filter_v_8_8_neon:     242.0   198.5   174.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:    500.0   419.5   382.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:   971.2   825.5   731.5   579.0        453.0
After:
vp9_loop_filter_v_4_8_neon:     143.0   127.7   114.8    88.0         87.7
vp9_loop_filter_v_8_8_neon:     241.0   197.2   173.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:    497.0   419.5   379.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:   965.2   818.7   731.4   579.0        452.0

This is cherry-picked from libav commit
e1f9de8.

Signed-off-by: Martin Storsjö <martin@martin.st>
  • Loading branch information
mstorsjo committed Mar 11, 2017
1 parent 148cc0b commit f0ecbb1
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 6 deletions.
5 changes: 2 additions & 3 deletions libavcodec/aarch64/vp9lpf_neon.S
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@
.if \mix != 0
sxtl v1.8h, v1.8b
.endif
- cmhi v5\sz, v5\sz, v3\sz // hev
+ cmhs v5\sz, v3\sz, v5\sz // !hev
.if \wd == 8
// If a 4/8 or 8/4 mix is used, clear the relevant half of v6
.if \mix != 0
Expand All @@ -306,11 +306,10 @@
.elseif \wd == 8
bic v4\sz, v4\sz, v6\sz // fm && !flat8in
.endif
- mvn v5\sz, v5\sz // !hev
+ and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
.if \wd == 16
and v7\sz, v7\sz, v6\sz // flat8out && flat8in && fm
.endif
- and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in

mul_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp5\().8h, \tmp5\().8h, \sz // 3 * (q0 - p0)
bic \tmp1\sz, \tmp1\sz, v5\sz // if (!hev) av_clip_int8 = 0
Expand Down
5 changes: 2 additions & 3 deletions libavcodec/arm/vp9lpf_neon.S
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
.if \wd == 8
vcle.u8 d6, d6, d0 @ flat8in
.endif
- vcgt.u8 d5, d5, d3 @ hev
+ vcle.u8 d5, d5, d3 @ !hev
.if \wd == 8
vand d6, d6, d4 @ flat8in && fm
.endif
Expand All @@ -151,11 +151,10 @@
.elseif \wd == 8
vbic d4, d4, d6 @ fm && !flat8in
.endif
- vmvn d5, d5 @ !hev
+ vand d5, d5, d4 @ !hev && fm && !flat8in
.if \wd == 16
vand d7, d7, d6 @ flat8out && flat8in && fm
.endif
- vand d5, d5, d4 @ !hev && fm && !flat8in

vmul.s16 \tmpq2, \tmpq2, \tmpq3 @ 3 * (q0 - p0)
vbic \tmp1, \tmp1, d5 @ if (!hev) av_clip_int8 = 0
Expand Down

0 comments on commit f0ecbb1

Please sign in to comment.