Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
crypto/cipher: add VSX implementation of xorBytes for ppc64x
This change adds an asm implementation of xorBytes for ppc64x that takes advantage of VSX registers and instructions.

name                   old time/op    new time/op    delta
XORBytes/8Bytes-8      16.4ns ± 0%    11.1ns ± 0%    -32.32%  (p=0.000 n=5+4)
XORBytes/128Bytes-8    45.6ns ± 0%    16.2ns ± 0%    -64.50%  (p=0.008 n=5+5)
XORBytes/2048Bytes-8    433ns ±13%     129ns ± 1%    -70.29%  (p=0.000 n=5+4)
XORBytes/32768Bytes-8  7.16µs ± 0%    1.83µs ± 0%    -74.39%  (p=0.008 n=5+5)

name                   old speed      new speed      delta
XORBytes/8Bytes-8      488MB/s ± 0%   721MB/s ± 0%    +47.75%  (p=0.016 n=5+4)
XORBytes/128Bytes-8    2.80GB/s ± 0%  7.89GB/s ± 0%  +181.33%  (p=0.008 n=5+5)
XORBytes/2048Bytes-8   4.77GB/s ±13%  15.87GB/s ± 0% +232.68%  (p=0.016 n=5+4)
XORBytes/32768Bytes-8  4.58GB/s ± 0%  17.88GB/s ± 0% +290.47%  (p=0.008 n=5+5)

Change-Id: Ic27d9b858f8ec2d597fdabc68a288d6844eba701
Reviewed-on: https://go-review.googlesource.com/c/145997
Run-TryBot: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
- Loading branch information
Showing
3 changed files
with
96 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Copyright 2018 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
// +build ppc64 ppc64le | ||
|
||
package cipher | ||
|
||
// xorBytes xors the bytes in a and b. The destination should have enough | ||
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd. | ||
func xorBytes(dst, a, b []byte) int { | ||
n := len(a) | ||
if len(b) < n { | ||
n = len(b) | ||
} | ||
if n == 0 { | ||
return 0 | ||
} | ||
_ = dst[n-1] | ||
xorBytesVSX(&dst[0], &a[0], &b[0], n) | ||
return n | ||
} | ||
|
||
func xorWords(dst, a, b []byte) { | ||
xorBytes(dst, a, b) | ||
} | ||
|
||
// xorBytesVSX is implemented in assembly. It XORs n bytes read from a and b
// and stores the result starting at dst. Its only caller here, xorBytes,
// invokes it with n > 0 and after checking that dst holds at least n bytes.
//
//go:noescape
func xorBytesVSX(dst, a, b *byte, n int)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright 2018 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
// +build ppc64 ppc64le | ||
|
||
#include "textflag.h" | ||
|
||
// func xorBytesVSX(dst, a, b *byte, n int)
//
// xorBytesVSX stores a[i] ^ b[i] into dst[i] for i in [0, n).
//
// Three paths are selected by n:
//   n ≥ 16:     16 bytes per iteration using VSX loads/stores, then any
//               remaining n%16 bytes one at a time.
//   8 ≤ n < 16: a single 8-byte XOR in general-purpose registers, then
//               any remaining bytes one at a time.
//   n < 8:      one byte at a time.
// n == 0 returns without touching memory.
//
// Register use:
//   R3 = dst, R4 = a, R5 = b
//   R6 = n, later the number of bytes still to process
//   R7 = number of 16-byte blocks, R9 = n & 15
//   R8 = byte index shared by all three pointers
//   R14-R16 and VS32-VS34 are scratch.
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
	MOVD	dst+0(FP), R3	// R3 = dst
	MOVD	a+8(FP), R4	// R4 = a
	MOVD	b+16(FP), R5	// R5 = b
	MOVD	n+24(FP), R6	// R6 = n

	CMPU	R6, $16, CR7	// Check if n ≥ 16 bytes
	MOVD	R0, R8		// R8 = index
	CMPU	R6, $8, CR6	// Check if 8 ≤ n < 16 bytes
	BGE	CR7, preloop16
	BLT	CR6, small

	// Case for 8 ≤ n < 16 bytes
	MOVD	(R4)(R8), R14	// R14 = a[i,...,i+7]
	MOVD	(R5)(R8), R15	// R15 = b[i,...,i+7]
	XOR	R14, R15, R16	// R16 = a[] ^ b[]
	SUB	$8, R6		// n = n - 8
	MOVD	R16, (R3)(R8)	// Store to dst
	ADD	$8, R8

	// Check if we're finished
	CMP	R6, R0
	BGT	small
	JMP	done

	// Case for n ≥ 16 bytes
preloop16:
	SRD	$4, R6, R7	// Setup loop counter: R7 = n / 16
	MOVD	R7, CTR
	ANDCC	$15, R6, R9	// Check for trailing bytes for later (sets CR0)
loop16:
	LXVD2X	(R4)(R8), VS32		// VS32 = a[i,...,i+15]
	LXVD2X	(R5)(R8), VS33		// VS33 = b[i,...,i+15]
	XXLXOR	VS32, VS33, VS34	// VS34 = a[] ^ b[]
	STXVD2X	VS34, (R3)(R8)		// Store to dst
	ADD	$16, R8			// Update index
	BC	16, 0, loop16		// bdnz loop16

	BEQ	CR0, done	// CR0 still holds the ANDCC result: no trailing bytes
	SLD	$4, R7
	SUB	R7, R6		// R6 = n - (R7 * 16)

	// Case for n < 8 bytes and trailing bytes from the
	// previous cases.
small:
	// bdnz decrements CTR before testing it, so entering the loop with
	// CTR == 0 would wrap around instead of exiting. Bail out when there
	// is nothing to do (this also covers n == 0 on entry).
	CMP	R6, R0
	BEQ	done
	MOVD	R6, CTR		// Setup loop counter

loop:
	MOVBZ	(R4)(R8), R14	// R14 = a[i]
	MOVBZ	(R5)(R8), R15	// R15 = b[i]
	XOR	R14, R15, R16	// R16 = a[i] ^ b[i]
	MOVB	R16, (R3)(R8)	// Store to dst
	ADD	$1, R8
	BC	16, 0, loop	// bdnz loop

done:
	RET