Skip to content

Commit

Permalink
crypto/cipher: add VSX implementation of xorBytes for ppc64x
Browse files Browse the repository at this point in the history
This change adds asm implementations of xorBytes for ppc64x that
takes advantage of VSX registers and instructions.

name                   old time/op    new time/op     delta
XORBytes/8Bytes-8        16.4ns ± 0%     11.1ns ± 0%   -32.32%  (p=0.000 n=5+4)
XORBytes/128Bytes-8      45.6ns ± 0%     16.2ns ± 0%   -64.50%  (p=0.008 n=5+5)
XORBytes/2048Bytes-8      433ns ±13%      129ns ± 1%   -70.29%  (p=0.000 n=5+4)
XORBytes/32768Bytes-8    7.16µs ± 0%     1.83µs ± 0%   -74.39%  (p=0.008 n=5+5)

name                   old speed      new speed       delta
XORBytes/8Bytes-8       488MB/s ± 0%    721MB/s ± 0%   +47.75%  (p=0.016 n=5+4)
XORBytes/128Bytes-8    2.80GB/s ± 0%   7.89GB/s ± 0%  +181.33%  (p=0.008 n=5+5)
XORBytes/2048Bytes-8   4.77GB/s ±13%  15.87GB/s ± 0%  +232.68%  (p=0.016 n=5+4)
XORBytes/32768Bytes-8  4.58GB/s ± 0%  17.88GB/s ± 0%  +290.47%  (p=0.008 n=5+5)

Change-Id: Ic27d9b858f8ec2d597fdabc68a288d6844eba701
Reviewed-on: https://go-review.googlesource.com/c/145997
Run-TryBot: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
  • Loading branch information
ceseo committed Nov 1, 2018
1 parent f5b6950 commit 0ff6e5f
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/crypto/cipher/xor_generic.go
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !amd64
// +build !amd64,!ppc64,!ppc64le

package cipher

Expand Down
29 changes: 29 additions & 0 deletions src/crypto/cipher/xor_ppc64x.go
@@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

package cipher

// xorBytes xors the bytes in a and b. The destination should have enough
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd.
func xorBytes(dst, a, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
if n == 0 {
return 0
}
_ = dst[n-1]
xorBytesVSX(&dst[0], &a[0], &b[0], n)
return n
}

func xorWords(dst, a, b []byte) {
xorBytes(dst, a, b)
}

//go:noescape
func xorBytesVSX(dst, a, b *byte, n int)
66 changes: 66 additions & 0 deletions src/crypto/cipher/xor_ppc64x.s
@@ -0,0 +1,66 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "textflag.h"

// func xorBytesVSX(dst, a, b *byte, n int)
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
MOVD dst+0(FP), R3 // R3 = dst
MOVD a+8(FP), R4 // R4 = a
MOVD b+16(FP), R5 // R5 = b
MOVD n+24(FP), R6 // R6 = n

CMPU R6, $16, CR7 // Check if n ≥ 16 bytes
MOVD R0, R8 // R8 = index
CMPU R6, $8, CR6 // Check if 8 ≤ n < 16 bytes
BGE CR7, preloop16
BLT CR6, small

// Case for 8 ≤ n < 16 bytes
MOVD (R4)(R8), R14 // R14 = a[i,...,i+7]
MOVD (R5)(R8), R15 // R15 = b[i,...,i+7]
XOR R14, R15, R16 // R16 = a[] ^ b[]
SUB $8, R6 // n = n - 8
MOVD R16, (R3)(R8) // Store to dst
ADD $8, R8

// Check if we're finished
CMP R6, R0
BGT small
JMP done

// Case for n ≥ 16 bytes
preloop16:
SRD $4, R6, R7 // Setup loop counter
MOVD R7, CTR
ANDCC $15, R6, R9 // Check for tailing bytes for later
loop16:
LXVD2X (R4)(R8), VS32 // VS32 = a[i,...,i+15]
LXVD2X (R5)(R8), VS33 // VS33 = b[i,...,i+15]
XXLXOR VS32, VS33, VS34 // VS34 = a[] ^ b[]
STXVD2X VS34, (R3)(R8) // Store to dst
ADD $16, R8 // Update index
BC 16, 0, loop16 // bdnz loop16

BEQ CR0, done
SLD $4, R7
SUB R7, R6 // R6 = n - (R7 * 16)

// Case for n < 8 bytes and tailing bytes from the
// previous cases.
small:
MOVD R6, CTR // Setup loop counter

loop:
MOVBZ (R4)(R8), R14 // R14 = a[i]
MOVBZ (R5)(R8), R15 // R15 = b[i]
XOR R14, R15, R16 // R16 = a[i] ^ b[i]
MOVB R16, (R3)(R8) // Store to dst
ADD $1, R8
BC 16, 0, loop // bdnz loop

done:
RET

0 comments on commit 0ff6e5f

Please sign in to comment.