Skip to content

Commit

Permalink
bytes: move IndexByte assembly to pkg runtime
Browse files Browse the repository at this point in the history
Per suggestion from Russ in February. Then strings.IndexByte
can be implemented in terms of the shared code in pkg runtime.

Update #3751

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/12289043
  • Loading branch information
bradfitz committed Aug 1, 2013
1 parent 39679ca commit e2a1bd6
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 179 deletions.
17 changes: 0 additions & 17 deletions src/pkg/bytes/asm_386.s

This file was deleted.

91 changes: 0 additions & 91 deletions src/pkg/bytes/asm_amd64.s

This file was deleted.

56 changes: 0 additions & 56 deletions src/pkg/bytes/asm_arm.s

This file was deleted.

5 changes: 5 additions & 0 deletions src/pkg/bytes/bytes.s
@@ -0,0 +1,5 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file is here just to make the go tool happy.
4 changes: 2 additions & 2 deletions src/pkg/bytes/bytes_decl.go
Expand Up @@ -7,13 +7,13 @@ package bytes
//go:noescape

// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
func IndexByte(s []byte, c byte) int // asm_$GOARCH.s
func IndexByte(s []byte, c byte) int // ../runtime/asm_$GOARCH.s

//go:noescape

// Equal returns a boolean reporting whether a == b.
// A nil argument is equivalent to an empty slice.
func Equal(a, b []byte) bool // asm_arm.s or ../runtime/asm_{386,amd64}.s
func Equal(a, b []byte) bool // ../runtime/asm_$GOARCH.s

//go:noescape

Expand Down
14 changes: 14 additions & 0 deletions src/pkg/runtime/asm_386.s
Expand Up @@ -1117,6 +1117,20 @@ TEXT bytes·Compare(SB),7,$0-28
MOVL AX, res+24(FP)
RET

// bytes·IndexByte implements func IndexByte(s []byte, c byte) int for 386.
// It stores in ret+16(FP) the index of the first byte of s equal to c,
// or -1 if no such byte is found within s_len bytes.
TEXT bytes·IndexByte(SB),7,$0
MOVL s+0(FP), SI // SI = start of s, kept as the base for the index computation
MOVL s_len+4(FP), CX // CX = number of bytes to scan
MOVB c+12(FP), AL // AL = byte to look for
MOVL SI, DI // DI = scan cursor
// Scan forward; stops after a byte equal to AL (ZF set) or when CX reaches 0.
CLD; REPN; SCASB
// Found: skip the next two instructions. When s_len is 0 SCASB never runs and
// ZF is whatever it was on entry; either path still yields -1 because DI == SI.
JZ 3(PC)
MOVL $-1, ret+16(FP)
RET
SUBL SI, DI // DI = number of bytes consumed by the scan
SUBL $1, DI // SCASB leaves DI one past the matching byte
MOVL DI, ret+16(FP)
RET

// input:
// SI = a
// DI = b
Expand Down
114 changes: 101 additions & 13 deletions src/pkg/runtime/asm_amd64.s
Expand Up @@ -908,19 +908,6 @@ TEXT runtime·memeq(SB),7,$0-24
MOVQ count+16(FP), BX
JMP runtime·memeqbody(SB)

// bytes·Equal implements func Equal(a, b []byte) bool for amd64.
// Slices of different lengths are reported unequal without reading their
// contents; otherwise the comparison is delegated to runtime·memeqbody.
TEXT bytes·Equal(SB),7,$0-49
MOVQ a_len+8(FP), BX
MOVQ b_len+32(FP), CX
XORQ AX, AX // preset the result to false for the length-mismatch path
CMPQ BX, CX
JNE eqret
MOVQ a+0(FP), SI
MOVQ b+24(FP), DI
// NOTE(review): memeqbody is expected to take a in SI, b in DI, count in BX
// and leave the boolean result in AX — confirm against its definition.
CALL runtime·memeqbody(SB)
eqret:
MOVB AX, ret+48(FP)
RET

// a in SI
// b in DI
// count in BX
Expand Down Expand Up @@ -1142,3 +1129,104 @@ cmp_allsame:
SETEQ CX // 1 if alen == blen
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
RET

// bytes·IndexByte implements func IndexByte(s []byte, c byte) int for amd64.
// It stores in ret+32(FP) the index of the first byte of s equal to c,
// or -1 if c does not occur in s.
//
// Inputs shorter than 16 bytes are scanned with REPN SCASB. Longer inputs
// are scanned in three phases: a SCASB scan up to the first 16-byte
// boundary, an SSE loop over aligned 16-byte chunks, and a SCASB scan of
// whatever remains after the last 16-byte boundary.
//
// Arguments occupy 40 bytes: s at 0-23, c at 24 (padded), ret at 32-39.
//
// Register use:
//   SI  = base of s (never modified, used to compute the index)
//   DI  = scan cursor
//   BX  = len(s)
//   AL  = c
//   R11 = last 16-byte boundary at or below the end of s
//   X0  = c broadcast to all 16 bytes
TEXT bytes·IndexByte(SB),7,$0-40
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), BX
MOVB c+24(FP), AL
MOVQ SI, DI

CMPQ BX, $16
JLT indexbyte_small

// round up to first 16-byte boundary
TESTQ $15, SI
JZ aligned
MOVQ SI, CX
ANDQ $~15, CX
ADDQ $16, CX

// search the beginning
// CX = number of bytes before the boundary (1..15); len(s) >= 16 here,
// so this scan stays inside s.
SUBQ SI, CX
REPN; SCASB
JZ success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
// round down to last 16-byte boundary
MOVQ BX, R11
ADDQ SI, R11
ANDQ $~15, R11

// shuffle X0 around so that each byte contains c
MOVD AX, X0
PUNPCKLBW X0, X0
PUNPCKLBW X0, X0
PSHUFL $0, X0, X0
JMP condition

sse:
// move the next 16-byte chunk of the buffer into X1
MOVO (DI), X1
// compare bytes in X0 to X1
PCMPEQB X0, X1
// take the top bit of each byte in X1 and put the result in DX
PMOVMSKB X1, DX
TESTL DX, DX
JNZ ssesuccess
ADDQ $16, DI

condition:
CMPQ DI, R11
JLT sse

// search the end
// CX = number of bytes between the last 16-byte boundary and the end of s
MOVQ SI, CX
ADDQ BX, CX
SUBQ R11, CX
// if CX == 0, the zero flag will be set and we'll end up
// returning a false success
JZ failure
REPN; SCASB
JZ success

failure:
MOVQ $-1, ret+32(FP)
RET

// handle lengths < 16
indexbyte_small:
MOVQ BX, CX
// For len(s) == 0 SCASB does not execute; ZF is still clear from the
// CMPQ BX, $16 above, so control falls through to the -1 return.
REPN; SCASB
JZ success
MOVQ $-1, ret+32(FP)
RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
// get the index of the least significant set bit
BSFW DX, DX
SUBQ SI, DI
ADDQ DI, DX
MOVQ DX, ret+32(FP)
RET

success:
// SCASB leaves DI one past the matching byte, so the index is DI-SI-1.
// Use 64-bit arithmetic throughout: a 32-bit SUBL would clear the upper
// half of DI and truncate indexes of 4 GiB or more.
SUBQ SI, DI
SUBQ $1, DI
MOVQ DI, ret+32(FP)
RET

// bytes·Equal implements func Equal(a, b []byte) bool for amd64.
// Slices of different lengths are reported unequal without reading their
// contents; otherwise the comparison is delegated to runtime·memeqbody.
TEXT bytes·Equal(SB),7,$0-49
MOVQ a_len+8(FP), BX
MOVQ b_len+32(FP), CX
XORQ AX, AX // preset the result to false for the length-mismatch path
CMPQ BX, CX
JNE eqret
MOVQ a+0(FP), SI
MOVQ b+24(FP), DI
// NOTE(review): memeqbody is expected to take a in SI, b in DI, count in BX
// and leave the boolean result in AX — confirm against its definition.
CALL runtime·memeqbody(SB)
eqret:
MOVB AX, ret+48(FP)
RET
54 changes: 54 additions & 0 deletions src/pkg/runtime/asm_arm.s
Expand Up @@ -514,3 +514,57 @@ _next:

MOVW $0, R0
RET

// TODO: share code with memeq?
//
// bytes·Equal implements func Equal(a, b []byte) bool for ARM.
// It stores 1 in ret+24(FP) when a and b have the same length and the same
// bytes, and 0 otherwise. The contents are compared one byte at a time.
TEXT bytes·Equal(SB),7,$0
MOVW a_len+4(FP), R1
MOVW b_len+16(FP), R3

CMP R1, R3 // unequal lengths are not equal
B.NE _notequal

MOVW a+0(FP), R0
MOVW b+12(FP), R2
ADD R0, R1 // end

// R0 walks a, R2 walks b, R1 is one past the last byte of a.
_byteseq_next:
CMP R0, R1
B.EQ _equal // reached the end
MOVBU.P 1(R0), R4 // load the next byte of a and advance R0
MOVBU.P 1(R2), R5 // load the next byte of b and advance R2
CMP R4, R5
B.EQ _byteseq_next
// fall through to _notequal on the first differing byte

_notequal:
MOVW $0, R0
MOVBU R0, ret+24(FP)
RET

_equal:
MOVW $1, R0
MOVBU R0, ret+24(FP)
RET

// bytes·IndexByte implements func IndexByte(s []byte, c byte) int for ARM.
// It stores in ret+16(FP) the index of the first byte of s equal to c,
// or -1 if the scan reaches the end of s without a match.
TEXT bytes·IndexByte(SB),7,$0
MOVW s+0(FP), R0
MOVW s_len+4(FP), R1
MOVBU c+12(FP), R2 // byte to find
MOVW R0, R4 // store base for later
ADD R0, R1 // end

// R0 is the scan cursor; R1 is one past the last byte of s.
_loop:
CMP R0, R1
B.EQ _notfound
MOVBU.P 1(R0), R3 // load the next byte and advance R0
CMP R2, R3
B.NE _loop

SUB $1, R0 // R0 will be one beyond the position we want
SUB R4, R0 // remove base
MOVW R0, ret+16(FP)
RET

_notfound:
MOVW $-1, R0
MOVW R0, ret+16(FP)
RET

0 comments on commit e2a1bd6

Please sign in to comment.