Skip to content

Commit

Permalink
Remove assembly calls to runtime.panicindex for Go 1.12
Browse files Browse the repository at this point in the history
Go 1.12 does not allow the unexported function runtime.panicindex to
be called from assembly code in other packages. Fix this by wrapping
the assembly functions that call panicindex in Go functions and
lifting these checks into the Go functions. The Go wrapper functions
are inlined (as of Go 1.12), so there's no additional cost to doing
this.

Fixes #4.
  • Loading branch information
aclements committed Jan 14, 2019
1 parent 8f2bcc9 commit 89e245d
Show file tree
Hide file tree
Showing 13 changed files with 44 additions and 118 deletions.
32 changes: 28 additions & 4 deletions asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,40 @@
package vecf64

// Add performs a̅ + b̅. a̅ will be clobbered
func Add(a, b []float64)
// Add performs a̅ + b̅ by handing both slices to the assembly routine addAsm.
// a̅ will be clobbered. It panics if a and b do not have the same length.
func Add(a, b []float64) {
	// The length check is done here in Go rather than inside the assembly.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	addAsm(a, b)
}
// addAsm is the assembly routine behind Add (TEXT ·addAsm in
// asm_vecAdd_avx.s and asm_vecAdd_sse.s). The equal-length check is done by
// the Go wrapper Add; the assembly takes its element count from len(a).
func addAsm(a, b []float64)

// Sub performs a̅ - b̅. a̅ will be clobbered
func Sub(a, b []float64)
// Sub performs a̅ - b̅ by handing both slices to the assembly routine subAsm.
// a̅ will be clobbered. It panics if a and b do not have the same length.
func Sub(a, b []float64) {
	// The length check is done here in Go rather than inside the assembly.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	subAsm(a, b)
}
// subAsm is the assembly routine behind Sub (TEXT ·subAsm in
// asm_vecSub_avx.s and asm_vecSub_sse.s). The equal-length check is done by
// the Go wrapper Sub; the assembly takes its element count from len(a).
func subAsm(a, b []float64)

// Mul performs a̅ × b̅. a̅ will be clobbered
func Mul(a, b []float64)
// Mul performs a̅ × b̅ by handing both slices to the assembly routine mulAsm.
// a̅ will be clobbered. It panics if a and b do not have the same length.
func Mul(a, b []float64) {
	// The length check is done here in Go rather than inside the assembly.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	mulAsm(a, b)
}
// mulAsm is the assembly routine behind Mul (TEXT ·mulAsm in
// asm_vecMul_avx.s and asm_vecMul_sse.s). The equal-length check is done by
// the Go wrapper Mul; the assembly takes its element count from len(a).
func mulAsm(a, b []float64)

// Div performs a̅ ÷ b̅. a̅ will be clobbered
func Div(a, b []float64)
// Div performs a̅ ÷ b̅ by handing both slices to the assembly routine divAsm.
// a̅ will be clobbered. It panics if a and b do not have the same length.
func Div(a, b []float64) {
	// The length check is done here in Go rather than inside the assembly.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	divAsm(a, b)
}
// divAsm is the assembly routine behind Div (TEXT ·divAsm in
// asm_vecDiv_avx.s and asm_vecDiv_sse.s). The equal-length check is done by
// the Go wrapper Div; the assembly takes its element count from len(a).
func divAsm(a, b []float64)

// Sqrt performs √a̅ elementwise. a̅ will be clobbered
func Sqrt(a []float64)
Expand Down
14 changes: 2 additions & 12 deletions asm_vecAdd_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Used to store the top element of slice A (index 0). This register is incremented every loop
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -77,17 +76,12 @@ This pseudocode best explains the rather simple assembly:
*/
#include "textflag.h"

// func Add(a, b []float64)
TEXT ·Add(SB), NOSPLIT, $0
// func addAsm(a, b []float64)
TEXT ·addAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
Expand Down Expand Up @@ -156,7 +150,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
14 changes: 2 additions & 12 deletions asm_vecAdd_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Add(a, b []float64)
TEXT ·Add(SB), NOSPLIT, $0
// func addAsm(a, b []float64)
TEXT ·addAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -69,8 +64,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

15 changes: 2 additions & 13 deletions asm_vecDiv_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Used to store the top element of slice A (index 0). This register is incremented every loop
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -87,17 +86,12 @@ Citation
*/
#include "textflag.h"

// func Div(a, b []float64)
TEXT ·Div(SB), NOSPLIT, $0
// func divAsm(a, b []float64)
TEXT ·divAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

SUBQ $4, AX
JL remainder
Expand Down Expand Up @@ -167,8 +161,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

13 changes: 2 additions & 11 deletions asm_vecDiv_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Div(a, b []float64)
TEXT ·Div(SB), NOSPLIT, $0
// func divAsm(a, b []float64)
TEXT ·divAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -73,7 +68,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecInvSqrt_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecInvSqrt_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
15 changes: 2 additions & 13 deletions asm_vecMul_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Used to store the top element of slice A (index 0). This register is incremented every loop
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -78,17 +77,12 @@ This pseudocode best explains the rather simple assembly:
*/
#include "textflag.h"

// func Mul(a, b []float64)
TEXT ·Mul(SB), NOSPLIT, $0
// func mulAsm(a, b []float64)
TEXT ·mulAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
Expand Down Expand Up @@ -156,8 +150,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

14 changes: 2 additions & 12 deletions asm_vecMul_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Mul(a, b []float64)
TEXT ·Mul(SB), NOSPLIT, $0
// func mulAsm(a, b []float64)
TEXT ·mulAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -68,8 +63,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

4 changes: 0 additions & 4 deletions asm_vecSqrt_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecSqrt_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
15 changes: 2 additions & 13 deletions asm_vecSub_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ These are the registers I use to store the relevant information:
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. Volatile register. AX is also used as the "working" count of the length that is decremented.
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -84,19 +83,14 @@ Citation
*/
#include "textflag.h"

// func Sub(a, b []float64)
TEXT ·Sub(SB), NOSPLIT, $0
// func subAsm(a, b []float64)
TEXT ·subAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX
MOVQ AX, AX // len(a) into AX for working purposes

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
JL remainder
Expand Down Expand Up @@ -165,8 +159,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

14 changes: 2 additions & 12 deletions asm_vecSub_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Sub(a, b []float64)
TEXT ·Sub(SB), NOSPLIT, $0
// func subAsm(a, b []float64)
TEXT ·subAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

SUBQ $8, AX // 8 items or more?
JL remainder
Expand Down Expand Up @@ -72,8 +67,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

0 comments on commit 89e245d

Please sign in to comment.