Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove assembly calls to runtime.panicindex for Go 1.12 #5

Merged
merged 1 commit into from
Jan 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,40 @@
package vecf64

// Add performs a̅ + b̅. a̅ will be clobbered
func Add(a, b []float64)
// Add computes a + b and stores the result in a, overwriting its previous
// contents. It panics if a and b do not have the same length.
func Add(a, b []float64) {
	// Validate in Go before handing off to the assembly routine.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	addAsm(a, b)
}
// addAsm is declared without a body; its implementation is the assembly
// routine TEXT ·addAsm in asm_vecAdd_avx.s / asm_vecAdd_sse.s. Callers are
// expected to have verified len(a) == len(b) first, as Add does.
func addAsm(a, b []float64)

// Sub performs a̅ - b̅. a̅ will be clobbered
func Sub(a, b []float64)
// Sub computes a - b and stores the result in a, overwriting its previous
// contents. It panics if a and b do not have the same length.
func Sub(a, b []float64) {
	// Validate in Go before handing off to the assembly routine.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	subAsm(a, b)
}
// subAsm is declared without a body; its implementation is the assembly
// routine TEXT ·subAsm in asm_vecSub_avx.s / asm_vecSub_sse.s. Callers are
// expected to have verified len(a) == len(b) first, as Sub does.
func subAsm(a, b []float64)

// Mul performs a̅ × b̅. a̅ will be clobbered
func Mul(a, b []float64)
// Mul computes a × b and stores the result in a, overwriting its previous
// contents. It panics if a and b do not have the same length.
func Mul(a, b []float64) {
	// Validate in Go before handing off to the assembly routine.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	mulAsm(a, b)
}
// mulAsm is declared without a body; its implementation is the assembly
// routine TEXT ·mulAsm in asm_vecMul_avx.s / asm_vecMul_sse.s. Callers are
// expected to have verified len(a) == len(b) first, as Mul does.
func mulAsm(a, b []float64)

// Div performs a̅ ÷ b̅. a̅ will be clobbered
func Div(a, b []float64)
// Div computes a ÷ b and stores the result in a, overwriting its previous
// contents. It panics if a and b do not have the same length.
func Div(a, b []float64) {
	// Validate in Go before handing off to the assembly routine.
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	divAsm(a, b)
}
// divAsm is declared without a body; its implementation is the assembly
// routine TEXT ·divAsm in asm_vecDiv_avx.s / asm_vecDiv_sse.s. Callers are
// expected to have verified len(a) == len(b) first, as Div does.
func divAsm(a, b []float64)

// Sqrt performs √a̅ elementwise. a̅ will be clobbered
func Sqrt(a []float64)
Expand Down
14 changes: 2 additions & 12 deletions asm_vecAdd_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Used to store the top element of slice A (index 0). This register is incremented every loop
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.

Expand Down Expand Up @@ -77,17 +76,12 @@ This pseudocode best explains the rather simple assembly:
*/
#include "textflag.h"

// func Add(a, b []float64)
TEXT ·Add(SB), NOSPLIT, $0
// func addAsm(a, b []float64)
TEXT ·addAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
Expand Down Expand Up @@ -156,7 +150,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
14 changes: 2 additions & 12 deletions asm_vecAdd_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Add(a, b []float64)
TEXT ·Add(SB), NOSPLIT, $0
// func addAsm(a, b []float64)
TEXT ·addAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -69,8 +64,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

15 changes: 2 additions & 13 deletions asm_vecDiv_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Used to store the top element of slice A (index 0). This register is incremented every loop
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.

Expand Down Expand Up @@ -87,17 +86,12 @@ Citation
*/
#include "textflag.h"

// func Div(a, b []float64)
TEXT ·Div(SB), NOSPLIT, $0
// func divAsm(a, b []float64)
TEXT ·divAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

SUBQ $4, AX
JL remainder
Expand Down Expand Up @@ -167,8 +161,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

13 changes: 2 additions & 11 deletions asm_vecDiv_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Div(a, b []float64)
TEXT ·Div(SB), NOSPLIT, $0
// func divAsm(a, b []float64)
TEXT ·divAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -73,7 +68,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecInvSqrt_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecInvSqrt_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
15 changes: 2 additions & 13 deletions asm_vecMul_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Used to store the top element of slice A (index 0). This register is incremented every loop
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.

Expand Down Expand Up @@ -78,17 +77,12 @@ This pseudocode best explains the rather simple assembly:
*/
#include "textflag.h"

// func Mul(a, b []float64)
TEXT ·Mul(SB), NOSPLIT, $0
// func mulAsm(a, b []float64)
TEXT ·mulAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
Expand Down Expand Up @@ -156,8 +150,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

14 changes: 2 additions & 12 deletions asm_vecMul_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Mul(a, b []float64)
TEXT ·Mul(SB), NOSPLIT, $0
// func mulAsm(a, b []float64)
TEXT ·mulAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -68,8 +63,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

4 changes: 0 additions & 4 deletions asm_vecSqrt_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecSqrt_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
15 changes: 2 additions & 13 deletions asm_vecSub_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ These are the registers I use to store the relevant information:
DI - used to store the top element of slice B. Incremented every loop
AX - len(a) is stored in here. Volatile register. AX is also used as the "working" count of the length that is decremented.
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.

Expand Down Expand Up @@ -84,19 +83,14 @@ Citation
*/
#include "textflag.h"

// func Sub(a, b []float64)
TEXT ·Sub(SB), NOSPLIT, $0
// func subAsm(a, b []float64)
TEXT ·subAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX
MOVQ AX, AX // len(a) into AX for working purposes

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
JL remainder
Expand Down Expand Up @@ -165,8 +159,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

14 changes: 2 additions & 12 deletions asm_vecSub_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Sub(a, b []float64)
TEXT ·Sub(SB), NOSPLIT, $0
// func subAsm(a, b []float64)
TEXT ·subAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

SUBQ $8, AX // 8 items or more?
JL remainder
Expand Down Expand Up @@ -72,8 +67,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET