Skip to content

Commit

Permalink
Merge 89e245d into 8f2bcc9
Browse files Browse the repository at this point in the history
  • Loading branch information
aclements committed Jan 14, 2019
2 parents 8f2bcc9 + 89e245d commit 4b514ca
Show file tree
Hide file tree
Showing 13 changed files with 44 additions and 118 deletions.
32 changes: 28 additions & 4 deletions asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,40 @@
package vecf64

// Add performs a̅ + b̅. a̅ will be clobbered
func Add(a, b []float64)
// Add panics with "vectors must be the same length" when len(a) != len(b);
// otherwise it hands both slices to addAsm.
func Add(a, b []float64) {
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	addAsm(a, b)
}

// addAsm has no Go body; it is implemented in assembly (TEXT ·addAsm).
// Callers are expected to have validated the lengths first, as Add does.
func addAsm(a, b []float64)

// Sub performs a̅ - b̅. a̅ will be clobbered
func Sub(a, b []float64)
// Sub panics with "vectors must be the same length" when len(a) != len(b);
// otherwise it hands both slices to subAsm.
func Sub(a, b []float64) {
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	subAsm(a, b)
}

// subAsm has no Go body; it is implemented in assembly (TEXT ·subAsm).
// Callers are expected to have validated the lengths first, as Sub does.
func subAsm(a, b []float64)

// Mul performs a̅ × b̅. a̅ will be clobbered
func Mul(a, b []float64)
// Mul panics with "vectors must be the same length" when len(a) != len(b);
// otherwise it hands both slices to mulAsm.
func Mul(a, b []float64) {
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	mulAsm(a, b)
}

// mulAsm has no Go body; it is implemented in assembly (TEXT ·mulAsm).
// Callers are expected to have validated the lengths first, as Mul does.
func mulAsm(a, b []float64)

// Div performs a̅ ÷ b̅. a̅ will be clobbered
func Div(a, b []float64)
// Div panics with "vectors must be the same length" when len(a) != len(b);
// otherwise it hands both slices to divAsm.
func Div(a, b []float64) {
	if na, nb := len(a), len(b); na != nb {
		panic("vectors must be the same length")
	}
	divAsm(a, b)
}

// divAsm has no Go body; it is implemented in assembly (TEXT ·divAsm).
// Callers are expected to have validated the lengths first, as Div does.
func divAsm(a, b []float64)

// Sqrt performs √a̅ elementwise. a̅ will be clobbered, i.e. the results are
// written back into a. The declaration has no Go body; the implementation is
// supplied by the accompanying assembly files.
func Sqrt(a []float64)
Expand Down
14 changes: 2 additions & 12 deletions asm_vecAdd_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Holds the address of the current element of slice A (starting at index 0). This register is incremented every loop
DI - Holds the address of the current element of slice B (starting at index 0). Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -77,17 +76,12 @@ This pseudocode best explains the rather simple assembly:
*/
#include "textflag.h"

// func Add(a, b []float64)
TEXT ·Add(SB), NOSPLIT, $0
// func addAsm(a, b []float64)
TEXT ·addAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use detination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
Expand Down Expand Up @@ -156,7 +150,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
14 changes: 2 additions & 12 deletions asm_vecAdd_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Add(a, b []float64)
TEXT ·Add(SB), NOSPLIT, $0
// func addAsm(a, b []float64)
TEXT ·addAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -69,8 +64,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

15 changes: 2 additions & 13 deletions asm_vecDiv_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Holds the address of the current element of slice A (starting at index 0). This register is incremented every loop
DI - Holds the address of the current element of slice B (starting at index 0). Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -87,17 +86,12 @@ Citation
*/
#include "textflag.h"

// func Div(a, b []float64)
TEXT ·Div(SB), NOSPLIT, $0
// func divAsm(a, b []float64)
TEXT ·divAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

SUBQ $4, AX
JL remainder
Expand Down Expand Up @@ -167,8 +161,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

13 changes: 2 additions & 11 deletions asm_vecDiv_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Div(a, b []float64)
TEXT ·Div(SB), NOSPLIT, $0
// func divAsm(a, b []float64)
TEXT ·divAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -73,7 +68,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecInvSqrt_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecInvSqrt_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
15 changes: 2 additions & 13 deletions asm_vecMul_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ These are the registers I use to store the relevant information:
SI - Holds the address of the current element of slice A (starting at index 0). This register is incremented every loop
DI - Holds the address of the current element of slice B (starting at index 0). Incremented every loop
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -78,17 +77,12 @@ This pseudocode best explains the rather simple assembly:
*/
#include "textflag.h"

// func Mul(a, b []float64)
TEXT ·Mul(SB), NOSPLIT, $0
// func mulAsm(a, b []float64)
TEXT ·mulAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use detination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
Expand Down Expand Up @@ -156,8 +150,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

14 changes: 2 additions & 12 deletions asm_vecMul_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Mul(a, b []float64)
TEXT ·Mul(SB), NOSPLIT, $0
// func mulAsm(a, b []float64)
TEXT ·mulAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// check if there are at least 8 elements
SUBQ $8, AX
Expand Down Expand Up @@ -68,8 +63,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

4 changes: 0 additions & 4 deletions asm_vecSqrt_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
4 changes: 0 additions & 4 deletions asm_vecSqrt_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET
15 changes: 2 additions & 13 deletions asm_vecSub_avx.s
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ These are the registers I use to store the relevant information:
DI - Holds the address of the current element of slice B (starting at index 0). Incremented every loop
AX - len(a) is stored in here. Volatile register. AX is also used as the "working" count of the length that is decremented.
AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
BX - len(b) is stored in here. Used to compare against AX at the beginning to make sure both a and b have the same lengths
Y0, Y1 - YMM registers.
X0, X1 - XMM registers.
Expand Down Expand Up @@ -84,19 +83,14 @@ Citation
*/
#include "textflag.h"

// func Sub(a, b []float64)
TEXT ·Sub(SB), NOSPLIT, $0
// func subAsm(a, b []float64)
TEXT ·subAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX
MOVQ b_len+32(FP), BX // len(b) into BX
MOVQ AX, AX // len(a) into AX for working purposes

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

// each ymm register can take up to 4 float64s.
SUBQ $4, AX
JL remainder
Expand Down Expand Up @@ -165,8 +159,3 @@ remainder1:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

14 changes: 2 additions & 12 deletions asm_vecSub_sse.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@

#include "textflag.h"

// func Sub(a, b []float64)
TEXT ·Sub(SB), NOSPLIT, $0
// func subAsm(a, b []float64)
TEXT ·subAsm(SB), NOSPLIT, $0
MOVQ a_data+0(FP), SI
MOVQ b_data+24(FP), DI // use destination index register for this

MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap
MOVQ b_len+32(FP), BX // len(b) into BX

// check if they're the same length
CMPQ AX, BX
JNE panic // jump to panic if not the same length. TOOD: return bloody errors

SUBQ $8, AX // 8 items or more?
JL remainder
Expand Down Expand Up @@ -72,8 +67,3 @@ remainderloop:

done:
RET

panic:
CALL runtime·panicindex(SB)
RET

0 comments on commit 4b514ca

Please sign in to comment.