Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix asm for dynamic linking #57

Merged
merged 2 commits into from
Aug 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dynamic/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/plugin.so
46 changes: 46 additions & 0 deletions dynamic/dynamic_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// +build linux darwin

package main

import (
	"bytes"
	"log"
	"os"
	"os/exec"
	"plugin"
	"testing"
)

// This is a cursory test that checks whether things work under dynamic linking.

// TestMain builds plugin.go as a Go plugin (plugin.so) before running the
// tests in this package, because TestDynamic opens that file.
//
// If the build fails, the combined stdout/stderr of the build is logged and
// the process exits without running any tests. Otherwise the process exits
// with the status returned by m.Run, so that test failures are reflected in
// the exit code even on Go releases before 1.15, which do not propagate the
// result automatically when TestMain simply returns.
func TestMain(m *testing.M) {
	cmd := exec.Command(
		"go", "build",
		"-buildmode", "plugin",
		"-o", "plugin.so",
		"plugin.go",
	)
	// Capture both streams in one buffer so a build failure can be reported
	// with the full compiler output.
	var out bytes.Buffer
	cmd.Stdout = &out
	cmd.Stderr = &out
	if err := cmd.Run(); err != nil {
		log.Fatalf("Error building plugin: %s\nOutput:\n%s", err, out.String())
	}
	os.Exit(m.Run())
}

// TestDynamic opens plugin.so and runs the test functions it exports
// (TestSum and TestDigest) against t.
//
// The test fails immediately if the plugin cannot be opened, if one of the
// expected symbols is missing, or if a symbol does not have the signature
// func(*testing.T).
func TestDynamic(t *testing.T) {
	plug, err := plugin.Open("plugin.so")
	if err != nil {
		t.Fatal(err)
	}
	for _, name := range []string{
		"TestSum",
		"TestDigest",
	} {
		sym, err := plug.Lookup(name)
		if err != nil {
			t.Fatalf("cannot find func %s: %s", name, err)
		}
		// Check the symbol's type explicitly so a mismatch is reported as a
		// test failure naming the symbol instead of a type-assertion panic.
		fn, ok := sym.(func(*testing.T))
		if !ok {
			t.Fatalf("symbol %s has type %T; want func(*testing.T)", name, sym)
		}
		fn(t)
	}
}
46 changes: 46 additions & 0 deletions dynamic/plugin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// +build ignore

package main

import (
"fmt"
"log"
"testing"

"github.com/cespare/xxhash/v2"
)

// Fixed input and expected hash value shared by TestSum and TestDigest.
const (
// in is the string that both tests feed to the hash.
in = "Call me Ishmael. Some years ago--never mind how long precisely-"
// want is the value both tests expect the hash of in to equal.
want = 0x02a2e85470d6fd96
)

// TestSum hashes the fixed input in one call to xxhash.Sum64String and fails
// the test if the result differs from want.
func TestSum(t *testing.T) {
	if got := xxhash.Sum64String(in); got != want {
		t.Fatalf("Sum64String: got 0x%x; want 0x%x", got, want)
	}
}

// TestDigest feeds the fixed input to a fresh xxhash digest in pieces, once
// for every chunk size from 1 up to len(in), and checks that the final Sum64
// equals want. Each chunk size runs as its own subtest.
func TestDigest(t *testing.T) {
	for size := 1; size <= len(in); size++ {
		t.Run(fmt.Sprintf("[chunkSize=%d]", size), func(t *testing.T) {
			d := xxhash.New()
			// Peel off at most size bytes at a time until nothing is left;
			// the last chunk may be shorter than size.
			for rest := in; len(rest) > 0; {
				end := size
				if end > len(rest) {
					end = len(rest)
				}
				chunk := rest[:end]
				rest = rest[end:]

				n, err := d.WriteString(chunk)
				if n != len(chunk) || err != nil {
					t.Fatalf("Digest.WriteString: got (%d, %v); want (%d, nil)",
						n, err, len(chunk))
				}
			}
			got := d.Sum64()
			if got == want {
				return
			}
			// NOTE(review): log.Fatalf exits the whole process rather than
			// failing only this subtest the way t.Fatalf above does. Kept
			// as-is here; confirm whether t.Fatalf was intended.
			log.Fatalf("Digest.Sum64: got 0x%x; want 0x%x", got, want)
		})
	}
}
62 changes: 31 additions & 31 deletions xxhash_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

// Register allocation:
// AX h
// CX pointer to advance through b
// SI pointer to advance through b
// DX n
// BX loop end
// R8 v1, k1
Expand All @@ -16,39 +16,39 @@
// R12 tmp
// R13 prime1v
// R14 prime2v
// R15 prime4v
// DI prime4v

// round reads from and advances the buffer pointer in CX.
// round reads from and advances the buffer pointer in SI.
// It assumes that R13 has prime1v and R14 has prime2v.
#define round(r) \
MOVQ (CX), R12 \
ADDQ $8, CX \
MOVQ (SI), R12 \
ADDQ $8, SI \
IMULQ R14, R12 \
ADDQ R12, r \
ROLQ $31, r \
IMULQ R13, r

// mergeRound applies a merge round on the two registers acc and val.
// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
#define mergeRound(acc, val) \
IMULQ R14, val \
ROLQ $31, val \
IMULQ R13, val \
XORQ val, acc \
IMULQ R13, acc \
ADDQ R15, acc
ADDQ DI, acc

// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOSPLIT, $0-32
// Load fixed primes.
MOVQ ·prime1v(SB), R13
MOVQ ·prime2v(SB), R14
MOVQ ·prime4v(SB), R15
MOVQ ·prime4v(SB), DI

// Load slice.
MOVQ b_base+0(FP), CX
MOVQ b_base+0(FP), SI
MOVQ b_len+8(FP), DX
LEAQ (CX)(DX*1), BX
LEAQ (SI)(DX*1), BX

// The first loop limit will be len(b)-32.
SUBQ $32, BX
Expand All @@ -65,14 +65,14 @@ TEXT ·Sum64(SB), NOSPLIT, $0-32
XORQ R11, R11
SUBQ R13, R11

// Loop until CX > BX.
// Loop until SI > BX.
blockLoop:
round(R8)
round(R9)
round(R10)
round(R11)

CMPQ CX, BX
CMPQ SI, BX
JLE blockLoop

MOVQ R8, AX
Expand Down Expand Up @@ -100,35 +100,35 @@ noBlocks:
afterBlocks:
ADDQ DX, AX

// Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
ADDQ $24, BX

CMPQ CX, BX
CMPQ SI, BX
JG fourByte

wordLoop:
// Calculate k1.
MOVQ (CX), R8
ADDQ $8, CX
MOVQ (SI), R8
ADDQ $8, SI
IMULQ R14, R8
ROLQ $31, R8
IMULQ R13, R8

XORQ R8, AX
ROLQ $27, AX
IMULQ R13, AX
ADDQ R15, AX
ADDQ DI, AX

CMPQ CX, BX
CMPQ SI, BX
JLE wordLoop

fourByte:
ADDQ $4, BX
CMPQ CX, BX
CMPQ SI, BX
JG singles

MOVL (CX), R8
ADDQ $4, CX
MOVL (SI), R8
ADDQ $4, SI
IMULQ R13, R8
XORQ R8, AX

Expand All @@ -138,19 +138,19 @@ fourByte:

singles:
ADDQ $4, BX
CMPQ CX, BX
CMPQ SI, BX
JGE finalize

singlesLoop:
MOVBQZX (CX), R12
ADDQ $1, CX
MOVBQZX (SI), R12
ADDQ $1, SI
IMULQ ·prime5v(SB), R12
XORQ R12, AX

ROLQ $11, AX
IMULQ R13, AX

CMPQ CX, BX
CMPQ SI, BX
JL singlesLoop

finalize:
Expand Down Expand Up @@ -179,9 +179,9 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40
MOVQ ·prime2v(SB), R14

// Load slice.
MOVQ b_base+8(FP), CX
MOVQ b_base+8(FP), SI
MOVQ b_len+16(FP), DX
LEAQ (CX)(DX*1), BX
LEAQ (SI)(DX*1), BX
SUBQ $32, BX

// Load vN from d.
Expand All @@ -199,7 +199,7 @@ blockLoop:
round(R10)
round(R11)

CMPQ CX, BX
CMPQ SI, BX
JLE blockLoop

// Copy vN back to d.
Expand All @@ -208,8 +208,8 @@ blockLoop:
MOVQ R10, 16(AX)
MOVQ R11, 24(AX)

// The number of bytes written is CX minus the old base pointer.
SUBQ b_base+8(FP), CX
MOVQ CX, ret+32(FP)
// The number of bytes written is SI minus the old base pointer.
SUBQ b_base+8(FP), SI
MOVQ SI, ret+32(FP)

RET