Skip to content

Commit

Permalink
cmd/compile: intrinsify math/bits.Mul on ppc64x
Browse files Browse the repository at this point in the history
Add SSA rules to intrinsify Mul/Mul64 on ppc64x.

benchmark             old ns/op     new ns/op     delta
BenchmarkMul-40       8.80          0.93          -89.43%
BenchmarkMul32-40     1.39          1.39          +0.00%
BenchmarkMul64-40     5.39          0.93          -82.75%

Updates #24813

Change-Id: I6e95bfbe976a2278bd17799df184a7fbc0e57829
Reviewed-on: https://go-review.googlesource.com/138917
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
  • Loading branch information
ceseo authored and laboger committed Oct 2, 2018
1 parent f5e5844 commit 9aed4cc
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/cmd/compile/internal/gc/ssa.go
Expand Up @@ -3435,12 +3435,12 @@ func init() {
addF("math/bits", "OnesCount",
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
sys.AMD64)
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64)
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
addF("math/bits", "Mul64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
},
sys.AMD64, sys.ARM64)
sys.AMD64, sys.ARM64, sys.PPC64)

/******** sync/atomic ********/

Expand Down
18 changes: 18 additions & 0 deletions src/cmd/compile/internal/ppc64/ssa.go
Expand Up @@ -153,6 +153,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = y
}

case ssa.OpPPC64LoweredMuluhilo:
// MULHDU Rarg1, Rarg0, Reg0
// MULLD Rarg1, Rarg0, Reg1
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
p := s.Prog(ppc64.AMULHDU)
p.From.Type = obj.TYPE_REG
p.From.Reg = r1
p.Reg = r0
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
p1 := s.Prog(ppc64.AMULLD)
p1.From.Type = obj.TYPE_REG
p1.From.Reg = r1
p1.Reg = r0
p1.To.Type = obj.TYPE_REG
p1.To.Reg = v.Reg1()

case ssa.OpPPC64LoweredAtomicAnd8,
ssa.OpPPC64LoweredAtomicOr8:
// LWSYNC
Expand Down
1 change: 1 addition & 0 deletions src/cmd/compile/internal/ssa/gen/PPC64.rules
Expand Up @@ -25,6 +25,7 @@

(Mul64 x y) -> (MULLD x y)
(Mul(32|16|8) x y) -> (MULLW x y)
(Mul64uhilo x y) -> (LoweredMuluhilo x y)

(Div64 x y) -> (DIVD x y)
(Div64u x y) -> (DIVDU x y)
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/compile/internal/ssa/gen/PPC64Ops.go
Expand Up @@ -135,6 +135,7 @@ func init() {
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
gp1cr = regInfo{inputs: []regMask{gp | sp | sb}}
gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
crgp = regInfo{inputs: nil, outputs: []regMask{gp}}
Expand Down Expand Up @@ -170,6 +171,7 @@ func init() {
{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)

{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
Expand Down
16 changes: 16 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions src/cmd/compile/internal/ssa/rewritePPC64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions test/codegen/mathbits.go
Expand Up @@ -310,11 +310,13 @@ func IterateBits8(n uint8) int {
// Mul returns the full-width product of x and y as (hi, lo) by calling
// bits.Mul. The comments inside the body are asmcheck directives naming the
// instructions expected for each architecture; they are written in the
// compact arch:"A","B" form (no spaces after the colon or between entries)
// so that every architecture's check uses the same syntax.
func Mul(x, y uint) (hi, lo uint) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64:"MULHDU","MULLD"
	return bits.Mul(x, y)
}

// Mul64 returns the 128-bit product of x and y as (hi, lo) by calling
// bits.Mul64. The comments inside the body are asmcheck directives naming
// the instructions expected for each architecture; they are written in the
// compact arch:"A","B" form (no spaces after the colon or between entries)
// so that every architecture's check uses the same syntax.
func Mul64(x, y uint64) (hi, lo uint64) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64:"MULHDU","MULLD"
	return bits.Mul64(x, y)
}

0 comments on commit 9aed4cc

Please sign in to comment.