Permalink
Browse files

cmd/compile: intrinsics for trunc, floor, ceil on ppc64x

This implements math.Trunc, math.Floor, and math.Ceil as compiler
intrinsics on ppc64x.  The significant speedup comes mainly from
avoiding the call overhead of passing the argument and return value.

BenchmarkCeil-16                    5.95          0.69          -88.40%
BenchmarkFloor-16                   5.95          0.69          -88.40%
BenchmarkTrunc-16                   5.82          0.69          -88.14%

Updates #21390

Change-Id: I951e182694f6e0c431da79c577272b81fb0ebad0
Reviewed-on: https://go-review.googlesource.com/54654
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: David Chase <drchase@google.com>
  • Loading branch information...
laboger committed Aug 9, 2017
1 parent 3cb41be commit 0f19e24da7fa564af3fa4e831463951d5715211a
@@ -2724,6 +2724,21 @@ func init() {
return s.newValue1(ssa.OpSqrt, types.Types[TFLOAT64], args[0])
},
sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
// math.Trunc: build a single ssa.OpTrunc value of type float64 from args[0].
// Registered for sys.PPC64 only.
addF("math", "Trunc",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
// math.Ceil: build a single ssa.OpCeil value of type float64 from args[0].
// Registered for sys.PPC64 only.
addF("math", "Ceil",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
// math.Floor: build a single ssa.OpFloor value of type float64 from args[0].
// Registered for sys.PPC64 only.
addF("math", "Floor",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
/******** math/bits ********/
addF("math/bits", "TrailingZeros64",
@@ -596,7 +596,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
r := v.Reg()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
@@ -74,6 +74,9 @@
(Round64F x) -> (LoweredRound64F x)
(Sqrt x) -> (FSQRT x)
(Floor x) -> (FFLOOR x)
(Ceil x) -> (FCEIL x)
(Trunc x) -> (FTRUNC x)
// Lowering constants
(Const8 [val]) -> (MOVDconst [val])
@@ -241,6 +241,9 @@ func init() {
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)
{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64
{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64
{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64
{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux
{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux
@@ -255,7 +255,10 @@ var genericOps = []opData{
{name: "PopCount32", argLength: 1}, // Count bits in arg[0]
{name: "PopCount64", argLength: 1}, // Count bits in arg[0]
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
{name: "Floor", argLength: 1}, // floor(arg0), float64 only
{name: "Ceil", argLength: 1}, // ceil(arg0), float64 only
{name: "Trunc", argLength: 1}, // trunc(arg0), float64 only
// Data movement, max argument length for Phi is indefinite so just pick
// a really large number
@@ -1322,6 +1322,9 @@ const (
OpPPC64FNEG
OpPPC64FSQRT
OpPPC64FSQRTS
OpPPC64FFLOOR
OpPPC64FCEIL
OpPPC64FTRUNC
OpPPC64ORconst
OpPPC64XORconst
OpPPC64ANDconst
@@ -1800,6 +1803,9 @@ const (
OpPopCount32
OpPopCount64
OpSqrt
OpFloor
OpCeil
OpTrunc
OpPhi
OpCopy
OpConvert
@@ -16955,6 +16961,45 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FFLOOR",
argLen: 1,
asm: ppc64.AFRIM,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{
name: "FCEIL",
argLen: 1,
asm: ppc64.AFRIP,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{
name: "FTRUNC",
argLen: 1,
asm: ppc64.AFRIZ,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{
name: "ORconst",
auxType: auxInt64,
@@ -21976,6 +22021,21 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "Floor",
argLen: 1,
generic: true,
},
{
name: "Ceil",
argLen: 1,
generic: true,
},
{
name: "Trunc",
argLen: 1,
generic: true,
},
{
name: "Phi",
argLen: -1,
@@ -73,6 +73,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpBitLen32_0(v)
case OpBitLen64:
return rewriteValuePPC64_OpBitLen64_0(v)
case OpCeil:
return rewriteValuePPC64_OpCeil_0(v)
case OpClosureCall:
return rewriteValuePPC64_OpClosureCall_0(v)
case OpCom16:
@@ -161,6 +163,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpEqB_0(v)
case OpEqPtr:
return rewriteValuePPC64_OpEqPtr_0(v)
case OpFloor:
return rewriteValuePPC64_OpFloor_0(v)
case OpGeq16:
return rewriteValuePPC64_OpGeq16_0(v)
case OpGeq16U:
@@ -583,6 +587,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpSub8_0(v)
case OpSubPtr:
return rewriteValuePPC64_OpSubPtr_0(v)
case OpTrunc:
return rewriteValuePPC64_OpTrunc_0(v)
case OpTrunc16to8:
return rewriteValuePPC64_OpTrunc16to8_0(v)
case OpTrunc32to16:
@@ -1070,6 +1076,17 @@ func rewriteValuePPC64_OpBitLen64_0(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpCeil_0 rewrites the generic Ceil value v into the
// PPC64 FCEIL op, keeping its single argument:
//
//	(Ceil x) -> (FCEIL x)
//
// The rule has no condition, so it always reports true.
func rewriteValuePPC64_OpCeil_0(v *Value) bool {
	operand := v.Args[0]
	v.reset(OpPPC64FCEIL)
	v.AddArg(operand)
	return true
}
func rewriteValuePPC64_OpClosureCall_0(v *Value) bool {
// match: (ClosureCall [argwid] entry closure mem)
// cond:
@@ -1823,6 +1840,17 @@ func rewriteValuePPC64_OpEqPtr_0(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpFloor_0 rewrites the generic Floor value v into the
// PPC64 FFLOOR op, keeping its single argument:
//
//	(Floor x) -> (FFLOOR x)
//
// The rule has no condition, so it always reports true.
func rewriteValuePPC64_OpFloor_0(v *Value) bool {
	operand := v.Args[0]
	v.reset(OpPPC64FFLOOR)
	v.AddArg(operand)
	return true
}
func rewriteValuePPC64_OpGeq16_0(v *Value) bool {
b := v.Block
_ = b
@@ -10463,6 +10491,17 @@ func rewriteValuePPC64_OpSubPtr_0(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpTrunc_0 rewrites the generic Trunc value v into the
// PPC64 FTRUNC op, keeping its single argument:
//
//	(Trunc x) -> (FTRUNC x)
//
// The rule has no condition, so it always reports true.
func rewriteValuePPC64_OpTrunc_0(v *Value) bool {
	operand := v.Args[0]
	v.reset(OpPPC64FTRUNC)
	v.AddArg(operand)
	return true
}
func rewriteValuePPC64_OpTrunc16to8_0(v *Value) bool {
// match: (Trunc16to8 x)
// cond:

0 comments on commit 0f19e24

Please sign in to comment.