Permalink
Browse files

cmd/compile,math: improve int<->float conversions on ppc64x

The functions Float64bits and Float64frombits perform
poorly on ppc64x because the int<->float conversions
often result in load and store sequences to handle the
type change. This patch adds more rules to recognize
those sequences and use register to register moves
and avoid unnecessary loads and stores where possible.

There were some existing rules to improve these conversions,
but this provides additional improvements. Included here:

- New instruction FCFIDS to improve on conversion to 32 bit
- Rename Xf2i64 and Xi2f64 as MTVSRD, MFVSRD, to match the asm
- Add rules to lower some of the load/store sequences for
- Added new go asm to ppc64.s testcase.
conversions

Improvements:

BenchmarkAbs-16                2.16          0.93          -56.94%
BenchmarkCopysign-16           2.66          1.18          -55.64%
BenchmarkRound-16              4.82          2.69          -44.19%
BenchmarkSignbit-16            1.71          1.14          -33.33%
BenchmarkFrexp-16              11.4          7.94          -30.35%
BenchmarkLogb-16               10.4          7.34          -29.42%
BenchmarkLdexp-16              15.7          11.2          -28.66%
BenchmarkIlogb-16              10.2          7.32          -28.24%
BenchmarkPowInt-16             69.6          55.9          -19.68%
BenchmarkModf-16               10.1          8.19          -18.91%
BenchmarkLog2-16               17.4          14.3          -17.82%
BenchmarkCbrt-16               45.0          37.3          -17.11%
BenchmarkAtanh-16              57.6          48.3          -16.15%
BenchmarkRemainder-16          76.6          65.4          -14.62%
BenchmarkGamma-16              26.0          22.5          -13.46%
BenchmarkPowFrac-16            197           174           -11.68%
BenchmarkMod-16                112           99.8          -10.89%
BenchmarkAsinh-16              59.9          53.7          -10.35%
BenchmarkAcosh-16              44.8          40.3          -10.04%

Updates #21390

Change-Id: I56cc991fc2e55249d69518d4e1ba76cc23904e35
Reviewed-on: https://go-review.googlesource.com/63290
Reviewed-by: Michael Munday <mike.munday@ibm.com>
  • Loading branch information...
laboger committed Sep 12, 2017
1 parent f351dbf commit 40e25895e3ab998033cc9f7086332d046c7f608a
@@ -550,6 +550,14 @@ label1:
// ftsqrt BF, FRB
FTSQRT F2,$7
// FCFID
// FCFIDS
FCFID F2,F3
FCFIDCC F3,F3
FCFIDS F2,F3
FCFIDSCC F2,F3
//
// CMP
//
@@ -152,29 +152,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = y
}
case ssa.OpPPC64Xf2i64:
{
x := v.Args[0].Reg()
y := v.Reg()
p := s.Prog(ppc64.AMFVSRD)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
}
case ssa.OpPPC64Xi2f64:
{
x := v.Args[0].Reg()
y := v.Reg()
p := s.Prog(ppc64.AMTVSRD)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
}
case ssa.OpPPC64LoweredAtomicAnd8,
ssa.OpPPC64LoweredAtomicOr8:
// SYNC
@@ -597,7 +574,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD:
r := v.Reg()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
@@ -57,19 +57,25 @@
(Div64F x y) -> (FDIV x y)
// Lowering float <-> int
(Cvt32to32F x) -> (FRSP (FCFID (Xi2f64 (SignExt32to64 x))))
(Cvt32to64F x) -> (FCFID (Xi2f64 (SignExt32to64 x)))
(Cvt64to32F x) -> (FRSP (FCFID (Xi2f64 x)))
(Cvt64to64F x) -> (FCFID (Xi2f64 x))
(Cvt32to32F x) -> (FCFIDS (MTVSRD (SignExt32to64 x)))
(Cvt32to64F x) -> (FCFID (MTVSRD (SignExt32to64 x)))
(Cvt64to32F x) -> (FCFIDS (MTVSRD x))
(Cvt64to64F x) -> (FCFID (MTVSRD x))
(Cvt32Fto32 x) -> (Xf2i64 (FCTIWZ x))
(Cvt32Fto64 x) -> (Xf2i64 (FCTIDZ x))
(Cvt64Fto32 x) -> (Xf2i64 (FCTIWZ x))
(Cvt64Fto64 x) -> (Xf2i64 (FCTIDZ x))
(Cvt32Fto32 x) -> (MFVSRD (FCTIWZ x))
(Cvt32Fto64 x) -> (MFVSRD (FCTIDZ x))
(Cvt64Fto32 x) -> (MFVSRD (FCTIWZ x))
(Cvt64Fto64 x) -> (MFVSRD (FCTIDZ x))
(Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
(Cvt64Fto32F x) -> (FRSP x)
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) -> (MFVSRD x)
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _)) -> (MTVSRD x)
(FMOVDstore [off] {sym} ptr (MTVSRD x) mem) -> (MOVDstore [off] {sym} ptr x mem)
(MOVDstore [off] {sym} ptr (MFVSRD x) mem) -> (FMOVDstore [off] {sym} ptr x mem)
(Round32F x) -> (LoweredRound32F x)
(Round64F x) -> (LoweredRound64F x)
@@ -223,6 +223,7 @@ func init() {
{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"}, // convert 64-bit integer to float
{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"}, // round float to 32-bit value
// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
@@ -231,8 +232,8 @@ func init() {
// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
// the word-load instructions. (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
{name: "Xf2i64", argLength: 1, reg: fpgp, typ: "Int64"}, // move 64 bits of F register into G register
{name: "Xi2f64", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits of G register into F register
{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"}, // move 64 bits of F register into G register
{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1
@@ -1333,9 +1333,10 @@ const (
OpPPC64FCTIDZ
OpPPC64FCTIWZ
OpPPC64FCFID
OpPPC64FCFIDS
OpPPC64FRSP
OpPPC64Xf2i64
OpPPC64Xi2f64
OpPPC64MFVSRD
OpPPC64MTVSRD
OpPPC64AND
OpPPC64ANDN
OpPPC64OR
@@ -17086,6 +17087,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FCFIDS",
argLen: 1,
asm: ppc64.AFCFIDS,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{
name: "FRSP",
argLen: 1,
@@ -17100,8 +17114,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "Xf2i64",
name: "MFVSRD",
argLen: 1,
asm: ppc64.AMFVSRD,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
@@ -17112,8 +17127,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "Xi2f64",
name: "MTVSRD",
argLen: 1,
asm: ppc64.AMTVSRD,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
Oops, something went wrong.

0 comments on commit 40e2589

Please sign in to comment.