diff --git a/src/cmd/compile/internal/arm64/galign.go b/src/cmd/compile/internal/arm64/galign.go
index 17c851cb14c79..7acc4e08eb992 100644
--- a/src/cmd/compile/internal/arm64/galign.go
+++ b/src/cmd/compile/internal/arm64/galign.go
@@ -29,6 +29,8 @@ func Main() {
 	gc.Thearch.Betypeinit = betypeinit
 	gc.Thearch.Cgen_hmul = cgen_hmul
+	gc.Thearch.AddSetCarry = AddSetCarry
+	gc.Thearch.RightShiftWithCarry = RightShiftWithCarry
 	gc.Thearch.Cgen_shift = cgen_shift
 	gc.Thearch.Clearfat = clearfat
 	gc.Thearch.Defframe = defframe
diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go
index 9abd901d7a5ce..bddfed631a7d6 100644
--- a/src/cmd/compile/internal/arm64/ggen.go
+++ b/src/cmd/compile/internal/arm64/ggen.go
@@ -252,6 +252,53 @@ func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
 	}
 }
 
+// RightShiftWithCarry generates a constant unsigned
+// right shift with carry.
+//
+// res = n >> shift // with carry
+func RightShiftWithCarry(n *gc.Node, shift uint, res *gc.Node) {
+	// Extra 1 is for carry bit.
+	maxshift := uint(n.Type.Width*8 + 1)
+	if shift == 0 {
+		gmove(n, res)
+	} else if shift < maxshift {
+		// 1. clear rightmost bit of target
+		var n1 gc.Node
+		gc.Nodconst(&n1, n.Type, 1)
+		gins(optoas(gc.ORSH, n.Type), &n1, n)
+		gins(optoas(gc.OLSH, n.Type), &n1, n)
+		// 2. add carry flag to target
+		var n2 gc.Node
+		gc.Nodconst(&n1, n.Type, 0)
+		gc.Regalloc(&n2, n.Type, nil)
+		gins(optoas(gc.OAS, n.Type), &n1, &n2)
+		gins(arm64.AADC, &n2, n)
+		// 3. right rotate 1 bit
+		gc.Nodconst(&n1, n.Type, 1)
+		gins(arm64.AROR, &n1, n)
+
+		// ARM64 backend doesn't eliminate shifts by 0. It is manually checked here.
+		if shift > 1 {
+			var n3 gc.Node
+			gc.Nodconst(&n3, n.Type, int64(shift-1))
+			cgen_shift(gc.ORSH, true, n, &n3, res)
+		} else {
+			gmove(n, res)
+		}
+		gc.Regfree(&n2)
+	} else {
+		gc.Fatalf("RightShiftWithCarry: shift(%v) is bigger than max size(%v)", shift, maxshift)
+	}
+}
+
+// AddSetCarry generates add and set carry.
+//
+// res = nl + nr // with carry flag set
+func AddSetCarry(nl *gc.Node, nr *gc.Node, res *gc.Node) {
+	gins(arm64.AADDS, nl, nr)
+	gmove(nr, res)
+}
+
 /*
  * generate high multiply:
  *   res = (nl*nr) >> width
diff --git a/src/cmd/compile/internal/arm64/gsubr.go b/src/cmd/compile/internal/arm64/gsubr.go
index efa66a09d33b6..f193291d01e23 100644
--- a/src/cmd/compile/internal/arm64/gsubr.go
+++ b/src/cmd/compile/internal/arm64/gsubr.go
@@ -890,18 +890,6 @@ func optoas(op gc.Op, t *gc.Type) obj.As {
 		ORSH_ | gc.TINT64:
 		a = arm64.AASR
 
-		// TODO(minux): handle rotates
-		//case CASE(ORROTC, TINT8):
-		//case CASE(ORROTC, TUINT8):
-		//case CASE(ORROTC, TINT16):
-		//case CASE(ORROTC, TUINT16):
-		//case CASE(ORROTC, TINT32):
-		//case CASE(ORROTC, TUINT32):
-		//case CASE(ORROTC, TINT64):
-		//case CASE(ORROTC, TUINT64):
-		// 	a = 0//??? RLDC??
-		// 	break;
-
 	case OHMUL_ | gc.TINT64:
 		a = arm64.ASMULH
diff --git a/src/cmd/compile/internal/arm64/peep.go b/src/cmd/compile/internal/arm64/peep.go
index 887353c8894a5..22be1afebcb87 100644
--- a/src/cmd/compile/internal/arm64/peep.go
+++ b/src/cmd/compile/internal/arm64/peep.go
@@ -534,10 +534,13 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
 		return 0
 
 	case arm64.AADD, /* read p->from, read p->reg, write p->to */
+		arm64.AADDS,
 		arm64.ASUB,
+		arm64.AADC,
 		arm64.AAND,
 		arm64.AORR,
 		arm64.AEOR,
+		arm64.AROR,
 		arm64.AMUL,
 		arm64.ASMULL,
 		arm64.AUMULL,
diff --git a/src/cmd/compile/internal/arm64/prog.go b/src/cmd/compile/internal/arm64/prog.go
index 3091c4a840d6f..d504d0f0ee46e 100644
--- a/src/cmd/compile/internal/arm64/prog.go
+++ b/src/cmd/compile/internal/arm64/prog.go
@@ -59,6 +59,9 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
 	arm64.ALSR & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AASR & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ACMP & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead},
+	arm64.AADC & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
+	arm64.AROR & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
+	arm64.AADDS & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
 
 	// Floating point.
 	arm64.AFADDD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
diff --git a/src/cmd/compile/internal/gc/cgen.go b/src/cmd/compile/internal/gc/cgen.go
index bb7487c958f50..8db752ec51110 100644
--- a/src/cmd/compile/internal/gc/cgen.go
+++ b/src/cmd/compile/internal/gc/cgen.go
@@ -2642,9 +2642,9 @@ func cgen_ret(n *Node) {
 // signed and unsigned high multiplication (OHMUL).
 func hasHMUL64() bool {
 	switch Ctxt.Arch.Family {
-	case sys.AMD64, sys.S390X:
+	case sys.AMD64, sys.S390X, sys.ARM64:
 		return true
-	case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
+	case sys.ARM, sys.I386, sys.MIPS64, sys.PPC64:
 		return false
 	}
 	Fatalf("unknown architecture")
@@ -2664,6 +2664,28 @@ func hasRROTC64() bool {
 	return false
 }
 
+func hasRightShiftWithCarry() bool {
+	switch Ctxt.Arch.Family {
+	case sys.ARM64:
+		return true
+	case sys.AMD64, sys.ARM, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
+		return false
+	}
+	Fatalf("unknown architecture")
+	return false
+}
+
+func hasAddSetCarry() bool {
+	switch Ctxt.Arch.Family {
+	case sys.ARM64:
+		return true
+	case sys.AMD64, sys.ARM, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
+		return false
+	}
+	Fatalf("unknown architecture")
+	return false
+}
+
 // generate division according to op, one of:
 //	res = nl / nr
 //	res = nl % nr
@@ -2699,8 +2721,9 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 		// the MSB. For now this needs the RROTC instruction.
 		// TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
 		// an alternative sequence of instructions for architectures
-		// that do not have a shift right with carry instruction.
-		if m.Ua != 0 && !hasRROTC64() {
+		// (TODO: MIPS64, PPC64, S390X) that do not have a shift
+		// right with carry instruction.
+		if m.Ua != 0 && !hasRROTC64() && !hasRightShiftWithCarry() {
 			goto longdiv
 		}
 		if op == OMOD {
@@ -2717,12 +2740,20 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 		if m.Ua != 0 {
 			// Need to add numerator accounting for overflow.
-			Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
+			if hasAddSetCarry() {
+				Thearch.AddSetCarry(&n1, &n3, &n3)
+			} else {
+				Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
+			}
 
-			Nodconst(&n2, nl.Type, 1)
-			Thearch.Gins(Thearch.Optoas(ORROTC, nl.Type), &n2, &n3)
-			Nodconst(&n2, nl.Type, int64(m.S)-1)
-			Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3)
+			if !hasRROTC64() {
+				Thearch.RightShiftWithCarry(&n3, uint(m.S), &n3)
+			} else {
+				Nodconst(&n2, nl.Type, 1)
+				Thearch.Gins(Thearch.Optoas(ORROTC, nl.Type), &n2, &n3)
+				Nodconst(&n2, nl.Type, int64(m.S)-1)
+				Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3)
+			}
 		} else {
 			Nodconst(&n2, nl.Type, int64(m.S))
 			Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3) // shift dx
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index 87b6121c8e80b..f9a372dccee63 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -378,23 +378,25 @@ type Arch struct {
 	MAXWIDTH     int64
 	ReservedRegs []int
 
-	AddIndex     func(*Node, int64, *Node) bool // optional
-	Betypeinit   func()
-	Bgen_float   func(*Node, bool, int, *obj.Prog) // optional
-	Cgen64       func(*Node, *Node)                // only on 32-bit systems
-	Cgenindex    func(*Node, *Node, bool) *obj.Prog
-	Cgen_bmul    func(Op, *Node, *Node, *Node) bool
-	Cgen_float   func(*Node, *Node) // optional
-	Cgen_hmul    func(*Node, *Node, *Node)
-	Cgen_shift   func(Op, bool, *Node, *Node, *Node)
-	Clearfat     func(*Node)
-	Cmp64        func(*Node, *Node, Op, int, *obj.Prog) // only on 32-bit systems
-	Defframe     func(*obj.Prog)
-	Dodiv        func(Op, *Node, *Node, *Node)
-	Excise       func(*Flow)
-	Expandchecks func(*obj.Prog)
-	Getg         func(*Node)
-	Gins         func(obj.As, *Node, *Node) *obj.Prog
+	AddIndex            func(*Node, int64, *Node) bool // optional
+	Betypeinit          func()
+	Bgen_float          func(*Node, bool, int, *obj.Prog) // optional
+	Cgen64              func(*Node, *Node)                // only on 32-bit systems
+	Cgenindex           func(*Node, *Node, bool) *obj.Prog
+	Cgen_bmul           func(Op, *Node, *Node, *Node) bool
+	Cgen_float          func(*Node, *Node) // optional
+	Cgen_hmul           func(*Node, *Node, *Node)
+	RightShiftWithCarry func(*Node, uint, *Node)  // only on systems without RROTC instruction
+	AddSetCarry         func(*Node, *Node, *Node) // only on systems when ADD does not update carry flag
+	Cgen_shift          func(Op, bool, *Node, *Node, *Node)
+	Clearfat            func(*Node)
+	Cmp64               func(*Node, *Node, Op, int, *obj.Prog) // only on 32-bit systems
+	Defframe            func(*obj.Prog)
+	Dodiv               func(Op, *Node, *Node, *Node)
+	Excise              func(*Flow)
+	Expandchecks        func(*obj.Prog)
+	Getg                func(*Node)
+	Gins                func(obj.As, *Node, *Node) *obj.Prog
 
 	// Ginscmp generates code comparing n1 to n2 and jumping away if op is satisfied.
 	// The returned prog should be Patch'ed with the jump target.
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index bce34374e83e7..cc9a50e6a807f 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -3424,7 +3424,7 @@ func walkdiv(n *Node, init *Nodes) *Node {
 	// if >= 0, nr is 1<
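Editor's note, not part of the patch: the sketch below mirrors, in user-level Go, the sequence cgen_div now emits on ARM64 for unsigned magic-number division when the fixup add is needed (m.Ua != 0): AADDS performs the add and keeps the carry, then RightShiftWithCarry clears bit 0, ADCs the carry into it, rotates right by one, and finishes with a plain shift. The helpers divBy7 and shift65 and the constants magicM and magicS are illustrative assumptions (magic values for dividing by 7); math/bits is used only for convenience and postdates this backend. The loop checks the identity against x / 7.

package main

import (
	"fmt"
	"math/bits"
)

// Assumed magic constants for dividing a uint64 by 7; this is the
// "overflow" case of the magicu algorithm, where the real multiplier
// is 2^64 + magicM and a fixup add of the numerator is required.
const (
	magicM = 0x2492492492492493
	magicS = 3
)

// shift65 mirrors the RightShiftWithCarry steps for s >= 1: it shifts
// the 65-bit value {carry, n} right by s bits.
func shift65(carry, n uint64, s uint) uint64 {
	n = n >> 1 << 1              // 1. clear bit 0 (it is shifted out anyway)
	n += carry                   // 2. ADC: the carry flag becomes bit 0
	n = bits.RotateRight64(n, 1) // 3. ROR #1: the carry ends up in bit 63
	return n >> (s - 1)          // remaining shift
}

// divBy7 follows the instruction sequence the patched cgen_div emits
// on ARM64 for x / 7.
func divBy7(x uint64) uint64 {
	hi, _ := bits.Mul64(x, magicM)     // OHMUL: high 64 bits of x*magicM
	sum, carry := bits.Add64(hi, x, 0) // AADDS: fixup add, keeping the carry
	return shift65(carry, sum, magicS) // 65-bit right shift by magicS
}

func main() {
	for _, x := range []uint64{0, 1, 6, 7, 8, 1<<32 + 3, 1<<64 - 1} {
		if q := divBy7(x); q != x/7 {
			fmt.Printf("mismatch: x=%d got %d want %d\n", x, q, x/7)
			return
		}
	}
	fmt.Println("ok")
}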