diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index cc210978ef0eb..22d2e7e475bf4 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1260,3 +1260,22 @@
     (SHLQconst [40] (MOVBload [i+5] {s} p mem)))
     (SHLQconst [48] (MOVBload [i+6] {s} p mem)))
     (SHLQconst [56] (MOVBload [i+7] {s} p mem))) -> @x.Block (MOVQload [i] {s} p mem)
+
+(ORW x:(MOVBloadidx1 [i] {s} p idx mem)
+    (SHLWconst [8] (MOVBloadidx1 [i+1] {s} p idx mem))) -> @x.Block (MOVWload [i] {s} (ADDQ p idx) mem)
+
+(ORL (ORL (ORL
+    x:(MOVBloadidx1 [i] {s} p idx mem)
+    (SHLLconst [8] (MOVBloadidx1 [i+1] {s} p idx mem)))
+    (SHLLconst [16] (MOVBloadidx1 [i+2] {s} p idx mem)))
+    (SHLLconst [24] (MOVBloadidx1 [i+3] {s} p idx mem))) -> @x.Block (MOVLload [i] {s} (ADDQ p idx) mem)
+
+(ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ
+    x:(MOVBloadidx1 [i] {s} p idx mem)
+    (SHLQconst [8] (MOVBloadidx1 [i+1] {s} p idx mem)))
+    (SHLQconst [16] (MOVBloadidx1 [i+2] {s} p idx mem)))
+    (SHLQconst [24] (MOVBloadidx1 [i+3] {s} p idx mem)))
+    (SHLQconst [32] (MOVBloadidx1 [i+4] {s} p idx mem)))
+    (SHLQconst [40] (MOVBloadidx1 [i+5] {s} p idx mem)))
+    (SHLQconst [48] (MOVBloadidx1 [i+6] {s} p idx mem)))
+    (SHLQconst [56] (MOVBloadidx1 [i+7] {s} p idx mem))) -> @x.Block (MOVQload [i] {s} (ADDQ p idx) mem)
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 8dd1b15f135a8..2d7aa3bd80bb7 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -10668,6 +10668,118 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
 		v0.AddArg(mem)
 		return true
 	}
+	// match: (ORL (ORL (ORL x:(MOVBloadidx1 [i] {s} p idx mem) (SHLLconst [8] (MOVBloadidx1 [i+1] {s} p idx mem))) (SHLLconst [16] (MOVBloadidx1 [i+2] {s} p idx mem))) (SHLLconst [24] (MOVBloadidx1 [i+3] {s} p idx mem)))
+	// cond:
+	// result: @x.Block (MOVLload [i] {s} (ADDQ p idx) mem)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ORL {
+			break
+		}
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpAMD64ORL {
+			break
+		}
+		x := v_0_0.Args[0]
+		if x.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		i := x.AuxInt
+		s := x.Aux
+		p := x.Args[0]
+		idx := x.Args[1]
+		mem := x.Args[2]
+		v_0_0_1 := v_0_0.Args[1]
+		if v_0_0_1.Op != OpAMD64SHLLconst {
+			break
+		}
+		if v_0_0_1.AuxInt != 8 {
+			break
+		}
+		v_0_0_1_0 := v_0_0_1.Args[0]
+		if v_0_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_0_1_0.AuxInt != i+1 {
+			break
+		}
+		if v_0_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_0_1_0.Args[2] {
+			break
+		}
+		v_0_1 := v_0.Args[1]
+		if v_0_1.Op != OpAMD64SHLLconst {
+			break
+		}
+		if v_0_1.AuxInt != 16 {
+			break
+		}
+		v_0_1_0 := v_0_1.Args[0]
+		if v_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_1_0.AuxInt != i+2 {
+			break
+		}
+		if v_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_1_0.Args[2] {
+			break
+		}
+		v_1 := v.Args[1]
+		if v_1.Op != OpAMD64SHLLconst {
+			break
+		}
+		if v_1.AuxInt != 24 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_1_0.AuxInt != i+3 {
+			break
+		}
+		if v_1_0.Aux != s {
+			break
+		}
+		if p != v_1_0.Args[0] {
+			break
+		}
+		if idx != v_1_0.Args[1] {
+			break
+		}
+		if mem != v_1_0.Args[2] {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(v.Line, OpAMD64MOVLload, config.fe.TypeUInt32())
+		v.reset(OpCopy)
+		v.AddArg(v0)
+		v0.AuxInt = i
+		v0.Aux = s
+		v1 := b.NewValue0(v.Line, OpAMD64ADDQ, p.Type)
+		v1.AddArg(p)
+		v1.AddArg(idx)
+		v0.AddArg(v1)
+		v0.AddArg(mem)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpAMD64ORLconst(v *Value, config *Config) bool {
@@ -10974,6 +11086,238 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
 		v0.AddArg(mem)
 		return true
 	}
+	// match: (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ x:(MOVBloadidx1 [i] {s} p idx mem) (SHLQconst [8] (MOVBloadidx1 [i+1] {s} p idx mem))) (SHLQconst [16] (MOVBloadidx1 [i+2] {s} p idx mem))) (SHLQconst [24] (MOVBloadidx1 [i+3] {s} p idx mem))) (SHLQconst [32] (MOVBloadidx1 [i+4] {s} p idx mem))) (SHLQconst [40] (MOVBloadidx1 [i+5] {s} p idx mem))) (SHLQconst [48] (MOVBloadidx1 [i+6] {s} p idx mem))) (SHLQconst [56] (MOVBloadidx1 [i+7] {s} p idx mem)))
+	// cond:
+	// result: @x.Block (MOVQload [i] {s} (ADDQ p idx) mem)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ORQ {
+			break
+		}
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpAMD64ORQ {
+			break
+		}
+		v_0_0_0 := v_0_0.Args[0]
+		if v_0_0_0.Op != OpAMD64ORQ {
+			break
+		}
+		v_0_0_0_0 := v_0_0_0.Args[0]
+		if v_0_0_0_0.Op != OpAMD64ORQ {
+			break
+		}
+		v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+		if v_0_0_0_0_0.Op != OpAMD64ORQ {
+			break
+		}
+		v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+		if v_0_0_0_0_0_0.Op != OpAMD64ORQ {
+			break
+		}
+		x := v_0_0_0_0_0_0.Args[0]
+		if x.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		i := x.AuxInt
+		s := x.Aux
+		p := x.Args[0]
+		idx := x.Args[1]
+		mem := x.Args[2]
+		v_0_0_0_0_0_0_1 := v_0_0_0_0_0_0.Args[1]
+		if v_0_0_0_0_0_0_1.Op != OpAMD64SHLQconst {
+			break
+		}
+		if v_0_0_0_0_0_0_1.AuxInt != 8 {
+			break
+		}
+		v_0_0_0_0_0_0_1_0 := v_0_0_0_0_0_0_1.Args[0]
+		if v_0_0_0_0_0_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_0_0_0_0_0_1_0.AuxInt != i+1 {
+			break
+		}
+		if v_0_0_0_0_0_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_0_0_0_0_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_0_0_0_0_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_0_0_0_0_0_1_0.Args[2] {
+			break
+		}
+		v_0_0_0_0_0_1 := v_0_0_0_0_0.Args[1]
+		if v_0_0_0_0_0_1.Op != OpAMD64SHLQconst {
+			break
+		}
+		if v_0_0_0_0_0_1.AuxInt != 16 {
+			break
+		}
+		v_0_0_0_0_0_1_0 := v_0_0_0_0_0_1.Args[0]
+		if v_0_0_0_0_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_0_0_0_0_1_0.AuxInt != i+2 {
+			break
+		}
+		if v_0_0_0_0_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_0_0_0_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_0_0_0_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_0_0_0_0_1_0.Args[2] {
+			break
+		}
+		v_0_0_0_0_1 := v_0_0_0_0.Args[1]
+		if v_0_0_0_0_1.Op != OpAMD64SHLQconst {
+			break
+		}
+		if v_0_0_0_0_1.AuxInt != 24 {
+			break
+		}
+		v_0_0_0_0_1_0 := v_0_0_0_0_1.Args[0]
+		if v_0_0_0_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_0_0_0_1_0.AuxInt != i+3 {
+			break
+		}
+		if v_0_0_0_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_0_0_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_0_0_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_0_0_0_1_0.Args[2] {
+			break
+		}
+		v_0_0_0_1 := v_0_0_0.Args[1]
+		if v_0_0_0_1.Op != OpAMD64SHLQconst {
+			break
+		}
+		if v_0_0_0_1.AuxInt != 32 {
+			break
+		}
+		v_0_0_0_1_0 := v_0_0_0_1.Args[0]
+		if v_0_0_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_0_0_1_0.AuxInt != i+4 {
+			break
+		}
+		if v_0_0_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_0_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_0_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_0_0_1_0.Args[2] {
+			break
+		}
+		v_0_0_1 := v_0_0.Args[1]
+		if v_0_0_1.Op != OpAMD64SHLQconst {
+			break
+		}
+		if v_0_0_1.AuxInt != 40 {
+			break
+		}
+		v_0_0_1_0 := v_0_0_1.Args[0]
+		if v_0_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_0_1_0.AuxInt != i+5 {
+			break
+		}
+		if v_0_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_0_1_0.Args[2] {
+			break
+		}
+		v_0_1 := v_0.Args[1]
+		if v_0_1.Op != OpAMD64SHLQconst {
+			break
+		}
+		if v_0_1.AuxInt != 48 {
+			break
+		}
+		v_0_1_0 := v_0_1.Args[0]
+		if v_0_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_0_1_0.AuxInt != i+6 {
+			break
+		}
+		if v_0_1_0.Aux != s {
+			break
+		}
+		if p != v_0_1_0.Args[0] {
+			break
+		}
+		if idx != v_0_1_0.Args[1] {
+			break
+		}
+		if mem != v_0_1_0.Args[2] {
+			break
+		}
+		v_1 := v.Args[1]
+		if v_1.Op != OpAMD64SHLQconst {
+			break
+		}
+		if v_1.AuxInt != 56 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_1_0.AuxInt != i+7 {
+			break
+		}
+		if v_1_0.Aux != s {
+			break
+		}
+		if p != v_1_0.Args[0] {
+			break
+		}
+		if idx != v_1_0.Args[1] {
+			break
+		}
+		if mem != v_1_0.Args[2] {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(v.Line, OpAMD64MOVQload, config.fe.TypeUInt64())
+		v.reset(OpCopy)
+		v.AddArg(v0)
+		v0.AuxInt = i
+		v0.Aux = s
+		v1 := b.NewValue0(v.Line, OpAMD64ADDQ, p.Type)
+		v1.AddArg(p)
+		v1.AddArg(idx)
+		v0.AddArg(v1)
+		v0.AddArg(mem)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpAMD64ORQconst(v *Value, config *Config) bool {
@@ -11110,6 +11454,58 @@ func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool {
 		v0.AddArg(mem)
 		return true
 	}
+	// match: (ORW x:(MOVBloadidx1 [i] {s} p idx mem) (SHLWconst [8] (MOVBloadidx1 [i+1] {s} p idx mem)))
+	// cond:
+	// result: @x.Block (MOVWload [i] {s} (ADDQ p idx) mem)
+	for {
+		x := v.Args[0]
+		if x.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		i := x.AuxInt
+		s := x.Aux
+		p := x.Args[0]
+		idx := x.Args[1]
+		mem := x.Args[2]
+		v_1 := v.Args[1]
+		if v_1.Op != OpAMD64SHLWconst {
+			break
+		}
+		if v_1.AuxInt != 8 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64MOVBloadidx1 {
+			break
+		}
+		if v_1_0.AuxInt != i+1 {
+			break
+		}
+		if v_1_0.Aux != s {
+			break
+		}
+		if p != v_1_0.Args[0] {
+			break
+		}
+		if idx != v_1_0.Args[1] {
+			break
+		}
+		if mem != v_1_0.Args[2] {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(v.Line, OpAMD64MOVWload, config.fe.TypeUInt16())
+		v.reset(OpCopy)
+		v.AddArg(v0)
+		v0.AuxInt = i
+		v0.Aux = s
+		v1 := b.NewValue0(v.Line, OpAMD64ADDQ, p.Type)
+		v1.AddArg(p)
+		v1.AddArg(idx)
+		v0.AddArg(v1)
+		v0.AddArg(mem)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpAMD64ORWconst(v *Value, config *Config) bool {
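
Note (reviewer context, not part of the patch): the new rules are the indexed-addressing counterparts of the MOVBload rules that appear just above them in AMD64.rules. Below is a minimal sketch of the kind of Go source they are aimed at; the function name and values are hypothetical, and whether the rewrite actually fires depends on bounds-check elimination and the exact SSA form the front end produces.

	package main

	import "fmt"

	// load32 assembles a little-endian uint32 from four adjacent bytes at a
	// variable index. Because i is not a constant, each p[i+k] lowers to a
	// MOVBloadidx1 with offset k and index i, which is exactly the shape the
	// new ORL rule matches: it collapses the four byte loads plus shifts
	// into a single MOVLload from the address (ADDQ p idx).
	func load32(p []byte, i int) uint32 {
		return uint32(p[i]) | uint32(p[i+1])<<8 | uint32(p[i+2])<<16 | uint32(p[i+3])<<24
	}

	func main() {
		b := []byte{0x78, 0x56, 0x34, 0x12}
		fmt.Printf("%#x\n", load32(b, 0)) // prints 0x12345678
	}

The single wide load is equivalent to the byte-by-byte OR because AMD64 is little-endian and permits unaligned loads; the ORW and ORQ rules apply the same idea at 16 and 64 bits.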