Skip to content

Commit 01f7f57

Browse files
committed
[dev.simd] cmd/compile, simd: add variable Permute
This CL also adds tests for the new Permute operations. This CL is generated by CL 687919.
Change-Id: I9ddd2cd23bb98ecca91bfbeaffd62faa4bd85e0d
Reviewed-on: https://go-review.googlesource.com/c/go/+/687939
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
1 parent f5f4275 commit 01f7f57

File tree

11 files changed

+4385
-0
lines changed

11 files changed

+4385
-0
lines changed

src/cmd/compile/internal/amd64/simdssa.go

Lines changed: 96 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/_gen/simdAMD64.rules

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -985,6 +985,114 @@
985985
(PairwiseSubUint16x16 ...) => (VPHSUBW256 ...)
986986
(PairwiseSubUint32x4 ...) => (VPHSUBD128 ...)
987987
(PairwiseSubUint32x8 ...) => (VPHSUBD256 ...)
988+
(PermuteFloat32x8 ...) => (VPERMPS256 ...)
989+
(PermuteFloat32x16 ...) => (VPERMPS512 ...)
990+
(PermuteFloat64x4 ...) => (VPERMPD256 ...)
991+
(PermuteFloat64x8 ...) => (VPERMPD512 ...)
992+
(PermuteInt8x16 ...) => (VPERMB128 ...)
993+
(PermuteInt8x32 ...) => (VPERMB256 ...)
994+
(PermuteInt8x64 ...) => (VPERMB512 ...)
995+
(PermuteInt16x8 ...) => (VPERMW128 ...)
996+
(PermuteInt16x16 ...) => (VPERMW256 ...)
997+
(PermuteInt16x32 ...) => (VPERMW512 ...)
998+
(PermuteInt32x8 ...) => (VPERMD256 ...)
999+
(PermuteInt32x16 ...) => (VPERMD512 ...)
1000+
(PermuteInt64x4 ...) => (VPERMQ256 ...)
1001+
(PermuteInt64x8 ...) => (VPERMQ512 ...)
1002+
(PermuteUint8x16 ...) => (VPERMB128 ...)
1003+
(PermuteUint8x32 ...) => (VPERMB256 ...)
1004+
(PermuteUint8x64 ...) => (VPERMB512 ...)
1005+
(PermuteUint16x8 ...) => (VPERMW128 ...)
1006+
(PermuteUint16x16 ...) => (VPERMW256 ...)
1007+
(PermuteUint16x32 ...) => (VPERMW512 ...)
1008+
(PermuteUint32x8 ...) => (VPERMD256 ...)
1009+
(PermuteUint32x16 ...) => (VPERMD512 ...)
1010+
(PermuteUint64x4 ...) => (VPERMQ256 ...)
1011+
(PermuteUint64x8 ...) => (VPERMQ512 ...)
1012+
(Permute2Float32x4 ...) => (VPERMI2PS128 ...)
1013+
(Permute2Float32x8 ...) => (VPERMI2PS256 ...)
1014+
(Permute2Float32x16 ...) => (VPERMI2PS512 ...)
1015+
(Permute2Float64x2 ...) => (VPERMI2PD128 ...)
1016+
(Permute2Float64x4 ...) => (VPERMI2PD256 ...)
1017+
(Permute2Float64x8 ...) => (VPERMI2PD512 ...)
1018+
(Permute2Int8x16 ...) => (VPERMI2B128 ...)
1019+
(Permute2Int8x32 ...) => (VPERMI2B256 ...)
1020+
(Permute2Int8x64 ...) => (VPERMI2B512 ...)
1021+
(Permute2Int16x8 ...) => (VPERMI2W128 ...)
1022+
(Permute2Int16x16 ...) => (VPERMI2W256 ...)
1023+
(Permute2Int16x32 ...) => (VPERMI2W512 ...)
1024+
(Permute2Int32x4 ...) => (VPERMI2D128 ...)
1025+
(Permute2Int32x8 ...) => (VPERMI2D256 ...)
1026+
(Permute2Int32x16 ...) => (VPERMI2D512 ...)
1027+
(Permute2Int64x2 ...) => (VPERMI2Q128 ...)
1028+
(Permute2Int64x4 ...) => (VPERMI2Q256 ...)
1029+
(Permute2Int64x8 ...) => (VPERMI2Q512 ...)
1030+
(Permute2Uint8x16 ...) => (VPERMI2B128 ...)
1031+
(Permute2Uint8x32 ...) => (VPERMI2B256 ...)
1032+
(Permute2Uint8x64 ...) => (VPERMI2B512 ...)
1033+
(Permute2Uint16x8 ...) => (VPERMI2W128 ...)
1034+
(Permute2Uint16x16 ...) => (VPERMI2W256 ...)
1035+
(Permute2Uint16x32 ...) => (VPERMI2W512 ...)
1036+
(Permute2Uint32x4 ...) => (VPERMI2D128 ...)
1037+
(Permute2Uint32x8 ...) => (VPERMI2D256 ...)
1038+
(Permute2Uint32x16 ...) => (VPERMI2D512 ...)
1039+
(Permute2Uint64x2 ...) => (VPERMI2Q128 ...)
1040+
(Permute2Uint64x4 ...) => (VPERMI2Q256 ...)
1041+
(Permute2Uint64x8 ...) => (VPERMI2Q512 ...)
1042+
(Permute2MaskedFloat32x4 x y z mask) => (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
1043+
(Permute2MaskedFloat32x8 x y z mask) => (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
1044+
(Permute2MaskedFloat32x16 x y z mask) => (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
1045+
(Permute2MaskedFloat64x2 x y z mask) => (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
1046+
(Permute2MaskedFloat64x4 x y z mask) => (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
1047+
(Permute2MaskedFloat64x8 x y z mask) => (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
1048+
(Permute2MaskedInt8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
1049+
(Permute2MaskedInt8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
1050+
(Permute2MaskedInt8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
1051+
(Permute2MaskedInt16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
1052+
(Permute2MaskedInt16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
1053+
(Permute2MaskedInt16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
1054+
(Permute2MaskedInt32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
1055+
(Permute2MaskedInt32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
1056+
(Permute2MaskedInt32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
1057+
(Permute2MaskedInt64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
1058+
(Permute2MaskedInt64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
1059+
(Permute2MaskedInt64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
1060+
(Permute2MaskedUint8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
1061+
(Permute2MaskedUint8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
1062+
(Permute2MaskedUint8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
1063+
(Permute2MaskedUint16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
1064+
(Permute2MaskedUint16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
1065+
(Permute2MaskedUint16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
1066+
(Permute2MaskedUint32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
1067+
(Permute2MaskedUint32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
1068+
(Permute2MaskedUint32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
1069+
(Permute2MaskedUint64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
1070+
(Permute2MaskedUint64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
1071+
(Permute2MaskedUint64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
1072+
(PermuteMaskedFloat32x8 x y mask) => (VPERMPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
1073+
(PermuteMaskedFloat32x16 x y mask) => (VPERMPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
1074+
(PermuteMaskedFloat64x4 x y mask) => (VPERMPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
1075+
(PermuteMaskedFloat64x8 x y mask) => (VPERMPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
1076+
(PermuteMaskedInt8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
1077+
(PermuteMaskedInt8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
1078+
(PermuteMaskedInt8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
1079+
(PermuteMaskedInt16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
1080+
(PermuteMaskedInt16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
1081+
(PermuteMaskedInt16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
1082+
(PermuteMaskedInt32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
1083+
(PermuteMaskedInt32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
1084+
(PermuteMaskedInt64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
1085+
(PermuteMaskedInt64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
1086+
(PermuteMaskedUint8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
1087+
(PermuteMaskedUint8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
1088+
(PermuteMaskedUint8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
1089+
(PermuteMaskedUint16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
1090+
(PermuteMaskedUint16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
1091+
(PermuteMaskedUint16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
1092+
(PermuteMaskedUint32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
1093+
(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
1094+
(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
1095+
(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
9881096
(PopCountInt8x16 ...) => (VPOPCNTB128 ...)
9891097
(PopCountInt8x32 ...) => (VPOPCNTB256 ...)
9901098
(PopCountInt8x64 ...) => (VPOPCNTB512 ...)

0 commit comments

Comments
 (0)