From 9761ad2901df6f42ba683e1491cd411bc3f53af2 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Tue, 1 Apr 2025 00:17:14 +0000 Subject: [PATCH] Implement breakpoint disassembly support for Intel APX Teach the amd64 breakpoint disassembler about APX, specifically the REX2 and extended EVEX encodings. Update the tools to work with newer versions of gcc/gdb, such as handling new gdb output format in the parsing regular expressions. Due to these newer versions, there are differences in the non-APX tables, apparently due to gcc/gdb bug fixes and improvements (e.g., supporting instructions previously unsupported). Note that the APX code is untested due to lack of APX hardware. Also, the Windows SDK CONTEXT record does not define APX extended GPR (eGPR) registers yet, so accessing those registers is disabled. The tables were generated using the following versions of gcc/gdb on Ubuntu 24.04.2 LTS, in WSL2: ``` gcc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 GNU gdb (Ubuntu 15.0.50.20240403-0ubuntu1) 15.0.50.20240403-git ``` Details: 1. Change the "createOpcodes.cpp" tool to generate more varieties of possible instructions, to include encoding forms for REX2 and extended EVEX. 2. Change createOpcodes to always generate 16 bytes of codes. Previously, the parser looked for a "58" followed by "59 pop" to indicate the end of an instruction sequence. This failed in various cases. Note that the longest legal x86 instruction sequence is 15 bytes, as defined by the architecture. 3. Update the parser and table generation tool (Amd64InstructionTableGenerator.cs) to be able to parse REX2 and extended EVEX instructions, and generate a new EVEX table for EVEX map 4. 4. The parser was updated to handle new gdb disassembly formats, such as different whitespace usage (spaces versus tabs), and using the "BCST" tag to indicate EVEX embedded broadcast. 5. 
The native walker was updated to understand the new tables, including when to use them (thus, it needs to recognize REX2 and extended EVEX formats). 6. Fixed bugs in existing AVX-512 (EVEX) handling of `b`, `L'L`, and `pp` bits: they were being read from the wrong prefix byte. 7. There seem to be a couple of existing bugs in `NativeWalker::Decode`, which I annotated but did not feel confident fixing: a. The loop to read and process instruction prefixes only reads a single prefix. Thus, a case like 0x66 (operand size) followed by 0x40 (REX) improperly assumes the REX byte is the instruction opcode. b. If the instruction opcode (after the prefix) is 0xcc, `DebuggerController::GetPatchedOpcode()` is called to read the actual opcode, but it uses the wrong address to do so. --- src/coreclr/debug/ee/amd64/amd64InstrDecode.h | 1711 +++++++++++++---- .../Amd64InstructionTableGenerator.cs | 216 ++- .../gen_amd64InstrDecode/createOpcodes.cpp | 242 ++- src/coreclr/debug/ee/amd64/walker.cpp | 279 ++- 4 files changed, 1930 insertions(+), 518 deletions(-) diff --git a/src/coreclr/debug/ee/amd64/amd64InstrDecode.h b/src/coreclr/debug/ee/amd64/amd64InstrDecode.h index 3c2da5c40ea342..8aa398e018f7c3 100644 --- a/src/coreclr/debug/ee/amd64/amd64InstrDecode.h +++ b/src/coreclr/debug/ee/amd64/amd64InstrDecode.h @@ -30,12 +30,21 @@ namespace Amd64InstrDecode // I4B // Instruction includes 4 bytes of immediates // I8B // Instruction includes 8 bytes of immediates // Unknown // Instruction samples did not include a modrm configured to produce RIP addressing - // L // Flags depend on L bit in encoding. L__or_ - // LL // Flags depend on L'L bits in EVEX encoding. LL___ - // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector - // W // Flags depend on W bit in encoding. W__or_ - // P // Flags depend on OpSize prefix for encoding. P__or_ - // WP // Flags depend on W bit in encoding and OpSize prefix. WP__or__or_ + // L // Flags depend on L bit in encoding. 
+ // // L__or_ + // // L__or_ + // LL // Flags depend on L'L bits in EVEX encoding. + // // LL___ + // // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector + // W // Flags depend on W bit in encoding. + // // W__or_ + // // W__or_ + // P // Flags depend on OpSize prefix for encoding. + // // P__or_ + // // P__or_ + // WP // Flags depend on W bit in encoding and OpSize prefix. + // // WP__or__or_ + // // WP__or__or_ // WLL // Flags depend on W and L'L bits. // // WLL____or___ // bLL // Flags depend on EVEX.b and L'L bits. @@ -53,12 +62,12 @@ namespace Amd64InstrDecode I3B, I4B, I8B, - M1st_bLL_M4B_M16B_M32B_M64B, - M1st_bLL_M8B_M16B_M32B_M64B, M1st_I1B_L_M16B_or_M8B, M1st_I1B_LL_M8B_M16B_M32B, + M1st_I1B_W_M8B_or_M2B, M1st_I1B_W_M8B_or_M4B, M1st_I1B_WP_M8B_or_M4B_or_M2B, + M1st_I4B_W_M8B_or_M4B, M1st_L_M32B_or_M16B, M1st_LL_M16B_M32B_M64B, M1st_LL_M2B_M4B_M8B, @@ -76,6 +85,7 @@ namespace Amd64InstrDecode M1st_M8B, M1st_MUnknown, M1st_W_M4B_or_M1B, + M1st_W_M8B_I4B_or_M2B_I2B, M1st_W_M8B_or_M2B, M1st_W_M8B_or_M4B, M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B, @@ -88,6 +98,7 @@ namespace Amd64InstrDecode MOnly_MUnknown, MOnly_P_M6B_or_M4B, MOnly_W_M16B_or_M8B, + MOnly_W_M8B_or_M2B, MOnly_W_M8B_or_M4B, MOnly_WP_M8B_or_M4B_or_M2B, MOnly_WP_M8B_or_M8B_or_M2B, @@ -138,14 +149,14 @@ namespace Amd64InstrDecode MOp_WP_M8B_or_M4B_or_M2B, WP_I4B_or_I4B_or_I2B, WP_I8B_or_I4B_or_I2B, - Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location in encoded in lower bits + Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location is encoded in lower bits. }; - // The following instrForm maps correspond to the amd64 instr maps - // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics - // The opcode is packed to be human readable. 
PackedOpcode = opcode << 4 + pp - // - For Vex* the pp is directly included in the encoding - // - For the Secondary, F38, and F3A pages the pp is not defined in the encoding, but affects instr form. + // The following instrForm maps correspond to the amd64 instruction maps. + // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics. + // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp. For example, 0x123 is opcode 0x12, pp=0x3. + // - For Vex* and EVEX the pp is directly included in the encoding + // - For the Secondary (0F), 0F 38, and 0F 3A pages the pp is not defined in the encoding, but affects instruction form. // - pp = 0 implies no prefix. // - pp = 1 implies 0x66 OpSize prefix only. // - pp = 2 implies 0xF3 prefix. @@ -153,9 +164,9 @@ namespace Amd64InstrDecode // - For the primary map, pp is not used and is always 0 in the comments. - // Instruction which change forms based on modrm.reg are encoded in this extension table. - // Since there are 8 modrm.reg values, they occur is groups of 8. - // Each group is referenced from the other tables below using Extension|(index >> 3). + // Instructions which change forms based on modrm.reg are encoded in this extension table. + // Since there are 8 modrm.reg values, they occur in groups of 8. + // Each group is referenced from the other tables below using (Extension|(index >> 3)). 
static const InstrForm instrFormExtension[217] { MOnly_M4B, // Primary:0xd90/0 fld @@ -252,32 +263,32 @@ namespace Amd64InstrDecode MOnly_M1B, // Secondary:0x180/3 prefetcht2 MOnly_W_M8B_or_M4B, // Secondary:0x180/4 nop MOnly_W_M8B_or_M4B, // Secondary:0x180/5 nop - MOnly_W_M8B_or_M4B, // Secondary:0x180/6 nop - MOnly_W_M8B_or_M4B, // Secondary:0x180/7 nop + MOnly_M1B, // Secondary:0x180/6 prefetchit1 + MOnly_M1B, // Secondary:0x180/7 prefetchit0 MOnly_M1B, // Secondary:0x181/0 prefetchnta MOnly_M1B, // Secondary:0x181/1 prefetcht0 MOnly_M1B, // Secondary:0x181/2 prefetcht1 MOnly_M1B, // Secondary:0x181/3 prefetcht2 - MOnly_M2B, // Secondary:0x181/4 nop - MOnly_M2B, // Secondary:0x181/5 nop - MOnly_M2B, // Secondary:0x181/6 nop - MOnly_M2B, // Secondary:0x181/7 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/4 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/5 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/6 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/7 nop MOnly_M1B, // Secondary:0x182/0 prefetchnta MOnly_M1B, // Secondary:0x182/1 prefetcht0 MOnly_M1B, // Secondary:0x182/2 prefetcht1 MOnly_M1B, // Secondary:0x182/3 prefetcht2 - MOnly_M4B, // Secondary:0x182/4 nop - MOnly_M4B, // Secondary:0x182/5 nop - MOnly_M4B, // Secondary:0x182/6 nop - MOnly_M4B, // Secondary:0x182/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/7 nop MOnly_M1B, // Secondary:0x183/0 prefetchnta MOnly_M1B, // Secondary:0x183/1 prefetcht0 MOnly_M1B, // Secondary:0x183/2 prefetcht1 MOnly_M1B, // Secondary:0x183/3 prefetcht2 - MOnly_M4B, // Secondary:0x183/4 nop - MOnly_M4B, // Secondary:0x183/5 nop - MOnly_M4B, // Secondary:0x183/6 nop - MOnly_M4B, // Secondary:0x183/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/7 nop 
MOnly_M1B, // Secondary:0x1c0/0 cldemote MOnly_W_M8B_or_M4B, // Secondary:0x1c0/1 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/2 nop @@ -286,30 +297,30 @@ namespace Amd64InstrDecode MOnly_W_M8B_or_M4B, // Secondary:0x1c0/5 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/6 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/7 nop - MOnly_M2B, // Secondary:0x1c1/0 nop - MOnly_M2B, // Secondary:0x1c1/1 nop - MOnly_M2B, // Secondary:0x1c1/2 nop - MOnly_M2B, // Secondary:0x1c1/3 nop - MOnly_M2B, // Secondary:0x1c1/4 nop - MOnly_M2B, // Secondary:0x1c1/5 nop - MOnly_M2B, // Secondary:0x1c1/6 nop - MOnly_M2B, // Secondary:0x1c1/7 nop - MOnly_M4B, // Secondary:0x1c2/0 nop - MOnly_M4B, // Secondary:0x1c2/1 nop - MOnly_M4B, // Secondary:0x1c2/2 nop - MOnly_M4B, // Secondary:0x1c2/3 nop - MOnly_M4B, // Secondary:0x1c2/4 nop - MOnly_M4B, // Secondary:0x1c2/5 nop - MOnly_M4B, // Secondary:0x1c2/6 nop - MOnly_M4B, // Secondary:0x1c2/7 nop - MOnly_M4B, // Secondary:0x1c3/0 nop - MOnly_M4B, // Secondary:0x1c3/1 nop - MOnly_M4B, // Secondary:0x1c3/2 nop - MOnly_M4B, // Secondary:0x1c3/3 nop - MOnly_M4B, // Secondary:0x1c3/4 nop - MOnly_M4B, // Secondary:0x1c3/5 nop - MOnly_M4B, // Secondary:0x1c3/6 nop - MOnly_M4B, // Secondary:0x1c3/7 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/0 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/1 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/2 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/3 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/4 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/5 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/6 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/0 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/1 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/2 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/3 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/7 nop + 
MOnly_W_M8B_or_M4B, // Secondary:0x1c3/0 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/1 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/2 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/3 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/7 nop MOnly_MUnknown, // Secondary:0xae0/0 fxsave,fxsave64 MOnly_MUnknown, // Secondary:0xae0/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae0/2 ldmxcsr @@ -318,24 +329,24 @@ namespace Amd64InstrDecode MOnly_MUnknown, // Secondary:0xae0/5 xrstor,xrstor64 MOnly_MUnknown, // Secondary:0xae0/6 xsaveopt,xsaveopt64 MOnly_M1B, // Secondary:0xae0/7 clflush - MOnly_MUnknown, // Secondary:0xae1/0 fxsave - MOnly_MUnknown, // Secondary:0xae1/1 fxrstor + MOnly_MUnknown, // Secondary:0xae1/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae1/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae1/2 ldmxcsr MOnly_M4B, // Secondary:0xae1/3 stmxcsr None, None, MOnly_M1B, // Secondary:0xae1/6 clwb MOnly_M1B, // Secondary:0xae1/7 clflushopt - MOnly_MUnknown, // Secondary:0xae2/0 fxsave - MOnly_MUnknown, // Secondary:0xae2/1 fxrstor + MOnly_MUnknown, // Secondary:0xae2/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae2/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae2/2 ldmxcsr MOnly_M4B, // Secondary:0xae2/3 stmxcsr - MOnly_M4B, // Secondary:0xae2/4 ptwrite + MOnly_W_M8B_or_M4B, // Secondary:0xae2/4 ptwrite None, MOnly_M8B, // Secondary:0xae2/6 clrssbsy None, - MOnly_MUnknown, // Secondary:0xae3/0 fxsave - MOnly_MUnknown, // Secondary:0xae3/1 fxrstor + MOnly_MUnknown, // Secondary:0xae3/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae3/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae3/2 ldmxcsr MOnly_M4B, // Secondary:0xae3/3 stmxcsr None, @@ -351,27 +362,27 @@ namespace Amd64InstrDecode MOnly_M8B, // Secondary:0xc70/6 vmptrld MOnly_M8B, // Secondary:0xc70/7 vmptrst None, - MOnly_M8B, // Secondary:0xc71/1 cmpxchg8b 
+ MOnly_W_M16B_or_M8B, // Secondary:0xc71/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc71/3 xrstors - MOnly_MUnknown, // Secondary:0xc71/4 xsavec - MOnly_MUnknown, // Secondary:0xc71/5 xsaves + MOnly_MUnknown, // Secondary:0xc71/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc71/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc71/5 xsaves,xsaves64 MOnly_M8B, // Secondary:0xc71/6 vmclear MOnly_M8B, // Secondary:0xc71/7 vmptrst None, - MOnly_M8B, // Secondary:0xc72/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc72/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc72/3 xrstors - MOnly_MUnknown, // Secondary:0xc72/4 xsavec - MOnly_MUnknown, // Secondary:0xc72/5 xsaves + MOnly_MUnknown, // Secondary:0xc72/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc72/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc72/5 xsaves,xsaves64 MOnly_M8B, // Secondary:0xc72/6 vmxon MOnly_M8B, // Secondary:0xc72/7 vmptrst None, - MOnly_M8B, // Secondary:0xc73/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc73/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc73/3 xrstors - MOnly_MUnknown, // Secondary:0xc73/4 xsavec - MOnly_MUnknown, // Secondary:0xc73/5 xsaves + MOnly_MUnknown, // Secondary:0xc73/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc73/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc73/5 xsaves,xsaves64 None, MOnly_M8B, // Secondary:0xc73/7 vmptrst }; @@ -458,22 +469,22 @@ namespace Amd64InstrDecode None, // 0x4d0 None, // 0x4e0 None, // 0x4f0 - None, // 0x500 push - None, // 0x510 push - None, // 0x520 push - None, // 0x530 push - None, // 0x540 push - None, // 0x550 push - None, // 0x560 push - None, // 0x570 push - None, // 0x580 pop - None, // 0x590 pop - None, // 0x5a0 pop - None, // 0x5b0 pop - None, // 0x5c0 pop - None, // 0x5d0 pop - None, // 0x5e0 pop - None, // 0x5f0 pop + None, // 0x500 push,pushp + None, // 0x510 push,pushp + None, // 0x520 push,pushp + None, // 0x530 push,pushp + None, // 
0x540 push,pushp + None, // 0x550 push,pushp + None, // 0x560 push,pushp + None, // 0x570 push,pushp + None, // 0x580 pop,popp + None, // 0x590 pop,popp + None, // 0x5a0 pop,popp + None, // 0x5b0 pop,popp + None, // 0x5c0 pop,popp + None, // 0x5d0 pop,popp + None, // 0x5e0 pop,popp + None, // 0x5f0 pop,popp None, // 0x600 None, // 0x610 None, // 0x620 @@ -539,7 +550,7 @@ namespace Amd64InstrDecode None, // 0x9e0 sahf None, // 0x9f0 lahf I8B, // 0xa00 movabs - I8B, // 0xa10 movabs + I8B, // 0xa10 jmpabs,movabs I8B, // 0xa20 movabs I8B, // 0xa30 movabs None, // 0xa40 movs @@ -641,7 +652,7 @@ namespace Amd64InstrDecode MOnly_M2B, // 0x000 lldt,ltr,sldt,str,verr,verw MOnly_M2B, // 0x001 lldt,ltr,sldt,str,verr,verw MOnly_M2B, // 0x002 lldt,ltr,sldt,str,verr,verw - MOnly_M2B, // 0x003 lldt,ltr,sldt,str,verr,verw + MOnly_M2B, // 0x003 lkgs,lldt,ltr,sldt,str,verr,verw InstrForm(int(Extension)|0x07), // 0x010 InstrForm(int(Extension)|0x08), // 0x011 InstrForm(int(Extension)|0x09), // 0x012 @@ -667,9 +678,9 @@ namespace Amd64InstrDecode None, // 0x062 clts None, // 0x063 clts None, // 0x070 sysretd,sysretq - None, // 0x071 sysretd - None, // 0x072 sysretd - None, // 0x073 sysretd + None, // 0x071 sysretd,sysretq + None, // 0x072 sysretd,sysretq + None, // 0x073 sysretd,sysretq None, // 0x080 invd None, // 0x081 invd None, // 0x082 invd @@ -739,9 +750,9 @@ namespace Amd64InstrDecode InstrForm(int(Extension)|0x0d), // 0x182 InstrForm(int(Extension)|0x0e), // 0x183 MOnly_W_M8B_or_M4B, // 0x190 nop - MOnly_M2B, // 0x191 nop - MOnly_M4B, // 0x192 nop - MOnly_M4B, // 0x193 nop + MOnly_W_M8B_or_M2B, // 0x191 nop + MOnly_W_M8B_or_M4B, // 0x192 nop + MOnly_W_M8B_or_M4B, // 0x193 nop None, // 0x1a0 MOp_MUnknown, // 0x1a1 bndmov MOp_MUnknown, // 0x1a2 bndcl @@ -755,17 +766,17 @@ namespace Amd64InstrDecode InstrForm(int(Extension)|0x11), // 0x1c2 InstrForm(int(Extension)|0x12), // 0x1c3 MOnly_W_M8B_or_M4B, // 0x1d0 nop - MOnly_M2B, // 0x1d1 nop - MOnly_M4B, // 0x1d2 nop - MOnly_M4B, // 
0x1d3 nop + MOnly_W_M8B_or_M2B, // 0x1d1 nop + MOnly_W_M8B_or_M4B, // 0x1d2 nop + MOnly_W_M8B_or_M4B, // 0x1d3 nop MOnly_W_M8B_or_M4B, // 0x1e0 nop - MOnly_M2B, // 0x1e1 nop - MOnly_M4B, // 0x1e2 nop - MOnly_M4B, // 0x1e3 nop + MOnly_W_M8B_or_M2B, // 0x1e1 nop + MOnly_W_M8B_or_M4B, // 0x1e2 nop + MOnly_W_M8B_or_M4B, // 0x1e3 nop MOnly_W_M8B_or_M4B, // 0x1f0 nop - MOnly_M2B, // 0x1f1 nop - MOnly_M4B, // 0x1f2 nop - MOnly_M4B, // 0x1f3 nop + MOnly_W_M8B_or_M2B, // 0x1f1 nop + MOnly_W_M8B_or_M4B, // 0x1f2 nop + MOnly_W_M8B_or_M4B, // 0x1f3 nop I1B, // 0x200 mov I1B, // 0x201 mov I1B, // 0x202 mov @@ -808,8 +819,8 @@ namespace Amd64InstrDecode None, // 0x293 MOp_M8B, // 0x2a0 cvtpi2ps MOp_M8B, // 0x2a1 cvtpi2pd - MOp_M4B, // 0x2a2 cvtsi2ss - MOp_M4B, // 0x2a3 cvtsi2sd + MOp_W_M8B_or_M4B, // 0x2a2 cvtsi2ss + MOp_W_M8B_or_M4B, // 0x2a3 cvtsi2sd M1st_M16B, // 0x2b0 movntps M1st_M16B, // 0x2b1 movntpd M1st_M4B, // 0x2b2 movntss @@ -851,9 +862,9 @@ namespace Amd64InstrDecode None, // 0x342 sysenter None, // 0x343 sysenter None, // 0x350 sysexitd,sysexitq - None, // 0x351 sysexitd - None, // 0x352 sysexitd - None, // 0x353 sysexitd + None, // 0x351 sysexitd,sysexitq + None, // 0x352 sysexitd,sysexitq + None, // 0x353 sysexitd,sysexitq None, // 0x360 None, // 0x361 None, // 0x362 @@ -895,69 +906,69 @@ namespace Amd64InstrDecode None, // 0x3f2 None, // 0x3f3 MOp_W_M8B_or_M4B, // 0x400 cmovo - MOp_M2B, // 0x401 cmovo - MOp_M4B, // 0x402 cmovo - MOp_M4B, // 0x403 cmovo + MOp_W_M8B_or_M2B, // 0x401 cmovo + MOp_W_M8B_or_M4B, // 0x402 cmovo + MOp_W_M8B_or_M4B, // 0x403 cmovo MOp_W_M8B_or_M4B, // 0x410 cmovno - MOp_M2B, // 0x411 cmovno - MOp_M4B, // 0x412 cmovno - MOp_M4B, // 0x413 cmovno + MOp_W_M8B_or_M2B, // 0x411 cmovno + MOp_W_M8B_or_M4B, // 0x412 cmovno + MOp_W_M8B_or_M4B, // 0x413 cmovno MOp_W_M8B_or_M4B, // 0x420 cmovb - MOp_M2B, // 0x421 cmovb - MOp_M4B, // 0x422 cmovb - MOp_M4B, // 0x423 cmovb + MOp_W_M8B_or_M2B, // 0x421 cmovb + MOp_W_M8B_or_M4B, // 0x422 cmovb + 
MOp_W_M8B_or_M4B, // 0x423 cmovb MOp_W_M8B_or_M4B, // 0x430 cmovae - MOp_M2B, // 0x431 cmovae - MOp_M4B, // 0x432 cmovae - MOp_M4B, // 0x433 cmovae + MOp_W_M8B_or_M2B, // 0x431 cmovae + MOp_W_M8B_or_M4B, // 0x432 cmovae + MOp_W_M8B_or_M4B, // 0x433 cmovae MOp_W_M8B_or_M4B, // 0x440 cmove - MOp_M2B, // 0x441 cmove - MOp_M4B, // 0x442 cmove - MOp_M4B, // 0x443 cmove + MOp_W_M8B_or_M2B, // 0x441 cmove + MOp_W_M8B_or_M4B, // 0x442 cmove + MOp_W_M8B_or_M4B, // 0x443 cmove MOp_W_M8B_or_M4B, // 0x450 cmovne - MOp_M2B, // 0x451 cmovne - MOp_M4B, // 0x452 cmovne - MOp_M4B, // 0x453 cmovne + MOp_W_M8B_or_M2B, // 0x451 cmovne + MOp_W_M8B_or_M4B, // 0x452 cmovne + MOp_W_M8B_or_M4B, // 0x453 cmovne MOp_W_M8B_or_M4B, // 0x460 cmovbe - MOp_M2B, // 0x461 cmovbe - MOp_M4B, // 0x462 cmovbe - MOp_M4B, // 0x463 cmovbe + MOp_W_M8B_or_M2B, // 0x461 cmovbe + MOp_W_M8B_or_M4B, // 0x462 cmovbe + MOp_W_M8B_or_M4B, // 0x463 cmovbe MOp_W_M8B_or_M4B, // 0x470 cmova - MOp_M2B, // 0x471 cmova - MOp_M4B, // 0x472 cmova - MOp_M4B, // 0x473 cmova + MOp_W_M8B_or_M2B, // 0x471 cmova + MOp_W_M8B_or_M4B, // 0x472 cmova + MOp_W_M8B_or_M4B, // 0x473 cmova MOp_W_M8B_or_M4B, // 0x480 cmovs - MOp_M2B, // 0x481 cmovs - MOp_M4B, // 0x482 cmovs - MOp_M4B, // 0x483 cmovs + MOp_W_M8B_or_M2B, // 0x481 cmovs + MOp_W_M8B_or_M4B, // 0x482 cmovs + MOp_W_M8B_or_M4B, // 0x483 cmovs MOp_W_M8B_or_M4B, // 0x490 cmovns - MOp_M2B, // 0x491 cmovns - MOp_M4B, // 0x492 cmovns - MOp_M4B, // 0x493 cmovns + MOp_W_M8B_or_M2B, // 0x491 cmovns + MOp_W_M8B_or_M4B, // 0x492 cmovns + MOp_W_M8B_or_M4B, // 0x493 cmovns MOp_W_M8B_or_M4B, // 0x4a0 cmovp - MOp_M2B, // 0x4a1 cmovp - MOp_M4B, // 0x4a2 cmovp - MOp_M4B, // 0x4a3 cmovp + MOp_W_M8B_or_M2B, // 0x4a1 cmovp + MOp_W_M8B_or_M4B, // 0x4a2 cmovp + MOp_W_M8B_or_M4B, // 0x4a3 cmovp MOp_W_M8B_or_M4B, // 0x4b0 cmovnp - MOp_M2B, // 0x4b1 cmovnp - MOp_M4B, // 0x4b2 cmovnp - MOp_M4B, // 0x4b3 cmovnp + MOp_W_M8B_or_M2B, // 0x4b1 cmovnp + MOp_W_M8B_or_M4B, // 0x4b2 cmovnp + MOp_W_M8B_or_M4B, // 
0x4b3 cmovnp MOp_W_M8B_or_M4B, // 0x4c0 cmovl - MOp_M2B, // 0x4c1 cmovl - MOp_M4B, // 0x4c2 cmovl - MOp_M4B, // 0x4c3 cmovl + MOp_W_M8B_or_M2B, // 0x4c1 cmovl + MOp_W_M8B_or_M4B, // 0x4c2 cmovl + MOp_W_M8B_or_M4B, // 0x4c3 cmovl MOp_W_M8B_or_M4B, // 0x4d0 cmovge - MOp_M2B, // 0x4d1 cmovge - MOp_M4B, // 0x4d2 cmovge - MOp_M4B, // 0x4d3 cmovge + MOp_W_M8B_or_M2B, // 0x4d1 cmovge + MOp_W_M8B_or_M4B, // 0x4d2 cmovge + MOp_W_M8B_or_M4B, // 0x4d3 cmovge MOp_W_M8B_or_M4B, // 0x4e0 cmovle - MOp_M2B, // 0x4e1 cmovle - MOp_M4B, // 0x4e2 cmovle - MOp_M4B, // 0x4e3 cmovle + MOp_W_M8B_or_M2B, // 0x4e1 cmovle + MOp_W_M8B_or_M4B, // 0x4e2 cmovle + MOp_W_M8B_or_M4B, // 0x4e3 cmovle MOp_W_M8B_or_M4B, // 0x4f0 cmovg - MOp_M2B, // 0x4f1 cmovg - MOp_M4B, // 0x4f2 cmovg - MOp_M4B, // 0x4f3 cmovg + MOp_W_M8B_or_M2B, // 0x4f1 cmovg + MOp_W_M8B_or_M4B, // 0x4f2 cmovg + MOp_W_M8B_or_M4B, // 0x4f3 cmovg None, // 0x500 None, // 0x501 None, // 0x502 @@ -1079,7 +1090,7 @@ namespace Amd64InstrDecode None, // 0x6d2 None, // 0x6d3 MOp_W_M8B_or_M4B, // 0x6e0 movd,movq - MOp_M4B, // 0x6e1 movd + MOp_W_M8B_or_M4B, // 0x6e1 movd,movq None, // 0x6e2 None, // 0x6e3 MOp_M8B, // 0x6f0 movq @@ -1143,7 +1154,7 @@ namespace Amd64InstrDecode None, // 0x7d2 MOp_M16B, // 0x7d3 hsubps M1st_W_M8B_or_M4B, // 0x7e0 movd,movq - M1st_M4B, // 0x7e1 movd + M1st_W_M8B_or_M4B, // 0x7e1 movd,movq MOp_M8B, // 0x7e2 movq None, // 0x7e3 M1st_M8B, // 0x7f0 movq @@ -1151,67 +1162,67 @@ namespace Amd64InstrDecode M1st_M16B, // 0x7f2 movdqu None, // 0x7f3 I4B, // 0x800 jo - I2B, // 0x801 jo + WP_I4B_or_I4B_or_I2B, // 0x801 jo I4B, // 0x802 jo I4B, // 0x803 jo I4B, // 0x810 jno - I2B, // 0x811 jno + WP_I4B_or_I4B_or_I2B, // 0x811 jno I4B, // 0x812 jno I4B, // 0x813 jno I4B, // 0x820 jb - I2B, // 0x821 jb + WP_I4B_or_I4B_or_I2B, // 0x821 jb I4B, // 0x822 jb I4B, // 0x823 jb I4B, // 0x830 jae - I2B, // 0x831 jae + WP_I4B_or_I4B_or_I2B, // 0x831 jae I4B, // 0x832 jae I4B, // 0x833 jae I4B, // 0x840 je - I2B, // 0x841 je + 
WP_I4B_or_I4B_or_I2B, // 0x841 je I4B, // 0x842 je I4B, // 0x843 je I4B, // 0x850 jne - I2B, // 0x851 jne + WP_I4B_or_I4B_or_I2B, // 0x851 jne I4B, // 0x852 jne I4B, // 0x853 jne I4B, // 0x860 jbe - I2B, // 0x861 jbe + WP_I4B_or_I4B_or_I2B, // 0x861 jbe I4B, // 0x862 jbe I4B, // 0x863 jbe I4B, // 0x870 ja - I2B, // 0x871 ja + WP_I4B_or_I4B_or_I2B, // 0x871 ja I4B, // 0x872 ja I4B, // 0x873 ja I4B, // 0x880 js - I2B, // 0x881 js + WP_I4B_or_I4B_or_I2B, // 0x881 js I4B, // 0x882 js I4B, // 0x883 js I4B, // 0x890 jns - I2B, // 0x891 jns + WP_I4B_or_I4B_or_I2B, // 0x891 jns I4B, // 0x892 jns I4B, // 0x893 jns I4B, // 0x8a0 jp - I2B, // 0x8a1 jp + WP_I4B_or_I4B_or_I2B, // 0x8a1 jp I4B, // 0x8a2 jp I4B, // 0x8a3 jp I4B, // 0x8b0 jnp - I2B, // 0x8b1 jnp + WP_I4B_or_I4B_or_I2B, // 0x8b1 jnp I4B, // 0x8b2 jnp I4B, // 0x8b3 jnp I4B, // 0x8c0 jl - I2B, // 0x8c1 jl + WP_I4B_or_I4B_or_I2B, // 0x8c1 jl I4B, // 0x8c2 jl I4B, // 0x8c3 jl I4B, // 0x8d0 jge - I2B, // 0x8d1 jge + WP_I4B_or_I4B_or_I2B, // 0x8d1 jge I4B, // 0x8d2 jge I4B, // 0x8d3 jge I4B, // 0x8e0 jle - I2B, // 0x8e1 jle + WP_I4B_or_I4B_or_I2B, // 0x8e1 jle I4B, // 0x8e2 jle I4B, // 0x8e3 jle I4B, // 0x8f0 jg - I2B, // 0x8f1 jg + WP_I4B_or_I4B_or_I2B, // 0x8f1 jg I4B, // 0x8f2 jg I4B, // 0x8f3 jg MOnly_M1B, // 0x900 seto @@ -1279,11 +1290,11 @@ namespace Amd64InstrDecode MOnly_M1B, // 0x9f2 setg MOnly_M1B, // 0x9f3 setg None, // 0xa00 push - None, // 0xa01 pushw + None, // 0xa01 push,pushw None, // 0xa02 push None, // 0xa03 push None, // 0xa10 pop - None, // 0xa11 popw + None, // 0xa11 pop,popw None, // 0xa12 pop None, // 0xa13 pop None, // 0xa20 cpuid @@ -1291,17 +1302,17 @@ namespace Amd64InstrDecode None, // 0xa22 cpuid None, // 0xa23 cpuid M1st_W_M8B_or_M4B, // 0xa30 bt - M1st_M2B, // 0xa31 bt - M1st_M4B, // 0xa32 bt - M1st_M4B, // 0xa33 bt + M1st_W_M8B_or_M2B, // 0xa31 bt + M1st_W_M8B_or_M4B, // 0xa32 bt + M1st_W_M8B_or_M4B, // 0xa33 bt M1st_I1B_W_M8B_or_M4B, // 0xa40 shld - M1st_M2B_I1B, // 0xa41 shld - 
M1st_M4B_I1B, // 0xa42 shld - M1st_M4B_I1B, // 0xa43 shld + M1st_I1B_W_M8B_or_M2B, // 0xa41 shld + M1st_I1B_W_M8B_or_M4B, // 0xa42 shld + M1st_I1B_W_M8B_or_M4B, // 0xa43 shld M1st_W_M8B_or_M4B, // 0xa50 shld - M1st_M2B, // 0xa51 shld - M1st_M4B, // 0xa52 shld - M1st_M4B, // 0xa53 shld + M1st_W_M8B_or_M2B, // 0xa51 shld + M1st_W_M8B_or_M4B, // 0xa52 shld + M1st_W_M8B_or_M4B, // 0xa53 shld None, // 0xa60 None, // 0xa61 None, // 0xa62 @@ -1311,11 +1322,11 @@ namespace Amd64InstrDecode None, // 0xa72 None, // 0xa73 None, // 0xa80 push - None, // 0xa81 pushw + None, // 0xa81 push,pushw None, // 0xa82 push None, // 0xa83 push None, // 0xa90 pop - None, // 0xa91 popw + None, // 0xa91 pop,popw None, // 0xa92 pop None, // 0xa93 pop None, // 0xaa0 rsm @@ -1323,41 +1334,41 @@ namespace Amd64InstrDecode None, // 0xaa2 rsm None, // 0xaa3 rsm M1st_W_M8B_or_M4B, // 0xab0 bts - M1st_M2B, // 0xab1 bts - M1st_M4B, // 0xab2 bts - M1st_M4B, // 0xab3 bts + M1st_W_M8B_or_M2B, // 0xab1 bts + M1st_W_M8B_or_M4B, // 0xab2 bts + M1st_W_M8B_or_M4B, // 0xab3 bts M1st_I1B_W_M8B_or_M4B, // 0xac0 shrd - M1st_M2B_I1B, // 0xac1 shrd - M1st_M4B_I1B, // 0xac2 shrd - M1st_M4B_I1B, // 0xac3 shrd + M1st_I1B_W_M8B_or_M2B, // 0xac1 shrd + M1st_I1B_W_M8B_or_M4B, // 0xac2 shrd + M1st_I1B_W_M8B_or_M4B, // 0xac3 shrd M1st_W_M8B_or_M4B, // 0xad0 shrd - M1st_M2B, // 0xad1 shrd - M1st_M4B, // 0xad2 shrd - M1st_M4B, // 0xad3 shrd + M1st_W_M8B_or_M2B, // 0xad1 shrd + M1st_W_M8B_or_M4B, // 0xad2 shrd + M1st_W_M8B_or_M4B, // 0xad3 shrd InstrForm(int(Extension)|0x13), // 0xae0 InstrForm(int(Extension)|0x14), // 0xae1 InstrForm(int(Extension)|0x15), // 0xae2 InstrForm(int(Extension)|0x16), // 0xae3 MOp_W_M8B_or_M4B, // 0xaf0 imul - MOp_M2B, // 0xaf1 imul - MOp_M4B, // 0xaf2 imul - MOp_M4B, // 0xaf3 imul + MOp_W_M8B_or_M2B, // 0xaf1 imul + MOp_W_M8B_or_M4B, // 0xaf2 imul + MOp_W_M8B_or_M4B, // 0xaf3 imul M1st_M1B, // 0xb00 cmpxchg M1st_M1B, // 0xb01 cmpxchg M1st_M1B, // 0xb02 cmpxchg M1st_M1B, // 0xb03 cmpxchg 
M1st_W_M8B_or_M4B, // 0xb10 cmpxchg - M1st_M2B, // 0xb11 cmpxchg - M1st_M4B, // 0xb12 cmpxchg - M1st_M4B, // 0xb13 cmpxchg + M1st_W_M8B_or_M2B, // 0xb11 cmpxchg + M1st_W_M8B_or_M4B, // 0xb12 cmpxchg + M1st_W_M8B_or_M4B, // 0xb13 cmpxchg MOp_M6B, // 0xb20 lss MOp_M4B, // 0xb21 lss MOp_M6B, // 0xb22 lss MOp_M6B, // 0xb23 lss M1st_W_M8B_or_M4B, // 0xb30 btr - M1st_M2B, // 0xb31 btr - M1st_M4B, // 0xb32 btr - M1st_M4B, // 0xb33 btr + M1st_W_M8B_or_M2B, // 0xb31 btr + M1st_W_M8B_or_M4B, // 0xb32 btr + M1st_W_M8B_or_M4B, // 0xb33 btr MOp_M6B, // 0xb40 lfs MOp_M4B, // 0xb41 lfs MOp_M6B, // 0xb42 lfs @@ -1376,27 +1387,27 @@ namespace Amd64InstrDecode MOp_M2B, // 0xb73 movzx None, // 0xb80 None, // 0xb81 - MOp_M4B, // 0xb82 popcnt + MOp_W_M8B_or_M4B, // 0xb82 popcnt None, // 0xb83 MOp_W_M8B_or_M4B, // 0xb90 ud1 - MOp_M2B, // 0xb91 ud1 - MOp_M4B, // 0xb92 ud1 - MOp_M4B, // 0xb93 ud1 + MOp_W_M8B_or_M2B, // 0xb91 ud1 + MOp_W_M8B_or_M4B, // 0xb92 ud1 + MOp_W_M8B_or_M4B, // 0xb93 ud1 M1st_I1B_W_M8B_or_M4B, // 0xba0 bt,btc,btr,bts - M1st_M2B_I1B, // 0xba1 bt,btc,btr,bts - M1st_M4B_I1B, // 0xba2 bt,btc,btr,bts - M1st_M4B_I1B, // 0xba3 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M2B, // 0xba1 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M4B, // 0xba2 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M4B, // 0xba3 bt,btc,btr,bts M1st_W_M8B_or_M4B, // 0xbb0 btc - M1st_M2B, // 0xbb1 btc - M1st_M4B, // 0xbb2 btc - M1st_M4B, // 0xbb3 btc + M1st_W_M8B_or_M2B, // 0xbb1 btc + M1st_W_M8B_or_M4B, // 0xbb2 btc + M1st_W_M8B_or_M4B, // 0xbb3 btc MOp_W_M8B_or_M4B, // 0xbc0 bsf - MOp_M2B, // 0xbc1 bsf - MOp_M4B, // 0xbc2 tzcnt + MOp_W_M8B_or_M2B, // 0xbc1 bsf + MOp_W_M8B_or_M4B, // 0xbc2 tzcnt None, // 0xbc3 MOp_W_M8B_or_M4B, // 0xbd0 bsr - MOp_M2B, // 0xbd1 bsr - MOp_M4B, // 0xbd2 lzcnt + MOp_W_M8B_or_M2B, // 0xbd1 bsr + MOp_W_M8B_or_M4B, // 0xbd2 lzcnt None, // 0xbd3 MOp_M1B, // 0xbe0 movsx MOp_M1B, // 0xbe1 movsx @@ -1411,9 +1422,9 @@ namespace Amd64InstrDecode M1st_M1B, // 0xc02 xadd M1st_M1B, // 0xc03 xadd M1st_W_M8B_or_M4B, 
// 0xc10 xadd - M1st_M2B, // 0xc11 xadd - M1st_M4B, // 0xc12 xadd - M1st_M4B, // 0xc13 xadd + M1st_W_M8B_or_M2B, // 0xc11 xadd + M1st_W_M8B_or_M4B, // 0xc12 xadd + M1st_W_M8B_or_M4B, // 0xc13 xadd MOp_M16B_I1B, // 0xc20 cmpps MOp_M16B_I1B, // 0xc21 cmppd MOp_M4B_I1B, // 0xc22 cmpss @@ -1659,9 +1670,9 @@ namespace Amd64InstrDecode None, // 0xfe2 None, // 0xfe3 MOp_W_M8B_or_M4B, // 0xff0 ud0 - MOp_M2B, // 0xff1 ud0 - MOp_M4B, // 0xff2 ud0 - MOp_M4B, // 0xff3 ud0 + MOp_W_M8B_or_M2B, // 0xff1 ud0 + MOp_W_M8B_or_M4B, // 0xff2 ud0 + MOp_W_M8B_or_M4B, // 0xff3 ud0 }; static const InstrForm instrFormF38[1024] @@ -2674,10 +2685,10 @@ namespace Amd64InstrDecode None, // 0xfb1 None, // 0xfb2 None, // 0xfb3 - None, // 0xfc0 - None, // 0xfc1 - None, // 0xfc2 - None, // 0xfc3 + M1st_W_M8B_or_M4B, // 0xfc0 aadd + M1st_W_M8B_or_M4B, // 0xfc1 aand + M1st_W_M8B_or_M4B, // 0xfc2 axor + M1st_W_M8B_or_M4B, // 0xfc3 aor None, // 0xfd0 None, // 0xfd1 None, // 0xfd2 @@ -5070,14 +5081,14 @@ namespace Amd64InstrDecode None, // 0x4f1 None, // 0x4f2 None, // 0x4f3 - None, // 0x500 + MOp_L_M32B_or_M16B, // 0x500 vpdpbuud None, // 0x501 - None, // 0x502 - None, // 0x503 - None, // 0x510 + MOp_L_M32B_or_M16B, // 0x502 vpdpbsud + MOp_L_M32B_or_M16B, // 0x503 vpdpbssd + MOp_L_M32B_or_M16B, // 0x510 vpdpbuuds None, // 0x511 - None, // 0x512 - None, // 0x513 + MOp_L_M32B_or_M16B, // 0x512 vpdpbsuds + MOp_L_M32B_or_M16B, // 0x513 vpdpbssds None, // 0x520 None, // 0x521 None, // 0x522 @@ -5454,13 +5465,13 @@ namespace Amd64InstrDecode MOp_W_M8B_or_M4B, // 0xaf1 vfnmsub213sd,vfnmsub213ss None, // 0xaf2 None, // 0xaf3 - None, // 0xb00 - None, // 0xb01 - None, // 0xb02 - None, // 0xb03 + MOp_L_M32B_or_M16B, // 0xb00 vcvtneoph2ps + MOp_L_M32B_or_M16B, // 0xb01 vcvtneeph2ps + MOp_L_M32B_or_M16B, // 0xb02 vcvtneebf162ps + MOp_L_M32B_or_M16B, // 0xb03 vcvtneobf162ps None, // 0xb10 - None, // 0xb11 - None, // 0xb12 + MOp_M2B, // 0xb11 vbcstnesh2ps + MOp_M2B, // 0xb12 vbcstnebf162ps None, // 0xb13 None, // 
0xb20 None, // 0xb21 @@ -5590,13 +5601,13 @@ namespace Amd64InstrDecode None, // 0xd11 None, // 0xd12 None, // 0xd13 - None, // 0xd20 - None, // 0xd21 - None, // 0xd22 + MOp_L_M32B_or_M16B, // 0xd20 vpdpwuud + MOp_L_M32B_or_M16B, // 0xd21 vpdpwusd + MOp_L_M32B_or_M16B, // 0xd22 vpdpwsud None, // 0xd23 - None, // 0xd30 - None, // 0xd31 - None, // 0xd32 + MOp_L_M32B_or_M16B, // 0xd30 vpdpwuuds + MOp_L_M32B_or_M16B, // 0xd31 vpdpwusds + MOp_L_M32B_or_M16B, // 0xd32 vpdpwsuds None, // 0xd33 None, // 0xd40 None, // 0xd41 @@ -5622,10 +5633,10 @@ namespace Amd64InstrDecode None, // 0xd91 None, // 0xd92 None, // 0xd93 - None, // 0xda0 - None, // 0xda1 - None, // 0xda2 - None, // 0xda3 + MOp_M16B, // 0xda0 vsm3msg1 + MOp_M16B, // 0xda1 vsm3msg2 + MOp_L_M32B_or_M16B, // 0xda2 vsm4key4 + MOp_L_M32B_or_M16B, // 0xda3 vsm4rnds4 None, // 0xdb0 MOp_M16B, // 0xdb1 vaesimc None, // 0xdb2 @@ -5647,67 +5658,67 @@ namespace Amd64InstrDecode None, // 0xdf2 None, // 0xdf3 None, // 0xe00 - None, // 0xe01 + M1st_W_M8B_or_M4B, // 0xe01 cmpoxadd None, // 0xe02 None, // 0xe03 None, // 0xe10 - None, // 0xe11 + M1st_W_M8B_or_M4B, // 0xe11 cmpnoxadd None, // 0xe12 None, // 0xe13 None, // 0xe20 - None, // 0xe21 + M1st_W_M8B_or_M4B, // 0xe21 cmpbxadd None, // 0xe22 None, // 0xe23 None, // 0xe30 - None, // 0xe31 + M1st_W_M8B_or_M4B, // 0xe31 cmpnbxadd None, // 0xe32 None, // 0xe33 None, // 0xe40 - None, // 0xe41 + M1st_W_M8B_or_M4B, // 0xe41 cmpzxadd None, // 0xe42 None, // 0xe43 None, // 0xe50 - None, // 0xe51 + M1st_W_M8B_or_M4B, // 0xe51 cmpnzxadd None, // 0xe52 None, // 0xe53 None, // 0xe60 - None, // 0xe61 + M1st_W_M8B_or_M4B, // 0xe61 cmpbexadd None, // 0xe62 None, // 0xe63 None, // 0xe70 - None, // 0xe71 + M1st_W_M8B_or_M4B, // 0xe71 cmpnbexadd None, // 0xe72 None, // 0xe73 None, // 0xe80 - None, // 0xe81 + M1st_W_M8B_or_M4B, // 0xe81 cmpsxadd None, // 0xe82 None, // 0xe83 None, // 0xe90 - None, // 0xe91 + M1st_W_M8B_or_M4B, // 0xe91 cmpnsxadd None, // 0xe92 None, // 0xe93 None, // 0xea0 - 
None, // 0xea1 + M1st_W_M8B_or_M4B, // 0xea1 cmppxadd None, // 0xea2 None, // 0xea3 None, // 0xeb0 - None, // 0xeb1 + M1st_W_M8B_or_M4B, // 0xeb1 cmpnpxadd None, // 0xeb2 None, // 0xeb3 None, // 0xec0 - None, // 0xec1 + M1st_W_M8B_or_M4B, // 0xec1 cmplxadd None, // 0xec2 None, // 0xec3 None, // 0xed0 - None, // 0xed1 + M1st_W_M8B_or_M4B, // 0xed1 cmpnlxadd None, // 0xed2 None, // 0xed3 None, // 0xee0 - None, // 0xee1 + M1st_W_M8B_or_M4B, // 0xee1 cmplexadd None, // 0xee2 None, // 0xee3 None, // 0xef0 - None, // 0xef1 + M1st_W_M8B_or_M4B, // 0xef1 cmpnlexadd None, // 0xef2 None, // 0xef3 None, // 0xf00 @@ -6667,7 +6678,7 @@ namespace Amd64InstrDecode None, // 0xdd2 None, // 0xdd3 None, // 0xde0 - None, // 0xde1 + MOp_M16B_I1B, // 0xde1 vsm3rnds2 None, // 0xde2 None, // 0xde3 None, // 0xdf0 @@ -6978,8 +6989,8 @@ namespace Amd64InstrDecode None, // 0x2a1 MOp_W_M8B_or_M4B, // 0x2a2 vcvtsi2ss MOp_W_M8B_or_M4B, // 0x2a3 vcvtsi2sd - M1st_bLL_M4B_M16B_M32B_M64B, // 0x2b0 vmovntps - M1st_bLL_M8B_M16B_M32B_M64B, // 0x2b1 vmovntpd + M1st_LL_M16B_M32B_M64B, // 0x2b0 vmovntps + M1st_LL_M16B_M32B_M64B, // 0x2b1 vmovntpd None, // 0x2b2 None, // 0x2b3 None, // 0x2c0 @@ -7130,8 +7141,8 @@ namespace Amd64InstrDecode None, // 0x501 None, // 0x502 None, // 0x503 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x510 vsqrtps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x511 vsqrtpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x510 vsqrtps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x511 vsqrtpd MOp_M4B, // 0x512 vsqrtss MOp_M8B, // 0x513 vsqrtsd None, // 0x520 @@ -7158,12 +7169,12 @@ namespace Amd64InstrDecode MOp_bLL_M8B_M16B_M32B_M64B, // 0x571 vxorpd None, // 0x572 None, // 0x573 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x580 vaddps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x581 vaddpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x580 vaddps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x581 vaddpd MOp_M4B, // 0x582 vaddss MOp_M8B, // 0x583 vaddsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x590 vmulps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x591 vmulpd + 
MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x590 vmulps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x591 vmulpd MOp_M4B, // 0x592 vmulss MOp_M8B, // 0x593 vmulsd MOp_bLL_M4B_M8B_M16B_M32B, // 0x5a0 vcvtps2pd @@ -7174,20 +7185,20 @@ namespace Amd64InstrDecode MOp_bLL_M4B_M16B_M32B_M64B, // 0x5b1 vcvtps2dq MOp_bLL_M4B_M16B_M32B_M64B, // 0x5b2 vcvttps2dq None, // 0x5b3 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5c0 vsubps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5c1 vsubpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5c0 vsubps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5c1 vsubpd MOp_M4B, // 0x5c2 vsubss MOp_M8B, // 0x5c3 vsubsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5d0 vminps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5d1 vminpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5d0 vminps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5d1 vminpd MOp_M4B, // 0x5d2 vminss MOp_M8B, // 0x5d3 vminsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5e0 vdivps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5e1 vdivpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5e0 vdivps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5e1 vdivpd MOp_M4B, // 0x5e2 vdivss MOp_M8B, // 0x5e3 vdivsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5f0 vmaxps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5f1 vmaxpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5f0 vmaxps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5f1 vmaxpd MOp_M4B, // 0x5f2 vmaxss MOp_M8B, // 0x5f3 vmaxsd None, // 0x600 @@ -7382,12 +7393,12 @@ namespace Amd64InstrDecode None, // 0x8f1 None, // 0x8f2 None, // 0x8f3 - None, // 0x900 - None, // 0x901 + MOp_W_M8B_or_M2B, // 0x900 kmovq,kmovw + MOp_W_M4B_or_M1B, // 0x901 kmovb,kmovd None, // 0x902 None, // 0x903 - None, // 0x910 - None, // 0x911 + M1st_W_M8B_or_M2B, // 0x910 kmovq,kmovw + M1st_W_M4B_or_M1B, // 0x911 kmovb,kmovd None, // 0x912 None, // 0x913 None, // 0x920 @@ -8000,7 +8011,7 @@ namespace Amd64InstrDecode None, // 0x283 None, // 0x290 MOp_bLL_M8B_M16B_M32B_M64B, // 0x291 vpcmpeqq - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x292 vpmovb2m,vpmovw2m + None, // 0x292 None, // 0x293 None, // 0x2a0 
MOp_LL_M16B_M32B_M64B, // 0x2a1 vmovntdqa @@ -8064,7 +8075,7 @@ namespace Amd64InstrDecode None, // 0x383 None, // 0x390 MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x391 vpminsd,vpminsq - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x392 vpmovd2m,vpmovq2m + None, // 0x392 None, // 0x393 None, // 0x3a0 MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x3a1 vpminuw @@ -8126,8 +8137,8 @@ namespace Amd64InstrDecode None, // 0x481 None, // 0x482 None, // 0x483 - None, // 0x490 - None, // 0x491 + MOnly_MUnknown, // 0x490 ldtilecfg + MOnly_MUnknown, // 0x491 sttilecfg None, // 0x492 None, // 0x493 None, // 0x4a0 @@ -8154,20 +8165,20 @@ namespace Amd64InstrDecode MOp_W_M8B_or_M4B, // 0x4f1 vrsqrt14sd,vrsqrt14ss None, // 0x4f2 None, // 0x4f3 - None, // 0x500 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x501 vpdpbusd - None, // 0x502 - None, // 0x503 - None, // 0x510 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x511 vpdpbusds - None, // 0x512 - None, // 0x513 + MOp_bLL_M4B_M16B_M32B_M64B, // 0x500 vpdpbuud + MOp_bLL_M4B_M16B_M32B_M64B, // 0x501 vpdpbusd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x502 vpdpbsud + MOp_bLL_M4B_M16B_M32B_M64B, // 0x503 vpdpbssd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x510 vpdpbuuds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x511 vpdpbusds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x512 vpdpbsuds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x513 vpdpbssds None, // 0x520 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x521 vpdpwssd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x521 vpdpwssd MOp_bLL_M4B_M16B_M32B_M64B, // 0x522 vdpbf16ps MOp_M16B, // 0x523 vp4dpwssd None, // 0x530 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x531 vpdpwssds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x531 vpdpwssds None, // 0x532 MOp_M16B, // 0x533 vp4dpwssds None, // 0x540 @@ -8555,11 +8566,11 @@ namespace Amd64InstrDecode None, // 0xb32 None, // 0xb33 None, // 0xb40 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0xb41 vpmadd52luq + MOp_bLL_M8B_M16B_M32B_M64B, // 0xb41 vpmadd52luq None, // 0xb42 None, // 0xb43 None, // 0xb50 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0xb51 vpmadd52huq + 
MOp_bLL_M8B_M16B_M32B_M64B, // 0xb51 vpmadd52huq None, // 0xb52 None, // 0xb53 None, // 0xb60 @@ -8731,67 +8742,67 @@ namespace Amd64InstrDecode None, // 0xdf2 None, // 0xdf3 None, // 0xe00 - None, // 0xe01 + M1st_W_M8B_or_M4B, // 0xe01 cmpoxadd None, // 0xe02 None, // 0xe03 None, // 0xe10 - None, // 0xe11 + M1st_W_M8B_or_M4B, // 0xe11 cmpnoxadd None, // 0xe12 None, // 0xe13 None, // 0xe20 - None, // 0xe21 + M1st_W_M8B_or_M4B, // 0xe21 cmpbxadd None, // 0xe22 None, // 0xe23 None, // 0xe30 - None, // 0xe31 + M1st_W_M8B_or_M4B, // 0xe31 cmpnbxadd None, // 0xe32 None, // 0xe33 None, // 0xe40 - None, // 0xe41 + M1st_W_M8B_or_M4B, // 0xe41 cmpzxadd None, // 0xe42 None, // 0xe43 None, // 0xe50 - None, // 0xe51 + M1st_W_M8B_or_M4B, // 0xe51 cmpnzxadd None, // 0xe52 None, // 0xe53 None, // 0xe60 - None, // 0xe61 + M1st_W_M8B_or_M4B, // 0xe61 cmpbexadd None, // 0xe62 None, // 0xe63 None, // 0xe70 - None, // 0xe71 + M1st_W_M8B_or_M4B, // 0xe71 cmpnbexadd None, // 0xe72 None, // 0xe73 None, // 0xe80 - None, // 0xe81 + M1st_W_M8B_or_M4B, // 0xe81 cmpsxadd None, // 0xe82 None, // 0xe83 None, // 0xe90 - None, // 0xe91 + M1st_W_M8B_or_M4B, // 0xe91 cmpnsxadd None, // 0xe92 None, // 0xe93 None, // 0xea0 - None, // 0xea1 + M1st_W_M8B_or_M4B, // 0xea1 cmppxadd None, // 0xea2 None, // 0xea3 None, // 0xeb0 - None, // 0xeb1 + M1st_W_M8B_or_M4B, // 0xeb1 cmpnpxadd None, // 0xeb2 None, // 0xeb3 None, // 0xec0 - None, // 0xec1 + M1st_W_M8B_or_M4B, // 0xec1 cmplxadd None, // 0xec2 None, // 0xec3 None, // 0xed0 - None, // 0xed1 + M1st_W_M8B_or_M4B, // 0xed1 cmpnlxadd None, // 0xed2 None, // 0xed3 None, // 0xee0 - None, // 0xee1 + M1st_W_M8B_or_M4B, // 0xee1 cmplexadd None, // 0xee2 None, // 0xee3 None, // 0xef0 - None, // 0xef1 + M1st_W_M8B_or_M4B, // 0xef1 cmpnlexadd None, // 0xef2 None, // 0xef3 None, // 0xf00 @@ -8802,11 +8813,11 @@ namespace Amd64InstrDecode None, // 0xf11 None, // 0xf12 None, // 0xf13 - None, // 0xf20 + MOp_W_M8B_or_M4B, // 0xf20 andn None, // 0xf21 None, // 0xf22 
None, // 0xf23 - None, // 0xf30 + MOp_W_M8B_or_M4B, // 0xf30 blsi,blsmsk,blsr None, // 0xf31 None, // 0xf32 None, // 0xf33 @@ -8814,18 +8825,18 @@ namespace Amd64InstrDecode None, // 0xf41 None, // 0xf42 None, // 0xf43 - None, // 0xf50 + MOp_W_M8B_or_M4B, // 0xf50 bzhi None, // 0xf51 - None, // 0xf52 - None, // 0xf53 + MOp_W_M8B_or_M4B, // 0xf52 pext + MOp_W_M8B_or_M4B, // 0xf53 pdep None, // 0xf60 None, // 0xf61 None, // 0xf62 - None, // 0xf63 - None, // 0xf70 - None, // 0xf71 - None, // 0xf72 - None, // 0xf73 + MOp_W_M8B_or_M4B, // 0xf63 mulx + MOp_W_M8B_or_M4B, // 0xf70 bextr + MOp_W_M8B_or_M4B, // 0xf71 shlx + MOp_W_M8B_or_M4B, // 0xf72 sarx + MOp_W_M8B_or_M4B, // 0xf73 shrx None, // 0xf80 None, // 0xf81 None, // 0xf82 @@ -9691,7 +9702,7 @@ namespace Amd64InstrDecode None, // 0xce2 None, // 0xce3 None, // 0xcf0 - None, // 0xcf1 + MOp_I1B_bLL_M8B_M16B_M32B_M64B, // 0xcf1 vgf2p8affineinvqb None, // 0xcf2 None, // 0xcf3 None, // 0xd00 @@ -9825,7 +9836,7 @@ namespace Amd64InstrDecode None, // 0xf00 None, // 0xf01 None, // 0xf02 - None, // 0xf03 + MOp_I1B_W_M8B_or_M4B, // 0xf03 rorx None, // 0xf10 None, // 0xf11 None, // 0xf12 @@ -9887,4 +9898,1032 @@ namespace Amd64InstrDecode None, // 0xff2 None, // 0xff3 }; + + static const InstrForm instrFormEvex_4[1024] + { + M1st_M1B, // 0x000 add + None, // 0x001 + None, // 0x002 + None, // 0x003 + M1st_W_M8B_or_M4B, // 0x010 add + M1st_W_M8B_or_M2B, // 0x011 add + None, // 0x012 + None, // 0x013 + MOp_M1B, // 0x020 add + None, // 0x021 + None, // 0x022 + None, // 0x023 + MOp_W_M8B_or_M4B, // 0x030 add + MOp_W_M8B_or_M2B, // 0x031 add + None, // 0x032 + None, // 0x033 + None, // 0x040 + None, // 0x041 + None, // 0x042 + None, // 0x043 + None, // 0x050 + None, // 0x051 + None, // 0x052 + None, // 0x053 + None, // 0x060 + None, // 0x061 + None, // 0x062 + None, // 0x063 + None, // 0x070 + None, // 0x071 + None, // 0x072 + None, // 0x073 + M1st_M1B, // 0x080 or + None, // 0x081 + None, // 0x082 + None, // 0x083 + 
M1st_W_M8B_or_M4B, // 0x090 or + M1st_W_M8B_or_M2B, // 0x091 or + None, // 0x092 + None, // 0x093 + MOp_M1B, // 0x0a0 or + None, // 0x0a1 + None, // 0x0a2 + None, // 0x0a3 + MOp_W_M8B_or_M4B, // 0x0b0 or + MOp_W_M8B_or_M2B, // 0x0b1 or + None, // 0x0b2 + None, // 0x0b3 + None, // 0x0c0 + None, // 0x0c1 + None, // 0x0c2 + None, // 0x0c3 + None, // 0x0d0 + None, // 0x0d1 + None, // 0x0d2 + None, // 0x0d3 + None, // 0x0e0 + None, // 0x0e1 + None, // 0x0e2 + None, // 0x0e3 + None, // 0x0f0 + None, // 0x0f1 + None, // 0x0f2 + None, // 0x0f3 + M1st_M1B, // 0x100 adc + None, // 0x101 + None, // 0x102 + None, // 0x103 + M1st_W_M8B_or_M4B, // 0x110 adc + M1st_W_M8B_or_M2B, // 0x111 adc + None, // 0x112 + None, // 0x113 + MOp_M1B, // 0x120 adc + None, // 0x121 + None, // 0x122 + None, // 0x123 + MOp_W_M8B_or_M4B, // 0x130 adc + MOp_W_M8B_or_M2B, // 0x131 adc + None, // 0x132 + None, // 0x133 + None, // 0x140 + None, // 0x141 + None, // 0x142 + None, // 0x143 + None, // 0x150 + None, // 0x151 + None, // 0x152 + None, // 0x153 + None, // 0x160 + None, // 0x161 + None, // 0x162 + None, // 0x163 + None, // 0x170 + None, // 0x171 + None, // 0x172 + None, // 0x173 + M1st_M1B, // 0x180 sbb + None, // 0x181 + None, // 0x182 + None, // 0x183 + M1st_W_M8B_or_M4B, // 0x190 sbb + M1st_W_M8B_or_M2B, // 0x191 sbb + None, // 0x192 + None, // 0x193 + MOp_M1B, // 0x1a0 sbb + None, // 0x1a1 + None, // 0x1a2 + None, // 0x1a3 + MOp_W_M8B_or_M4B, // 0x1b0 sbb + MOp_W_M8B_or_M2B, // 0x1b1 sbb + None, // 0x1b2 + None, // 0x1b3 + None, // 0x1c0 + None, // 0x1c1 + None, // 0x1c2 + None, // 0x1c3 + None, // 0x1d0 + None, // 0x1d1 + None, // 0x1d2 + None, // 0x1d3 + None, // 0x1e0 + None, // 0x1e1 + None, // 0x1e2 + None, // 0x1e3 + None, // 0x1f0 + None, // 0x1f1 + None, // 0x1f2 + None, // 0x1f3 + M1st_M1B, // 0x200 and + None, // 0x201 + None, // 0x202 + None, // 0x203 + M1st_W_M8B_or_M4B, // 0x210 and + M1st_W_M8B_or_M2B, // 0x211 and + None, // 0x212 + None, // 0x213 + MOp_M1B, // 0x220 and + 
None, // 0x221 + None, // 0x222 + None, // 0x223 + MOp_W_M8B_or_M4B, // 0x230 and + MOp_W_M8B_or_M2B, // 0x231 and + None, // 0x232 + None, // 0x233 + M1st_I1B_W_M8B_or_M4B, // 0x240 shld + M1st_I1B_W_M8B_or_M2B, // 0x241 shld + None, // 0x242 + None, // 0x243 + None, // 0x250 + None, // 0x251 + None, // 0x252 + None, // 0x253 + None, // 0x260 + None, // 0x261 + None, // 0x262 + None, // 0x263 + None, // 0x270 + None, // 0x271 + None, // 0x272 + None, // 0x273 + M1st_M1B, // 0x280 sub + None, // 0x281 + None, // 0x282 + None, // 0x283 + M1st_W_M8B_or_M4B, // 0x290 sub + M1st_W_M8B_or_M2B, // 0x291 sub + None, // 0x292 + None, // 0x293 + MOp_M1B, // 0x2a0 sub + None, // 0x2a1 + None, // 0x2a2 + None, // 0x2a3 + MOp_W_M8B_or_M4B, // 0x2b0 sub + MOp_W_M8B_or_M2B, // 0x2b1 sub + None, // 0x2b2 + None, // 0x2b3 + M1st_I1B_W_M8B_or_M4B, // 0x2c0 shrd + M1st_I1B_W_M8B_or_M2B, // 0x2c1 shrd + None, // 0x2c2 + None, // 0x2c3 + None, // 0x2d0 + None, // 0x2d1 + None, // 0x2d2 + None, // 0x2d3 + None, // 0x2e0 + None, // 0x2e1 + None, // 0x2e2 + None, // 0x2e3 + None, // 0x2f0 + None, // 0x2f1 + None, // 0x2f2 + None, // 0x2f3 + M1st_M1B, // 0x300 xor + None, // 0x301 + None, // 0x302 + None, // 0x303 + M1st_W_M8B_or_M4B, // 0x310 xor + M1st_W_M8B_or_M2B, // 0x311 xor + None, // 0x312 + None, // 0x313 + MOp_M1B, // 0x320 xor + None, // 0x321 + None, // 0x322 + None, // 0x323 + MOp_W_M8B_or_M4B, // 0x330 xor + MOp_W_M8B_or_M2B, // 0x331 xor + None, // 0x332 + None, // 0x333 + None, // 0x340 + None, // 0x341 + None, // 0x342 + None, // 0x343 + None, // 0x350 + None, // 0x351 + None, // 0x352 + None, // 0x353 + None, // 0x360 + None, // 0x361 + None, // 0x362 + None, // 0x363 + None, // 0x370 + None, // 0x371 + None, // 0x372 + None, // 0x373 + None, // 0x380 + None, // 0x381 + None, // 0x382 + None, // 0x383 + None, // 0x390 + None, // 0x391 + None, // 0x392 + None, // 0x393 + None, // 0x3a0 + None, // 0x3a1 + None, // 0x3a2 + None, // 0x3a3 + None, // 0x3b0 + None, // 0x3b1 + 
None, // 0x3b2 + None, // 0x3b3 + None, // 0x3c0 + None, // 0x3c1 + None, // 0x3c2 + None, // 0x3c3 + None, // 0x3d0 + None, // 0x3d1 + None, // 0x3d2 + None, // 0x3d3 + None, // 0x3e0 + None, // 0x3e1 + None, // 0x3e2 + None, // 0x3e3 + None, // 0x3f0 + None, // 0x3f1 + None, // 0x3f2 + None, // 0x3f3 + MOp_W_M8B_or_M4B, // 0x400 cmovo + MOp_W_M8B_or_M2B, // 0x401 cmovo + None, // 0x402 + None, // 0x403 + MOp_W_M8B_or_M4B, // 0x410 cmovno + MOp_W_M8B_or_M2B, // 0x411 cmovno + None, // 0x412 + None, // 0x413 + MOp_W_M8B_or_M4B, // 0x420 cmovb + MOp_W_M8B_or_M2B, // 0x421 cmovb + None, // 0x422 + None, // 0x423 + MOp_W_M8B_or_M4B, // 0x430 cmovae + MOp_W_M8B_or_M2B, // 0x431 cmovae + None, // 0x432 + None, // 0x433 + MOp_W_M8B_or_M4B, // 0x440 cmove + MOp_W_M8B_or_M2B, // 0x441 cmove + None, // 0x442 + None, // 0x443 + MOp_W_M8B_or_M4B, // 0x450 cmovne + MOp_W_M8B_or_M2B, // 0x451 cmovne + None, // 0x452 + None, // 0x453 + MOp_W_M8B_or_M4B, // 0x460 cmovbe + MOp_W_M8B_or_M2B, // 0x461 cmovbe + None, // 0x462 + None, // 0x463 + MOp_W_M8B_or_M4B, // 0x470 cmova + MOp_W_M8B_or_M2B, // 0x471 cmova + None, // 0x472 + None, // 0x473 + MOp_W_M8B_or_M4B, // 0x480 cmovs + MOp_W_M8B_or_M2B, // 0x481 cmovs + None, // 0x482 + None, // 0x483 + MOp_W_M8B_or_M4B, // 0x490 cmovns + MOp_W_M8B_or_M2B, // 0x491 cmovns + None, // 0x492 + None, // 0x493 + MOp_W_M8B_or_M4B, // 0x4a0 cmovp + MOp_W_M8B_or_M2B, // 0x4a1 cmovp + None, // 0x4a2 + None, // 0x4a3 + MOp_W_M8B_or_M4B, // 0x4b0 cmovnp + MOp_W_M8B_or_M2B, // 0x4b1 cmovnp + None, // 0x4b2 + None, // 0x4b3 + MOp_W_M8B_or_M4B, // 0x4c0 cmovl + MOp_W_M8B_or_M2B, // 0x4c1 cmovl + None, // 0x4c2 + None, // 0x4c3 + MOp_W_M8B_or_M4B, // 0x4d0 cmovge + MOp_W_M8B_or_M2B, // 0x4d1 cmovge + None, // 0x4d2 + None, // 0x4d3 + MOp_W_M8B_or_M4B, // 0x4e0 cmovle + MOp_W_M8B_or_M2B, // 0x4e1 cmovle + None, // 0x4e2 + None, // 0x4e3 + MOp_W_M8B_or_M4B, // 0x4f0 cmovg + MOp_W_M8B_or_M2B, // 0x4f1 cmovg + None, // 0x4f2 + None, // 0x4f3 + None, // 
0x500 + None, // 0x501 + None, // 0x502 + None, // 0x503 + None, // 0x510 + None, // 0x511 + None, // 0x512 + None, // 0x513 + None, // 0x520 + None, // 0x521 + None, // 0x522 + None, // 0x523 + None, // 0x530 + None, // 0x531 + None, // 0x532 + None, // 0x533 + None, // 0x540 + None, // 0x541 + None, // 0x542 + None, // 0x543 + None, // 0x550 + None, // 0x551 + None, // 0x552 + None, // 0x553 + None, // 0x560 + None, // 0x561 + None, // 0x562 + None, // 0x563 + None, // 0x570 + None, // 0x571 + None, // 0x572 + None, // 0x573 + None, // 0x580 + None, // 0x581 + None, // 0x582 + None, // 0x583 + None, // 0x590 + None, // 0x591 + None, // 0x592 + None, // 0x593 + None, // 0x5a0 + None, // 0x5a1 + None, // 0x5a2 + None, // 0x5a3 + None, // 0x5b0 + None, // 0x5b1 + None, // 0x5b2 + None, // 0x5b3 + None, // 0x5c0 + None, // 0x5c1 + None, // 0x5c2 + None, // 0x5c3 + None, // 0x5d0 + None, // 0x5d1 + None, // 0x5d2 + None, // 0x5d3 + None, // 0x5e0 + None, // 0x5e1 + None, // 0x5e2 + None, // 0x5e3 + None, // 0x5f0 + None, // 0x5f1 + None, // 0x5f2 + None, // 0x5f3 + MOp_W_M8B_or_M4B, // 0x600 movbe + MOp_W_M8B_or_M2B, // 0x601 movbe + None, // 0x602 + None, // 0x603 + M1st_W_M8B_or_M4B, // 0x610 movbe + M1st_W_M8B_or_M2B, // 0x611 movbe + None, // 0x612 + None, // 0x613 + None, // 0x620 + None, // 0x621 + None, // 0x622 + None, // 0x623 + None, // 0x630 + None, // 0x631 + None, // 0x632 + None, // 0x633 + None, // 0x640 + None, // 0x641 + None, // 0x642 + None, // 0x643 + None, // 0x650 + M1st_MUnknown, // 0x651 wrussd,wrussq + None, // 0x652 + None, // 0x653 + M1st_MUnknown, // 0x660 wrssd,wrssq + MOp_W_M8B_or_M4B, // 0x661 adcx + MOp_W_M8B_or_M4B, // 0x662 adox + None, // 0x663 + None, // 0x670 + None, // 0x671 + None, // 0x672 + None, // 0x673 + None, // 0x680 + None, // 0x681 + None, // 0x682 + None, // 0x683 + None, // 0x690 + None, // 0x691 + None, // 0x692 + None, // 0x693 + None, // 0x6a0 + None, // 0x6a1 + None, // 0x6a2 + None, // 0x6a3 + None, // 0x6b0 + 
None, // 0x6b1 + None, // 0x6b2 + None, // 0x6b3 + None, // 0x6c0 + None, // 0x6c1 + None, // 0x6c2 + None, // 0x6c3 + None, // 0x6d0 + None, // 0x6d1 + None, // 0x6d2 + None, // 0x6d3 + None, // 0x6e0 + None, // 0x6e1 + None, // 0x6e2 + None, // 0x6e3 + None, // 0x6f0 + None, // 0x6f1 + None, // 0x6f2 + None, // 0x6f3 + None, // 0x700 + None, // 0x701 + None, // 0x702 + None, // 0x703 + None, // 0x710 + None, // 0x711 + None, // 0x712 + None, // 0x713 + None, // 0x720 + None, // 0x721 + None, // 0x722 + None, // 0x723 + None, // 0x730 + None, // 0x731 + None, // 0x732 + None, // 0x733 + None, // 0x740 + None, // 0x741 + None, // 0x742 + None, // 0x743 + None, // 0x750 + None, // 0x751 + None, // 0x752 + None, // 0x753 + None, // 0x760 + None, // 0x761 + None, // 0x762 + None, // 0x763 + None, // 0x770 + None, // 0x771 + None, // 0x772 + None, // 0x773 + None, // 0x780 + None, // 0x781 + None, // 0x782 + None, // 0x783 + None, // 0x790 + None, // 0x791 + None, // 0x792 + None, // 0x793 + None, // 0x7a0 + None, // 0x7a1 + None, // 0x7a2 + None, // 0x7a3 + None, // 0x7b0 + None, // 0x7b1 + None, // 0x7b2 + None, // 0x7b3 + None, // 0x7c0 + None, // 0x7c1 + None, // 0x7c2 + None, // 0x7c3 + None, // 0x7d0 + None, // 0x7d1 + None, // 0x7d2 + None, // 0x7d3 + None, // 0x7e0 + None, // 0x7e1 + None, // 0x7e2 + None, // 0x7e3 + None, // 0x7f0 + None, // 0x7f1 + None, // 0x7f2 + None, // 0x7f3 + M1st_M1B_I1B, // 0x800 adc,add,and,or,sbb,sub,xor + None, // 0x801 + None, // 0x802 + None, // 0x803 + M1st_I4B_W_M8B_or_M4B, // 0x810 adc,add,and,or,sbb,sub,xor + M1st_W_M8B_I4B_or_M2B_I2B, // 0x811 adc,add,and,or,sbb,sub,xor + None, // 0x812 + None, // 0x813 + None, // 0x820 + None, // 0x821 + None, // 0x822 + None, // 0x823 + M1st_I1B_W_M8B_or_M4B, // 0x830 adc,add,and,or,sbb,sub,xor + M1st_I1B_W_M8B_or_M2B, // 0x831 adc,add,and,or,sbb,sub,xor + None, // 0x832 + None, // 0x833 + None, // 0x840 + None, // 0x841 + None, // 0x842 + None, // 0x843 + None, // 0x850 + None, // 0x851 + 
None, // 0x852 + None, // 0x853 + None, // 0x860 + None, // 0x861 + None, // 0x862 + None, // 0x863 + None, // 0x870 + None, // 0x871 + None, // 0x872 + None, // 0x873 + None, // 0x880 + None, // 0x881 + None, // 0x882 + None, // 0x883 + None, // 0x890 + None, // 0x891 + None, // 0x892 + None, // 0x893 + None, // 0x8a0 + None, // 0x8a1 + None, // 0x8a2 + None, // 0x8a3 + None, // 0x8b0 + None, // 0x8b1 + None, // 0x8b2 + None, // 0x8b3 + None, // 0x8c0 + None, // 0x8c1 + None, // 0x8c2 + None, // 0x8c3 + None, // 0x8d0 + None, // 0x8d1 + None, // 0x8d2 + None, // 0x8d3 + None, // 0x8e0 + None, // 0x8e1 + None, // 0x8e2 + None, // 0x8e3 + None, // 0x8f0 + None, // 0x8f1 + None, // 0x8f2 + None, // 0x8f3 + None, // 0x900 + None, // 0x901 + None, // 0x902 + None, // 0x903 + None, // 0x910 + None, // 0x911 + None, // 0x912 + None, // 0x913 + None, // 0x920 + None, // 0x921 + None, // 0x922 + None, // 0x923 + None, // 0x930 + None, // 0x931 + None, // 0x932 + None, // 0x933 + None, // 0x940 + None, // 0x941 + None, // 0x942 + None, // 0x943 + None, // 0x950 + None, // 0x951 + None, // 0x952 + None, // 0x953 + None, // 0x960 + None, // 0x961 + None, // 0x962 + None, // 0x963 + None, // 0x970 + None, // 0x971 + None, // 0x972 + None, // 0x973 + None, // 0x980 + None, // 0x981 + None, // 0x982 + None, // 0x983 + None, // 0x990 + None, // 0x991 + None, // 0x992 + None, // 0x993 + None, // 0x9a0 + None, // 0x9a1 + None, // 0x9a2 + None, // 0x9a3 + None, // 0x9b0 + None, // 0x9b1 + None, // 0x9b2 + None, // 0x9b3 + None, // 0x9c0 + None, // 0x9c1 + None, // 0x9c2 + None, // 0x9c3 + None, // 0x9d0 + None, // 0x9d1 + None, // 0x9d2 + None, // 0x9d3 + None, // 0x9e0 + None, // 0x9e1 + None, // 0x9e2 + None, // 0x9e3 + None, // 0x9f0 + None, // 0x9f1 + None, // 0x9f2 + None, // 0x9f3 + None, // 0xa00 + None, // 0xa01 + None, // 0xa02 + None, // 0xa03 + None, // 0xa10 + None, // 0xa11 + None, // 0xa12 + None, // 0xa13 + None, // 0xa20 + None, // 0xa21 + None, // 0xa22 + None, // 
0xa23 + None, // 0xa30 + None, // 0xa31 + None, // 0xa32 + None, // 0xa33 + None, // 0xa40 + None, // 0xa41 + None, // 0xa42 + None, // 0xa43 + M1st_W_M8B_or_M4B, // 0xa50 shld + M1st_W_M8B_or_M2B, // 0xa51 shld + None, // 0xa52 + None, // 0xa53 + None, // 0xa60 + None, // 0xa61 + None, // 0xa62 + None, // 0xa63 + None, // 0xa70 + None, // 0xa71 + None, // 0xa72 + None, // 0xa73 + None, // 0xa80 + None, // 0xa81 + None, // 0xa82 + None, // 0xa83 + None, // 0xa90 + None, // 0xa91 + None, // 0xa92 + None, // 0xa93 + None, // 0xaa0 + None, // 0xaa1 + None, // 0xaa2 + None, // 0xaa3 + None, // 0xab0 + None, // 0xab1 + None, // 0xab2 + None, // 0xab3 + None, // 0xac0 + None, // 0xac1 + None, // 0xac2 + None, // 0xac3 + M1st_W_M8B_or_M4B, // 0xad0 shrd + M1st_W_M8B_or_M2B, // 0xad1 shrd + None, // 0xad2 + None, // 0xad3 + None, // 0xae0 + None, // 0xae1 + None, // 0xae2 + None, // 0xae3 + MOp_W_M8B_or_M4B, // 0xaf0 imul + MOp_W_M8B_or_M2B, // 0xaf1 imul + None, // 0xaf2 + None, // 0xaf3 + None, // 0xb00 + None, // 0xb01 + None, // 0xb02 + None, // 0xb03 + None, // 0xb10 + None, // 0xb11 + None, // 0xb12 + None, // 0xb13 + None, // 0xb20 + None, // 0xb21 + None, // 0xb22 + None, // 0xb23 + None, // 0xb30 + None, // 0xb31 + None, // 0xb32 + None, // 0xb33 + None, // 0xb40 + None, // 0xb41 + None, // 0xb42 + None, // 0xb43 + None, // 0xb50 + None, // 0xb51 + None, // 0xb52 + None, // 0xb53 + None, // 0xb60 + None, // 0xb61 + None, // 0xb62 + None, // 0xb63 + None, // 0xb70 + None, // 0xb71 + None, // 0xb72 + None, // 0xb73 + None, // 0xb80 + None, // 0xb81 + None, // 0xb82 + None, // 0xb83 + None, // 0xb90 + None, // 0xb91 + None, // 0xb92 + None, // 0xb93 + None, // 0xba0 + None, // 0xba1 + None, // 0xba2 + None, // 0xba3 + None, // 0xbb0 + None, // 0xbb1 + None, // 0xbb2 + None, // 0xbb3 + None, // 0xbc0 + None, // 0xbc1 + None, // 0xbc2 + None, // 0xbc3 + None, // 0xbd0 + None, // 0xbd1 + None, // 0xbd2 + None, // 0xbd3 + None, // 0xbe0 + None, // 0xbe1 + None, // 0xbe2 
+ None, // 0xbe3 + None, // 0xbf0 + None, // 0xbf1 + None, // 0xbf2 + None, // 0xbf3 + M1st_M1B_I1B, // 0xc00 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xc01 + None, // 0xc02 + None, // 0xc03 + M1st_I1B_W_M8B_or_M4B, // 0xc10 rcl,rcr,rol,ror,sar,shl,shr + M1st_I1B_W_M8B_or_M2B, // 0xc11 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xc12 + None, // 0xc13 + None, // 0xc20 + None, // 0xc21 + None, // 0xc22 + None, // 0xc23 + None, // 0xc30 + None, // 0xc31 + None, // 0xc32 + None, // 0xc33 + None, // 0xc40 + None, // 0xc41 + None, // 0xc42 + None, // 0xc43 + None, // 0xc50 + None, // 0xc51 + None, // 0xc52 + None, // 0xc53 + None, // 0xc60 + None, // 0xc61 + None, // 0xc62 + None, // 0xc63 + None, // 0xc70 + None, // 0xc71 + None, // 0xc72 + None, // 0xc73 + None, // 0xc80 + None, // 0xc81 + None, // 0xc82 + None, // 0xc83 + None, // 0xc90 + None, // 0xc91 + None, // 0xc92 + None, // 0xc93 + None, // 0xca0 + None, // 0xca1 + None, // 0xca2 + None, // 0xca3 + None, // 0xcb0 + None, // 0xcb1 + None, // 0xcb2 + None, // 0xcb3 + None, // 0xcc0 + None, // 0xcc1 + None, // 0xcc2 + None, // 0xcc3 + None, // 0xcd0 + None, // 0xcd1 + None, // 0xcd2 + None, // 0xcd3 + None, // 0xce0 + None, // 0xce1 + None, // 0xce2 + None, // 0xce3 + None, // 0xcf0 + None, // 0xcf1 + None, // 0xcf2 + None, // 0xcf3 + M1st_M1B, // 0xd00 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd01 + None, // 0xd02 + None, // 0xd03 + M1st_W_M8B_or_M4B, // 0xd10 rcl,rcr,rol,ror,sar,shl,shr + M1st_W_M8B_or_M2B, // 0xd11 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd12 + None, // 0xd13 + M1st_M1B, // 0xd20 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd21 + None, // 0xd22 + None, // 0xd23 + M1st_W_M8B_or_M4B, // 0xd30 rcl,rcr,rol,ror,sar,shl,shr + M1st_W_M8B_or_M2B, // 0xd31 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd32 + None, // 0xd33 + MOp_M16B_I1B, // 0xd40 sha1rnds4 + None, // 0xd41 + None, // 0xd42 + None, // 0xd43 + None, // 0xd50 + None, // 0xd51 + None, // 0xd52 + None, // 0xd53 + None, // 0xd60 + None, // 0xd61 + None, // 
0xd62 + None, // 0xd63 + None, // 0xd70 + None, // 0xd71 + None, // 0xd72 + None, // 0xd73 + MOp_M16B, // 0xd80 sha1nexte + None, // 0xd81 + MOnly_MUnknown, // 0xd82 aesdecwide128kl,aesdecwide256kl,aesencwide128kl,aesencwide256kl + None, // 0xd83 + MOp_M16B, // 0xd90 sha1msg1 + None, // 0xd91 + None, // 0xd92 + None, // 0xd93 + MOp_M16B, // 0xda0 sha1msg2 + None, // 0xda1 + None, // 0xda2 + None, // 0xda3 + MOp_M16B, // 0xdb0 sha256rnds2 + None, // 0xdb1 + None, // 0xdb2 + None, // 0xdb3 + MOp_M16B, // 0xdc0 sha256msg1 + None, // 0xdc1 + MOp_MUnknown, // 0xdc2 aesenc128kl + None, // 0xdc3 + MOp_M16B, // 0xdd0 sha256msg2 + None, // 0xdd1 + MOp_MUnknown, // 0xdd2 aesdec128kl + None, // 0xdd3 + None, // 0xde0 + None, // 0xde1 + MOp_MUnknown, // 0xde2 aesenc256kl + None, // 0xde3 + None, // 0xdf0 + None, // 0xdf1 + MOp_MUnknown, // 0xdf2 aesdec256kl + None, // 0xdf3 + None, // 0xe00 + None, // 0xe01 + None, // 0xe02 + None, // 0xe03 + None, // 0xe10 + None, // 0xe11 + None, // 0xe12 + None, // 0xe13 + None, // 0xe20 + None, // 0xe21 + None, // 0xe22 + None, // 0xe23 + None, // 0xe30 + None, // 0xe31 + None, // 0xe32 + None, // 0xe33 + None, // 0xe40 + None, // 0xe41 + None, // 0xe42 + None, // 0xe43 + None, // 0xe50 + None, // 0xe51 + None, // 0xe52 + None, // 0xe53 + None, // 0xe60 + None, // 0xe61 + None, // 0xe62 + None, // 0xe63 + None, // 0xe70 + None, // 0xe71 + None, // 0xe72 + None, // 0xe73 + None, // 0xe80 + None, // 0xe81 + None, // 0xe82 + None, // 0xe83 + None, // 0xe90 + None, // 0xe91 + None, // 0xe92 + None, // 0xe93 + None, // 0xea0 + None, // 0xea1 + None, // 0xea2 + None, // 0xea3 + None, // 0xeb0 + None, // 0xeb1 + None, // 0xeb2 + None, // 0xeb3 + None, // 0xec0 + None, // 0xec1 + None, // 0xec2 + None, // 0xec3 + None, // 0xed0 + None, // 0xed1 + None, // 0xed2 + None, // 0xed3 + None, // 0xee0 + None, // 0xee1 + None, // 0xee2 + None, // 0xee3 + None, // 0xef0 + None, // 0xef1 + None, // 0xef2 + None, // 0xef3 + MOp_M1B, // 0xf00 crc32 + None, // 
0xf01 + MOp_M16B, // 0xf02 invept + None, // 0xf03 + MOp_W_M8B_or_M4B, // 0xf10 crc32 + MOp_W_M8B_or_M2B, // 0xf11 crc32 + MOp_M16B, // 0xf12 invvpid + None, // 0xf13 + None, // 0xf20 + None, // 0xf21 + MOp_MUnknown, // 0xf22 invpcid + None, // 0xf23 + None, // 0xf30 + None, // 0xf31 + None, // 0xf32 + None, // 0xf33 + None, // 0xf40 + None, // 0xf41 + None, // 0xf42 + None, // 0xf43 + None, // 0xf50 + None, // 0xf51 + None, // 0xf52 + None, // 0xf53 + MOnly_M1B, // 0xf60 neg,not + None, // 0xf61 + None, // 0xf62 + None, // 0xf63 + MOnly_W_M8B_or_M4B, // 0xf70 neg,not + MOnly_W_M8B_or_M2B, // 0xf71 neg,not + None, // 0xf72 + None, // 0xf73 + None, // 0xf80 + MOp_MUnknown, // 0xf81 movdir64b + MOp_MUnknown, // 0xf82 enqcmds + MOp_MUnknown, // 0xf83 enqcmd + M1st_W_M8B_or_M4B, // 0xf90 movdiri + None, // 0xf91 + None, // 0xf92 + None, // 0xf93 + None, // 0xfa0 + None, // 0xfa1 + None, // 0xfa2 + None, // 0xfa3 + None, // 0xfb0 + None, // 0xfb1 + None, // 0xfb2 + None, // 0xfb3 + M1st_W_M8B_or_M4B, // 0xfc0 aadd + M1st_W_M8B_or_M4B, // 0xfc1 aand + M1st_W_M8B_or_M4B, // 0xfc2 axor + M1st_W_M8B_or_M4B, // 0xfc3 aor + None, // 0xfd0 + None, // 0xfd1 + None, // 0xfd2 + None, // 0xfd3 + MOnly_M1B, // 0xfe0 dec,inc + None, // 0xfe1 + None, // 0xfe2 + None, // 0xfe3 + MOnly_W_M8B_or_M4B, // 0xff0 dec,inc + None, // 0xff1 + None, // 0xff2 + None, // 0xff3 + }; } diff --git a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs index dbcec26dd67583..0e55c4c2d4fac3 100644 --- a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs +++ b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs @@ -23,9 +23,8 @@ public enum EncodingFlags : int P = 0x1, // OpSize (P)refix F2 = 0x2, F3 = 0x4, - Rex = 0x8, - W = 0x10, // VEX.W / EVEX.W + W = 0x10, // REX.W / REX2.W / VEX.W / EVEX.W L = 0x20, // VEX.L (for EVEX, see LL 
bits below) b = 0x40, // EVEX.b (broadcast/RC/SAE Context) @@ -102,21 +101,29 @@ internal enum Map { // Map None, - Primary, - Secondary, - F38, - F3A, + Primary, // legacy map 0 + Secondary, // 0F - legacy map 1 + F38, // 0F 38 - legacy map 2 + F3A, // 0F 3A - legacy map 3 Vex1, // mmmmm = 00001 (0F) Vex2, // mmmmm = 00010 (0F 38) Vex3, // mmmmm = 00011 (0F 3A) Evex_0F, // mmm = 001 Evex_0F38, // mmm = 010 Evex_0F3A, // mmm = 011 + Evex_4, // mmm = 100 // Extended EVEX legacy promoted map 0/1 } internal sealed partial class Amd64InstructionSample { - [GeneratedRegex(@"^\s*(?
0x[a-f0-9]+)\s[^:]*:\s*(?[0-9a-f ]*)\t(?(((rex[.WRXB]*)|(rep[nez]*)|(data16)|(addr32)|(lock)|(bnd)|(\{vex\})|([cdefgs]s)) +)*)(?\S+) *(?(\S[^#]*?)?)\s*(?#.*)?$", + [GeneratedRegex( + @"^\s*" + + @"(?
0x[0-9a-fA-F]+)\s[^:]*:\s*" + + @"(?[0-9a-fA-F][0-9a-fA-F]( [0-9a-fA-F][0-9a-fA-F])*)\s*" + + @"(?(((rex[.WRXB]*)|(\{rex2 0x[0-9a-fA-F][0-9a-fA-F]?\})|(rep[nez]*)|(data16)|(addr32)|(lock)|(bnd)|(\{vex\})|(\{evex\})|([cdefgs]s)) +)*)" + + @"(?\S+) *" + + @"(?(\S[^#]*?)?)\s*" + + @"(?#.*)?$", RegexOptions.ExplicitCapture)] private static partial Regex EncDisassemblySplit(); @@ -139,15 +146,28 @@ internal sealed partial class Amd64InstructionSample ["WORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M2B, ["WORD PTR [rip+0x53525150]{1to16}"] = SuffixFlags.M2B, ["WORD PTR [rip+0x53525150]{1to32}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to16}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to32}"] = SuffixFlags.M2B, ["DWORD PTR [rip+0x53525150]"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to2}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to4}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to16}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to2}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to4}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to16}"] = SuffixFlags.M4B, ["QWORD PTR [rip+0x53525150]"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to2}"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to4}"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to2}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to4}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M8B, ["OWORD PTR [rip+0x53525150]"] = SuffixFlags.M16B, ["XMMWORD PTR [rip+0x53525150]"] = SuffixFlags.M16B, ["YMMWORD PTR [rip+0x53525150]"] = 
SuffixFlags.M32B, @@ -181,7 +201,8 @@ public int opCodeExt { get { - const byte BytePP = 0x3; + const byte VEX_pp_mask = 0x3; + const byte EVEX_pp_mask = 0x3; byte opcode = encoding[opIndex]; byte pp = 0; @@ -203,14 +224,15 @@ public int opCodeExt case Map.Vex2: case Map.Vex3: // `pp` is the low 2 bits of the last byte of the VEX prefix (either 3-byte or 2-byte form). - pp = (byte)(encoding[opIndex - 1] & BytePP); + pp = (byte)(encoding[opIndex - 1] & VEX_pp_mask); break; case Map.Evex_0F: case Map.Evex_0F38: case Map.Evex_0F3A: + case Map.Evex_4: { var evex_p1 = encoding[opIndex - 2]; - pp = (byte)(evex_p1 & BytePP); + pp = (byte)(evex_p1 & EVEX_pp_mask); break; } default: @@ -307,6 +329,7 @@ internal enum Prefixes : byte AddSize = 0x67, Vex = 0xc4, VexShort = 0xc5, + Rex2 = 0xD5, Lock = 0xf0, Rep = 0xf2, Repne = 0xf3 @@ -321,6 +344,8 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) const byte RexMask = 0xf0; const byte RexW = 0x8; + const byte Rex2W = 0x8; + const byte Rex2_M0 = 0x80; const byte Vex_ByteW = 0x80; const byte Vex_ByteL = 0x04; @@ -365,7 +390,6 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) { byte rex = encoding[operandIndex++]; - flags |= EncodingFlags.Rex; if (Debug.debug) Console.WriteLine($" P:REX"); if ((rex & RexW) != 0) @@ -477,6 +501,11 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) map = Map.Evex_0F3A; if (Debug.debug) Console.WriteLine($" map: Evex_0F3A"); break; + case 0x4: + map = Map.Evex_4; + // Extended EVEX legacy promoted map 0/1. 
+ if (Debug.debug) Console.WriteLine($" map: Evex_4"); + break; default: throw new Exception($"Unexpected EVEX map {encoding}"); } @@ -487,23 +516,55 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) if (Debug.debug) Console.WriteLine($" EVEX.W"); } - byte evex_LprimeL = (byte)((evex_p2 & Evex_ByteLprimeLmask) >> Evex_ByteLprimeLshift); - flags |= Util.ConvertEvexLLToEncodingFlags(evex_LprimeL); - if (Debug.debug) + if (evex_mmm != 4) // EVEX.L'L is not used in map 4 { - Console.WriteLine($" EVEX.L'L={evex_LprimeL:x1}"); + byte evex_LprimeL = (byte)((evex_p2 & Evex_ByteLprimeLmask) >> Evex_ByteLprimeLshift); + flags |= Util.ConvertEvexLLToEncodingFlags(evex_LprimeL); + if (Debug.debug) + { + Console.WriteLine($" EVEX.L'L={evex_LprimeL:x1}"); + } } - var evex_b = evex_p2 & 0x10; - if (evex_b != 0) + if (evex_mmm != 4) // EVEX.b is not used in map 4 { - flags |= EncodingFlags.b; - if (Debug.debug) Console.WriteLine($" EVEX.b"); + var evex_b = evex_p2 & 0x10; + if (evex_b != 0) + { + flags |= EncodingFlags.b; + if (Debug.debug) Console.WriteLine($" EVEX.b"); + } } operandIndex += 4; break; } + case Prefixes.Rex2: + { + if (Debug.debug) Console.WriteLine($" P:REX2"); + var byte1 = encoding[operandIndex + 1]; + + var rex2_m0 = byte1 & Rex2_M0; + if (rex2_m0 == 0) + { + map = Map.Primary; + if (Debug.debug) Console.WriteLine($" map: Primary"); + } + else + { + map = Map.Secondary; + if (Debug.debug) Console.WriteLine($" map: Secondary"); + } + + if ((byte1 & Rex2W) != 0) + { + flags |= EncodingFlags.W; + if (Debug.debug) Console.WriteLine($" P:REX2.W"); + } + + operandIndex += 2; + break; + } default: map = Map.Primary; if (Debug.debug) Console.WriteLine($" map: primary"); @@ -599,11 +660,19 @@ internal sealed partial class Amd64InstructionTableGenerator { private List samples = new List(); - private const string assemblyPrefix = " 0x000000000"; - private const string preTerminator = "58\t"; - private const string groupTerminator = "59\tpop"; + 
[GeneratedRegex(@"^\s+0x00000000")] + private static partial Regex AssemblyPrefix(); + + // The regex matches the leading '0x', but the captured "address" group holds only the hex digits after it. + [GeneratedRegex(@"^\s*0x(?
[0-9a-fA-F]+)", RegexOptions.ExplicitCapture)] + private static partial Regex AssemblyAddress(); - [GeneratedRegex(@"((\{vex\})|(\{bad\})|(\(bad\))|(\srex(\.[WRXB]*)?\s*(#.*)?$))")] + // NOTE: APX instructions push2/push2p/pop2/pop2p are not causing gdb to report an illegal instruction, + // which is causing problems. So manually disallow them. + // NOTE: we don't disqualify disassembly with `{evex}` in the text: there are some cases where an instruction + // can be encoded with either an EVEX or VEX encoding, and the disassembler will annotate the instruction with + // `{evex}` to indicate it is not the canonical encoding. + [GeneratedRegex(@"((push2)|(pop2)|(\{vex\})|(\{bad\})|(\(bad\))|(\srex(\.[WRXB]*)?\s*(#.*)?$))")] private static partial Regex BadDisassembly(); private List<(Map, int)> regExpandOpcodes; @@ -648,6 +717,7 @@ private Amd64InstructionTableGenerator() { Map.Evex_0F, new Dictionary() }, { Map.Evex_0F38, new Dictionary() }, { Map.Evex_0F3A, new Dictionary() }, + { Map.Evex_4, new Dictionary() }, }; ParseSamples(); @@ -658,32 +728,34 @@ private void ParseSamples() { string line; string sample = null; - bool saw58 = false; + int sampleAddress = 0; + + // Each sample is written out as 16 bytes of disassembly. If we hit bad disassembly, we need to skip to the next sample + // based on the disassembly address. 
+ while ((line = Console.In.ReadLine()) != null) { //if (Debug.debug) Console.WriteLine($"line: {line}"); - if (sample == null) + var match = AssemblyAddress().Match(line); + if (!match.Success) { - // Ignore non-assembly lines - if (line.StartsWith(assemblyPrefix)) - sample = line.Trim(); continue; } + int lineAddress = int.Parse(match.Groups["address"].Value, NumberStyles.AllowHexSpecifier); - //if (Debug.debug) Console.WriteLine($"sample: {sample}"); - - // Each sample may contain multiple instructions - // We are only interested in the first of each group - // Each group is terminated by 0x58 then 0x59 which is a pop instruction - if (!saw58) + if (sample == null) { - saw58 = line.Contains(preTerminator); + sample = line.Trim(); + sampleAddress = lineAddress; + //if (Debug.debug) Console.WriteLine($"sample: ({sampleAddress:x}) {sample}"); continue; } - else if (!line.Contains(groupTerminator)) + + // Keep skipping instructions until we get to the next sample address. + if (lineAddress < sampleAddress + 15) { - saw58 = false; + //if (Debug.debug) Console.WriteLine($"Skipping {lineAddress:x}"); continue; } @@ -712,8 +784,8 @@ private void ParseSamples() } } - saw58 = false; sample = null; + sampleAddress = 0; } } @@ -947,13 +1019,22 @@ private void SummarizeSamples(bool reg) else goto default; break; + case SuffixFlags.M8B | SuffixFlags.M2B | SuffixFlags.I4B | SuffixFlags.I2B: + if (TestHypothesis((e) => Amd64W(SuffixFlags.M8B | SuffixFlags.I4B, SuffixFlags.M2B | SuffixFlags.I2B, e), sometimesSuffix, map)) + rules += "_W_M8B_I4B_or_M2B_I2B"; + else + goto default; + break; default: - if (Debug.debug) { - Console.WriteLine($"Unhandled rule...{sometimesSuffix}"); + string mnemonics_string = string.Join(",", mnemonics.OrderBy(s => s)); + if (Debug.debug) + { + Console.WriteLine($"Unhandled rule...{sometimesSuffix} : {mnemonics_string}"); + } + Console.Error.WriteLine($"Unhandled rule...{sometimesSuffix} : {mnemonics_string}"); + return; } - 
Console.Error.WriteLine($"Unhandled rule...{sometimesSuffix}"); - return; } rules = rules.Replace("^_", "").Replace("^", "None"); @@ -1003,12 +1084,12 @@ public static SuffixFlags TestLL(SuffixFlags LL00, SuffixFlags LL01, SuffixFlags public static SuffixFlags Amd64L(SuffixFlags t, SuffixFlags f, EncodingFlags g) => Test(EncodingFlags.L, t, f, g); public static SuffixFlags Amd64W(SuffixFlags W1, SuffixFlags W0, EncodingFlags g) => Test(EncodingFlags.W, W1, W0, g); - public static SuffixFlags Amd64P(SuffixFlags t, SuffixFlags f, EncodingFlags g) => Test(EncodingFlags.P, f, t, g); + public static SuffixFlags Amd64P(SuffixFlags P0, SuffixFlags P1, EncodingFlags g) => Test(EncodingFlags.P, P1, P0, g); // Note: P0/P1 reversed. This puts smaller (OSIZE override) second. public static SuffixFlags Amd64b(SuffixFlags b1, SuffixFlags b0, EncodingFlags g) => Test(EncodingFlags.b, b1, b0, g); // Tests for multiple flags - public static SuffixFlags Amd64WP(SuffixFlags tx, SuffixFlags ft, SuffixFlags ff, EncodingFlags g) => Amd64W(tx, Amd64P(ft, ff, g), g); + public static SuffixFlags Amd64WP(SuffixFlags W1, SuffixFlags W0P0, SuffixFlags W0P1, EncodingFlags g) => Amd64W(W1, Amd64P(W0P0, W0P1, g), g); public static SuffixFlags Amd64WLL(SuffixFlags W1LL00, SuffixFlags W1LL01, SuffixFlags W1LL10, SuffixFlags W0LL00, SuffixFlags W0LL01, SuffixFlags W0LL10, EncodingFlags g) => Amd64W(TestLL(W1LL00, W1LL01, W1LL10, g), TestLL(W0LL00, W0LL01, W0LL10, g), g); public static SuffixFlags Amd64bLL(SuffixFlags b1, SuffixFlags b0LL00, SuffixFlags b0LL01, SuffixFlags b0LL10, EncodingFlags g) => @@ -1035,7 +1116,7 @@ private void AddOpCode(Map map, int opCodeExt, bool reg, int modrmReg, string ru else { string oldstring = null; - if (Debug.debug) + if (true) // Debug.debug { if (opcodes[map].TryGetValue(opCodeExt, out oldstring)) { @@ -1046,6 +1127,9 @@ private void AddOpCode(Map map, int opCodeExt, bool reg, int modrmReg, string ru if (Debug.debug) { Console.WriteLine($"add 
opcodes[{map}][{opCodeExt:x3}] = {opcodes[map][opCodeExt]}"); + } + if (true) // Debug.debug + { if ((oldstring != null) && (oldstring != opcodes[map][opCodeExt])) { Console.WriteLine($"WARNING: REPLACEMENT WAS DIFFERENT"); @@ -1090,12 +1174,21 @@ private void WriteCode() Console.WriteLine(" // I4B // Instruction includes 4 bytes of immediates"); Console.WriteLine(" // I8B // Instruction includes 8 bytes of immediates"); Console.WriteLine(" // Unknown // Instruction samples did not include a modrm configured to produce RIP addressing"); - Console.WriteLine(" // L // Flags depend on L bit in encoding. L__or_"); - Console.WriteLine(" // LL // Flags depend on L'L bits in EVEX encoding. LL___"); - Console.WriteLine(" // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector"); - Console.WriteLine(" // W // Flags depend on W bit in encoding. W__or_"); - Console.WriteLine(" // P // Flags depend on OpSize prefix for encoding. P__or_"); - Console.WriteLine(" // WP // Flags depend on W bit in encoding and OpSize prefix. 
WP__or__or_"); + Console.WriteLine(" // L // Flags depend on L bit in encoding."); + Console.WriteLine(" // // L__or_"); + Console.WriteLine(" // // L__or_"); + Console.WriteLine(" // LL // Flags depend on L'L bits in EVEX encoding."); + Console.WriteLine(" // // LL___"); + Console.WriteLine(" // // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector"); + Console.WriteLine(" // W // Flags depend on W bit in encoding."); + Console.WriteLine(" // // W__or_"); + Console.WriteLine(" // // W__or_"); + Console.WriteLine(" // P // Flags depend on OpSize prefix for encoding."); + Console.WriteLine(" // // P__or_"); + Console.WriteLine(" // // P__or_"); + Console.WriteLine(" // WP // Flags depend on W bit in encoding and OpSize prefix."); + Console.WriteLine(" // // WP__or__or_"); + Console.WriteLine(" // // WP__or__or_"); Console.WriteLine(" // WLL // Flags depend on W and L'L bits."); Console.WriteLine(" // // WLL____or___"); Console.WriteLine(" // bLL // Flags depend on EVEX.b and L'L bits."); @@ -1114,15 +1207,15 @@ private void WriteCode() continue; Console.WriteLine($" {rule},"); } - Console.WriteLine($" Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location in encoded in lower bits"); + Console.WriteLine($" Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location is encoded in lower bits."); Console.WriteLine(" };"); Console.WriteLine(); - Console.WriteLine(" // The following instrForm maps correspond to the amd64 instr maps"); - Console.WriteLine(" // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics"); - Console.WriteLine(" // The opcode is packed to be human readable. 
PackedOpcode = opcode << 4 + pp"); - Console.WriteLine(" // - For Vex* the pp is directly included in the encoding"); - Console.WriteLine(" // - For the Secondary, F38, and F3A pages the pp is not defined in the encoding, but affects instr form."); + Console.WriteLine(" // The following instrForm maps correspond to the amd64 instruction maps."); + Console.WriteLine(" // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics."); + Console.WriteLine(" // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp. For example, 0x123 is opcode 0x12, pp=0x3."); + Console.WriteLine(" // - For Vex* and EVEX the pp is directly included in the encoding"); + Console.WriteLine(" // - For the Secondary (0F), 0F 38, and 0F 3A pages the pp is not defined in the encoding, but affects instruction form."); Console.WriteLine(" // - pp = 0 implies no prefix."); Console.WriteLine(" // - pp = 1 implies 0x66 OpSize prefix only."); Console.WriteLine(" // - pp = 2 implies 0xF3 prefix."); @@ -1130,9 +1223,9 @@ private void WriteCode() Console.WriteLine(" // - For the primary map, pp is not used and is always 0 in the comments."); Console.WriteLine(); Console.WriteLine(); - Console.WriteLine(" // Instruction which change forms based on modrm.reg are encoded in this extension table."); - Console.WriteLine(" // Since there are 8 modrm.reg values, they occur is groups of 8."); - Console.WriteLine(" // Each group is referenced from the other tables below using Extension|(index >> 3)."); + Console.WriteLine(" // Instructions which change forms based on modrm.reg are encoded in this extension table."); + Console.WriteLine(" // Since there are 8 modrm.reg values, they occur in groups of 8."); + Console.WriteLine(" // Each group is referenced from the other tables below using (Extension|(index >> 3))."); currentExtension += 8; Console.WriteLine($" static const InstrForm instrFormExtension[{currentExtension + 1}]"); 
Console.WriteLine(" {"); @@ -1167,7 +1260,8 @@ private void WriteCode() ("Vex3", Map.Vex3), ("Evex_0F", Map.Evex_0F), ("Evex_0F38", Map.Evex_0F38), - ("Evex_0F3A", Map.Evex_0F3A) + ("Evex_0F3A", Map.Evex_0F3A), + ("Evex_4", Map.Evex_4) }; foreach ((string name, Map map) in mapTuples) diff --git a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp index 963401812ca819..abaaa165af2555 100644 --- a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp +++ b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp @@ -2,19 +2,89 @@ // The .NET Foundation licenses this file to you under the MIT license. #include - + #define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0])) +void generatePostamble(int bytesEmitted) +{ + // We need a postamble of single-byte instructions so the disassembler can get back on track + // after a bad instruction. We always pad up to 16 bytes total codes: the maximum x86 instruction + // size is 15, so the disassembler will find at worst a 15 byte instruction followed by a single byte + // padding instruction. The minimum byte sequence we generate below is a single opcode plus a modrm, + // so we need 14 possible postamble/padding bytes. + const char* postamble[] = { + "0x50, ", + "0x51, ", + "0x52, ", + "0x53, ", + "0x54, ", + "0x55, ", + "0x56, ", + "0x57, ", + "0x58, ", + "0x59, ", + "0x59, ", + "0x59, ", + "0x59, ", + "0x59, " + }; + + int bytesToEmit = 16 - bytesEmitted; + for (int i = 0; i < bytesToEmit; i++) + { + printf("%s", postamble[i]); + } + printf("\n"); +} + int main(int argc, char* argv[]) { printf("#include \n"); printf("#include \n"); - const char* postamble = "0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,\n"; + // The sequence of generated codes is important: the tool which reads the disassembled instructions + // processes one "opcode" at a time. 
When the opcode changes, it summarizes the opcode and moves on + // to process the next one. Here, "opcode" means a single instruction. In the x64 encoding, this + // can be determined by the primary opcode byte, the prefix (0x66, 0xF2, 0xF3) or equivalent "pp" field + // in the VEX/EVEX prefix, and the "reg/opcode" field of the ModRM byte, which sometimes provides additional + // "opcode" bits. + // + // When generating codes, for every primary opcode, we output the following ModRM bytes: + // 0x05, 0x0d, 0x15, 0x1d, 0x25, 0x2d, 0x35, 0x3d + // this corresponds to modrm.rm=0x5 and modrm.reg=0,1,2,3,4,5,6,7. That is, all possible modrm.reg values. + // modrm.mod=0/modrm.rm=0x5 corresponds to RIP-relative addressing. The purpose of varying modrm.reg + // is to find all cases where an instruction encoding depends on modrm.reg. + // + // Thus, the 'modrm' loop needs to be less nested than the opcode/prefix loop, since varying modrm + // can change the "instruction". + // + // Note: it might be more robust to not have this ordering restriction but that would require the + // processing tool to save all in-progress calculations, for all instructions -- perhaps using a + // lot of memory? printf("uint8_t opcodes[] = {\n"); - printf("// Primary Opcode\n"); + struct byteSequence { + const char* string; + int numBytes; + }; + + // Opcodes in legacy map 0 don't change the instruction based on the 0x66 prefix (unlike in + // other maps), so the 0x66 prefix can vary inside (in a more nested loop) the modrm loop. 
+ const byteSequence legacyMap0PrefixStrings[] = { + { "", 0 }, + { "0x66, ", 1 }, // Operand size prefix 0x66 + { "0x40, ", 1 }, // REX + { "0x66, 0x40, ", 2 }, // Operand size prefix 0x66 + REX + { "0x4F, ", 1 }, // REX.WRXB + { "0x66, 0x4F, ", 2 }, // Operand size prefix 0x66 + REX.WRXB + { "0xD5, 0x00, ", 2 }, // REX2.M0=0.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0x66, 0xD5, 0x00, ", 3 }, // Operand size prefix 0x66 + REX2.M0=0.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0xD5, 0x7F, ", 2 }, // REX2.M0=0.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + { "0x66, 0xD5, 0x7F, ", 3 } // Operand size prefix 0x66 + REX2.M0=0.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + }; + + printf("// Primary Opcode (legacy map 0)\n"); for (int i = 0; i < 256; ++i) { switch(i) @@ -47,6 +117,7 @@ int main(int argc, char* argv[]) case 0x67: // AddrSize case 0xc4: // Vex 3 Byte case 0xc5: // Vex 2 Byte + case 0xd5: // REX2 case 0xf0: // Lock case 0xf2: // Repne case 0xf3: // Rep @@ -54,24 +125,36 @@ int main(int argc, char* argv[]) default: break; } + for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x%02x, 0x%02x, %s", i, modrm, postamble); - // REX - printf( "0x40, 0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x40, 0x%02x, 0x%02x, %s", i, modrm, postamble); - // REX.WRXB - printf( "0x4f, 0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x4f, 0x%02x, 0x%02x, %s", i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap0PrefixStrings); ++prefixNum) + { + printf("%s0x%02x, 0x%02x, ", legacyMap0PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(legacyMap0PrefixStrings[prefixNum].numBytes + 2); + } } printf("\n"); } // `66 F2` is only used for `0F 38 F*` ("row F") - const char* const ppString[] = {"", "0x66, ", "0xf3, ", "0xf2, ", "0x66, 0xf2, "}; + const byteSequence ppString[] = { + { "", 0 }, + { "0x66, ", 1 }, + { "0xf3, ", 1 }, + { "0xf2, ", 1 }, + { "0x66, 0xf2, ", 2 } + }; - 
printf("// Secondary Opcode\n"); + const byteSequence legacyMap1PrefixStrings[] = { + { "0x0F, ", 1 }, // Escape prefix + { "0x40, 0x0F, ", 2 }, // REX + { "0x4F, 0x0F, ", 2 }, // REX.WRXB + { "0xD5, 0x80, ", 2 }, // REX2.M0=1.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0xD5, 0xFF, ", 2 } // REX2.M0=1.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + }; + + printf("// Secondary Opcode: 0F (legacy map 1)\n"); for (int i = 0; i < 256; ++i) { if (i == 0x38) // extension: 0F 38 @@ -83,17 +166,23 @@ int main(int argc, char* argv[]) { for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "0x40, %s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "0x4f, %s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap1PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, legacyMap1PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap1PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); } - printf("// 0F 38\n"); + const byteSequence legacyMap2PrefixStrings[] = { + { "0x0F, 0x38, ", 2 }, + { "0x40, 0x0F, 0x38, ", 3 }, // REX + { "0x4F, 0x0F, 0x38, ", 3 } // REX.WRXB + }; + + printf("// 0F 38 (legacy map 2)\n"); for (int i = 0; i < 256; ++i) { for (int pp = 0; pp < 5; ++pp) @@ -104,28 +193,34 @@ int main(int argc, char* argv[]) for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "%s0x40, 0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "%s0x4f, 0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap2PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, 
legacyMap2PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap2PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); } - printf("// 0F 3A\n"); + const byteSequence legacyMap3PrefixStrings[] = { + { "0x0F, 0x3A, ", 2 }, + { "0x40, 0x0F, 0x3A, ", 3 }, // REX + { "0x4F, 0x0F, 0x3A, ", 3 } // REX.WRXB + }; + + printf("// 0F 3A (legacy map 3)\n"); for (int i = 0; i < 256; ++i) { for (int pp = 0; pp < 2; ++pp) // only 66 prefix is used (no F3, F2) (F2 is used in VEX 0F 3A) { for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "%s0x40, 0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "%s0x4f, 0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap3PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, legacyMap3PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap3PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); @@ -157,7 +252,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe1, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, postamble); + printf("0xc4, 0xe1, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -173,7 +269,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe2, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, postamble); + printf("0xc4, 0xe2, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -189,7 +286,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe3, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, 
postamble); + printf("0xc4, 0xe3, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -199,21 +297,30 @@ int main(int argc, char* argv[]) // Interesting cases for the EVEX prefix. Several cases are added below, in the loops, to ensure desired // ordering: // 1. cases of `mmm` (which defines the opcode decoding map) are the outer loops. - // 2. cases of `pp`, next inner loops. - // 3. cases of ModR/M byte, innermost loops. + // 2. one-byte instruction opcode, next inner loops. + // 3. cases of `pp`, next inner loops. + // 4. cases of ModR/M byte, next inner loops. + // 5. various EVEX cases, innermost loops. + // NOTE: 4 & 5 can probably (and possibly should, for consistency with above loops) be swapped. // // In all cases, we have: // P0: // P[3] = P0[3] = 0 // required by specification + // -- For APX, mmm=0b100, P[3] = B4, 0 is ok // EVEX.R'=1 (inverted) + // -- For APX, mmm=0b100, EVEX.R' = EVEX.R4 (inverted) = P[4]. 1 (inverted value) is ok // EVEX.RXB=111 (inverted) + // -- For APX, mmm=0b100, EVEX.RXB (inverted) = EVEX.R3.X3.B3 (inverted), so 111 is ok. // P1: // P[10] = P1[2] = 1 // required by specification + // -- For APX, mmm=0b100, EVEX.X4/1 (inverted) so 1 is ok // P2: - // EVEX.aaa = 0 // opmask register k0 (no masking) - // EVEX.V'=1 (inverted) - // EVEX.b=0 // no broadcast (REVIEW: need to handle broadcast as it changes the size of the memory operand) - // EVEX.z=0 // always merge + // P[18:16] = P2[2:0] = EVEX.aaa = 0 // opmask register k0 (no masking) + // -- For APX, mmm=0b100, P2[0] = P2[1] = 0, P2[2] = NF = 0 (same as non-APX) + // P[19] = P2[3] = EVEX.V'=1 (inverted) + // -- For APX, mmm=0b100, EVEX.V' = EVEX.V4 (inverted), so 1 is ok. + // P[23] = P2[7] = EVEX.z=0 // always merge + // -- For APX, mmm=0b100, P[23] = 0. // // Note that we don't need to consider disp8*N compressed displacement support since that is not used for // RIP-relative addressing, which is all we care about. 
@@ -222,6 +329,10 @@ int main(int argc, char* argv[]) const int evex_p1_base = 0x04; const int evex_p2_base = 0x08; + const int evex_4_p0_base = 0xf0; + const int evex_4_p1_base = 0x7c; + const int evex_4_p2_base = 0x08; + const int evex_w_cases[] = // EVEX.W in P1 { 0, @@ -229,6 +340,8 @@ int main(int argc, char* argv[]) }; const size_t evex_w_cases_size = ARRAYSIZE(evex_w_cases); + // For APX, mmm=0b100, EVEX.vvvv is used to store NDD register if EVEX.ND=1. We never set EVEX.ND=1 + // since it doesn't affect instruction size or RIP-relative memory information. const int evex_vvvv_cases[] = // EVEX.vvvv in P1 { 0, // 0000b (xmm15) @@ -236,6 +349,7 @@ int main(int argc, char* argv[]) }; const size_t evex_vvvv_cases_size = ARRAYSIZE(evex_vvvv_cases); + // For APX, mmm=0b100, P[22:21] = P2[6:5] = EVEX.L'L and must be zero. const int evex_LprimeL_cases[] = // EVEX.L'L in P2 { 0, // 00b = 128-bit vectors @@ -244,6 +358,7 @@ int main(int argc, char* argv[]) }; const size_t evex_LprimeL_cases_size = ARRAYSIZE(evex_LprimeL_cases); + // -- For APX, mmm=0b100, P[20] = P2[4] = EVEX.b = EVEX.ND, so we keep it zero const int evex_b_cases[] = // EVEX.b in P2 { 0, // 0b = no broadcast @@ -251,14 +366,17 @@ int main(int argc, char* argv[]) }; const size_t evex_b_cases_size = ARRAYSIZE(evex_b_cases); - const size_t total_evex_cases = evex_w_cases_size * evex_vvvv_cases_size * evex_LprimeL_cases_size * evex_b_cases_size; + const size_t total_evex_cases = evex_w_cases_size * evex_vvvv_cases_size * evex_LprimeL_cases_size * evex_b_cases_size; + const size_t total_evex_4_cases = evex_w_cases_size; struct EvexBytes { int p0, p1, p2; - } - EvexCases[total_evex_cases]; - + }; + + EvexBytes EvexCases[total_evex_cases]; // cases for mmm=0b001, 0b010, 0b011 + EvexBytes Evex4Cases[total_evex_4_cases]; // cases for mmm=0b100 + size_t evex_case = 0; for (size_t i = 0; i < evex_w_cases_size; i++) { @@ -277,6 +395,15 @@ int main(int argc, char* argv[]) } } + evex_case = 0; + for (size_t i = 0; i 
< evex_w_cases_size; i++) + { + Evex4Cases[evex_case].p0 = evex_4_p0_base; + Evex4Cases[evex_case].p1 = evex_4_p1_base | evex_w_cases[i]; + Evex4Cases[evex_case].p2 = evex_4_p2_base; + ++evex_case; + } + printf("// EVEX: mmm=001 (0F)\n"); for (int i = 0; i < 256; ++i) { @@ -289,7 +416,8 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x1; // mmm=001 (0F) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } @@ -308,7 +436,8 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x2; // mmm=010 (0F 38) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } @@ -327,7 +456,28 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x3; // mmm=011 (0F 3A) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); + } + } + } + printf("\n"); + } + + printf("// EVEX: mmm=100 (extended EVEX; APX promoted legacy map 0 instructions)\n"); + for (int i = 0; i < 256; ++i) + { + for (int pp = 0; pp < 4; ++pp) + { + for (int modrm = 0x5; modrm < 64; modrm += 8) + { + for (int c = 0; c < ARRAYSIZE(Evex4Cases); ++c) + { + int evex_p0 = Evex4Cases[c].p0 | 0x4; // mmm=100 + int evex_p1 = Evex4Cases[c].p1 | pp; + int evex_p2 = Evex4Cases[c].p2; + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, 
evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } diff --git a/src/coreclr/debug/ee/amd64/walker.cpp b/src/coreclr/debug/ee/amd64/walker.cpp index 4eef90d526a2b1..5ed3b9c8e319be 100644 --- a/src/coreclr/debug/ee/amd64/walker.cpp +++ b/src/coreclr/debug/ee/amd64/walker.cpp @@ -23,6 +23,8 @@ // void NativeWalker::Decode() { + LOG((LF_CORDB, LL_INFO100000, "NW:Decode: m_ip 0x%p\n", m_ip)); + const BYTE *ip = m_ip; m_type = WALK_UNKNOWN; @@ -30,13 +32,13 @@ void NativeWalker::Decode() m_nextIP = NULL; BYTE rex = 0; - - LOG((LF_CORDB, LL_INFO100000, "NW:Decode: m_ip 0x%p\n", m_ip)); + BYTE rex2_payload = 0; + bool has_rex2 = false; BYTE prefix = *ip; if (prefix == 0xcc) { - prefix = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); + prefix = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); // REVIEW: change `m_ip` to `ip`? LOG((LF_CORDB, LL_INFO100000, "NW:Decode 1st byte was patched, might have been prefix\n")); } @@ -65,8 +67,13 @@ void NativeWalker::Decode() // String REP prefixes case 0xf2: // REPNE/REPNZ case 0xf3: - LOG((LF_CORDB, LL_INFO10000, "NW:Decode: prefix:%0.2x ", prefix)); + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: prefix:%02x ", prefix)); ip++; + // REVIEW: it looks like a bug that we don't loop here looking for additional + // prefixes (the 'continue' branches to the 'while (0)' which exits the loop). + // Thus, we will only process a single prefix. For example, we won't process + // "66 40", which is an operand size prefix followed by a REX prefix, and is legal. + // REX and REX2 need to be the final prefixes, but even then, looping would be safe. continue; // REX register extension prefixes @@ -86,13 +93,27 @@ void NativeWalker::Decode() case 0x4d: case 0x4e: case 0x4f: - LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX prefix:%0.2x ", prefix)); + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX prefix:%02x ", prefix)); // make sure to set rex to prefix, not *ip because *ip still represents the // codestream which has a 0xcc in it. 
rex = prefix; ip++; continue; + // REX2 register extension prefix + case 0xd5: + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX2 prefix:%02x ", prefix)); + has_rex2 = true; + ip++; + rex2_payload = *ip; // Get the REX2 payload byte + if (rex2_payload == 0xcc) + { + rex2_payload = (BYTE)DebuggerController::GetPatchedOpcode(ip); + LOG((LF_CORDB, LL_INFO100000, "NW:Decode 2nd byte was patched, REX2 prefix payload byte\n")); + } + ip++; + continue; + default: break; } @@ -101,18 +122,18 @@ void NativeWalker::Decode() // Read the opcode m_opcode = *ip++; - LOG((LF_CORDB, LL_INFO100000, "NW:Decode: ip 0x%p, m_opcode:%0.2x\n", ip, m_opcode)); + LOG((LF_CORDB, LL_INFO100000, "NW:Decode: ip 0x%p, m_opcode:%02x\n", ip, m_opcode)); // Don't remove this, when we did the check above for the prefix we didn't modify the codestream // and since m_opcode was just taken directly from the code stream it will be patched if we // didn't have a prefix if (m_opcode == 0xcc) { - m_opcode = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); - LOG((LF_CORDB, LL_INFO100000, "NW:Decode after patch look up: m_opcode:%0.2x\n", m_opcode)); + m_opcode = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); // REVIEW: it looks like a bug that we use 'm_ip' instead of 'ip' here. + LOG((LF_CORDB, LL_INFO100000, "NW:Decode after patch look up: m_opcode:%02x\n", m_opcode)); } - // Setup rex bits if needed + // Setup REX bits if needed BYTE rex_b = 0; BYTE rex_x = 0; BYTE rex_r = 0; @@ -124,29 +145,52 @@ void NativeWalker::Decode() rex_r = (rex & 0x4) >> 2; // high bit to modrm reg field } + // Setup REX2 bits if needed + BYTE rex2_b3 = 0; + BYTE rex2_b4 = 0; + BYTE rex2_x3 = 0; + BYTE rex2_x4 = 0; + BYTE rex2_r3 = 0; + BYTE rex2_r4 = 0; + + // We could have a REX2 prefix with a zero payload byte, but that would leave these fields all zero, which is correct. 
+ if (rex2_payload != 0) + { + rex2_b3 = rex2_payload & 0x1; + rex2_x3 = (rex2_payload >> 1) & 0x1; + rex2_r3 = (rex2_payload >> 2) & 0x1; + rex2_b4 = (rex2_payload >> 4) & 0x1; + rex2_x4 = (rex2_payload >> 5) & 0x1; + rex2_r4 = (rex2_payload >> 6) & 0x1; + } + // Analyze what we can of the opcode switch (m_opcode) { + // Look for CALL, JMP with opcode 0xFF, modrm.reg=2,3,4,5 case 0xff: { BYTE modrm = *ip++; - // Ignore "inc dword ptr [reg]" instructions - if (modrm == 0) - break; - BYTE mod = (modrm & 0xC0) >> 6; BYTE reg = (modrm & 0x38) >> 3; BYTE rm = (modrm & 0x07); - reg |= (rex_r << 3); - rm |= (rex_b << 3); - - if ((reg < 2) || (reg > 5 && reg < 8) || (reg > 15)) { - // not a valid register for a CALL or BRANCH + if ((reg < 2) || (reg > 5)) { + // Not a CALL/JMP instruction (modrm.reg field is an opcode extension for opcode FF) return; } + BYTE rm_reg = rm; + if (rex != 0) + { + rm_reg |= (rex_b << 3); + } + else if (rex2_payload != 0) + { + rm_reg |= (rex2_b3 << 3) | (rex2_b4 << 4); + } + BYTE *result; WORD displace; @@ -158,12 +202,12 @@ void NativeWalker::Decode() case 0: case 1: case 2: - if ((rm & 0x07) == 4) // we have an SIB byte following + if (rm == 4) // we have an SIB byte following { // // Get values from the SIB byte // - BYTE sib = *ip; + BYTE sib = *ip; _ASSERT(sib != 0); @@ -171,21 +215,31 @@ void NativeWalker::Decode() BYTE index = (sib & 0x38) >> 3; BYTE base = (sib & 0x07); - index |= (rex_x << 3); - base |= (rex_b << 3); + BYTE index_reg = index; + BYTE base_reg = base; + if (rex != 0) + { + index_reg |= (rex_x << 3); + base_reg |= (rex_b << 3); + } + else if (rex2_payload != 0) + { + index_reg |= (rex2_x3 << 3) | (rex2_x4 << 4); + base_reg |= (rex2_b3 << 3) | (rex2_b4 << 4); + } ip++; // // Get starting value // - if ((mod == 0) && ((base & 0x07) == 5)) + if ((mod == 0) && (base == 5)) { result = 0; } else { - result = (BYTE *)(size_t)GetRegisterValue(base); + result = (BYTE *)(size_t)GetRegisterValue(base_reg); } // @@ -193,7 +247,7 @@ 
void NativeWalker::Decode() // if (index != 0x4) { - result = result + (GetRegisterValue(index) << ss); + result = result + (GetRegisterValue(index_reg) << ss); } // @@ -201,7 +255,7 @@ void NativeWalker::Decode() // if (mod == 0) { - if ((base & 0x07) == 5) + if (base == 5) { result = result + *((INT32*)ip); displace = 7; @@ -221,7 +275,6 @@ void NativeWalker::Decode() result = result + *((INT32*)ip); displace = 7; } - } else { @@ -230,28 +283,32 @@ void NativeWalker::Decode() // // Check for RIP-relative addressing mode. - if ((mod == 0) && ((rm & 0x07) == 5)) + if ((mod == 0) && (rm == 5)) { + // [RIP + disp32] displace = 6; // 1 byte opcode + 1 byte modrm + 4 byte displacement (signed) result = const_cast<BYTE *>(m_ip) + displace + *(reinterpret_cast<const INT32*>(ip)); } else { - result = (BYTE *)GetRegisterValue(rm); + result = (BYTE *)GetRegisterValue(rm_reg); if (mod == 0) { - displace = 2; + // [modrm.rm] + displace = 2; // 1 byte opcode + 1 byte modrm } else if (mod == 1) { + // [modrm.rm + disp8] result = result + *((INT8*)ip); - displace = 3; + displace = 3; // 1 byte opcode + 1 byte modrm + 1 byte displacement } else // mod == 2 { + // [modrm.rm + disp32] result = result + *((INT32*)ip); - displace = 6; + displace = 6; // 1 byte opcode + 1 byte modrm + 4 byte displacement (signed) } } } @@ -266,9 +323,9 @@ void NativeWalker::Decode() case 3: default: // The operand is stored in a register. - result = (BYTE *)GetRegisterValue(rm); - displace = 2; - + // [modrm.rm] + result = (BYTE *)GetRegisterValue(rm_reg); + displace = 2; // 1 byte opcode + 1 byte modrm break; } @@ -280,6 +337,11 @@ void NativeWalker::Decode() displace++; } + if (has_rex2) // Can't just check `rex2_payload` since that payload byte might be zero. 
+ { + displace += 2; // adjust for the size of the REX2 prefix + } + // because we already checked register validity for CALL/BRANCH // instructions above we can assume that there is no other option if ((reg == 4) || (reg == 5)) @@ -344,52 +406,71 @@ UINT64 NativeWalker::GetRegisterValue(int registerNumber) { case 0: return m_registers->pCurrentContext->Rax; - break; case 1: return m_registers->pCurrentContext->Rcx; - break; case 2: return m_registers->pCurrentContext->Rdx; - break; case 3: return m_registers->pCurrentContext->Rbx; - break; case 4: return m_registers->pCurrentContext->Rsp; - break; case 5: return m_registers->pCurrentContext->Rbp; - break; case 6: return m_registers->pCurrentContext->Rsi; - break; case 7: return m_registers->pCurrentContext->Rdi; - break; case 8: return m_registers->pCurrentContext->R8; - break; case 9: return m_registers->pCurrentContext->R9; - break; case 10: return m_registers->pCurrentContext->R10; - break; case 11: return m_registers->pCurrentContext->R11; - break; case 12: return m_registers->pCurrentContext->R12; - break; case 13: return m_registers->pCurrentContext->R13; - break; case 14: return m_registers->pCurrentContext->R14; - break; case 15: return m_registers->pCurrentContext->R15; - break; +#if 0 + // TODO-XArch-APX: The Windows SDK doesn't define the APX eGPR registers yet. 
+ case 16: + return m_registers->pCurrentContext->R16; + case 17: + return m_registers->pCurrentContext->R17; + case 18: + return m_registers->pCurrentContext->R18; + case 19: + return m_registers->pCurrentContext->R19; + case 20: + return m_registers->pCurrentContext->R20; + case 21: + return m_registers->pCurrentContext->R21; + case 22: + return m_registers->pCurrentContext->R22; + case 23: + return m_registers->pCurrentContext->R23; + case 24: + return m_registers->pCurrentContext->R24; + case 25: + return m_registers->pCurrentContext->R25; + case 26: + return m_registers->pCurrentContext->R26; + case 27: + return m_registers->pCurrentContext->R27; + case 28: + return m_registers->pCurrentContext->R28; + case 29: + return m_registers->pCurrentContext->R29; + case 30: + return m_registers->pCurrentContext->R30; + case 31: + return m_registers->pCurrentContext->R31; +#endif default: _ASSERTE(!"Invalid register number!"); } @@ -431,8 +512,6 @@ static bool InstructionHasModRMByte(Amd64InstrDecode::InstrForm form, bool W) modrm = false; break; default: - if (form & Amd64InstrDecode::InstrForm::Extension) - modrm = true; break; } return modrm; @@ -446,15 +525,15 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) // M1st cases (memory operand comes first) case Amd64InstrDecode::InstrForm::M1st_I1B_L_M16B_or_M8B: case Amd64InstrDecode::InstrForm::M1st_I1B_LL_M8B_M16B_M32B: + case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::M1st_I1B_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_L_M32B_or_M16B: case Amd64InstrDecode::InstrForm::M1st_LL_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::M1st_LL_M2B_M4B_M8B: case Amd64InstrDecode::InstrForm::M1st_LL_M4B_M8B_M16B: case Amd64InstrDecode::InstrForm::M1st_LL_M8B_M16B_M32B: - case Amd64InstrDecode::InstrForm::M1st_bLL_M4B_M16B_M32B_M64B: - 
case Amd64InstrDecode::InstrForm::M1st_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::M1st_M16B: case Amd64InstrDecode::InstrForm::M1st_M16B_I1B: case Amd64InstrDecode::InstrForm::M1st_M1B: @@ -469,6 +548,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) case Amd64InstrDecode::InstrForm::M1st_W_M4B_or_M1B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::M1st_WP_M8B_or_M4B_or_M2B: @@ -482,6 +562,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) case Amd64InstrDecode::InstrForm::MOnly_P_M6B_or_M4B: case Amd64InstrDecode::InstrForm::MOnly_W_M16B_or_M8B: case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M8B_or_M2B: isWrite = true; @@ -495,7 +576,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, bool W, bool L, bool evex_b, int LL, bool fPrefix66) { uint8_t opSize = 0; - bool P = !((pp == 1) || fPrefix66); + const bool P = ((pp == 1) || fPrefix66); switch (form) { // M32B @@ -545,6 +626,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // W_M8B_or_M4B case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::MOp_I1B_W_M8B_or_M4B: @@ -553,7 +635,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // WP_M8B_or_M8B_or_M2B case 
Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M8B_or_M2B: - opSize = W ? 8 : P ? 8 : 2; + opSize = W ? 8 : P ? 2 : 8; break; // WP_M8B_or_M4B_or_M2B case Amd64InstrDecode::InstrForm::M1st_I1B_WP_M8B_or_M4B_or_M2B: @@ -563,11 +645,14 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, case Amd64InstrDecode::InstrForm::MOp_I1B_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_or_M4B_or_M2B: - opSize = W ? 8 : P ? 4 : 2; + opSize = W ? 8 : P ? 2 : 4; break; // W_M8B_or_M2B + case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M2B: + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_W_M8B_or_M2B: + case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M2B: opSize = W ? 8 : 2; break; // M8B @@ -581,7 +666,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // P_M6B_or_M4B case Amd64InstrDecode::InstrForm::MOnly_P_M6B_or_M4B: - opSize = P ? 6 : 4; + opSize = P ? 
4 : 6; break; // M4B case Amd64InstrDecode::InstrForm::M1st_M4B: @@ -660,7 +745,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // bLL_M4B_M16B_M32B_M64B - case Amd64InstrDecode::InstrForm::M1st_bLL_M4B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_I1B_bLL_M4B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_bLL_M4B_M16B_M32B_M64B: if (evex_b) @@ -674,7 +758,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // bLL_M8B_M16B_M32B_M64B - case Amd64InstrDecode::InstrForm::M1st_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_I1B_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_bLL_M8B_M16B_M32B_M64B: if (evex_b) @@ -779,7 +862,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, } break; - // MUnknown case Amd64InstrDecode::InstrForm::M1st_MUnknown: case Amd64InstrDecode::InstrForm::MOnly_MUnknown: @@ -796,7 +878,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, bool fPrefix66) { int immSize = 0; - bool P = !((pp == 1) || fPrefix66); + const bool P = ((pp == 1) || fPrefix66); switch (form) { case Amd64InstrDecode::InstrForm::I1B: @@ -833,6 +915,7 @@ static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, immSize = 3; break; case Amd64InstrDecode::InstrForm::I4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: immSize = 4; break; case Amd64InstrDecode::InstrForm::I8B: @@ -841,10 +924,13 @@ static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, case Amd64InstrDecode::InstrForm::M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::WP_I4B_or_I4B_or_I2B: - immSize = W ? 4 : P ? 4 : 2; + immSize = W ? 4 : P ? 
2 : 4; break; case Amd64InstrDecode::InstrForm::WP_I8B_or_I4B_or_I2B: - immSize = W ? 8 : P ? 4 : 2; + immSize = W ? 8 : P ? 2 : 4; + break; + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: + immSize = W ? 4 : 2; break; default: @@ -966,12 +1052,14 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio VexMapC40F3A = 0xc403, EvexMap0F = 0x6201, EvexMap0F38 = 0x6202, - EvexMap0F3A = 0x6203 + EvexMap0F3A = 0x6203, + EvexMap4 = 0x6204 } opCodeMap; switch (*address) { case 0xf: + { switch (address[1]) { case 0x38: @@ -999,6 +1087,7 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio else if (fPrefixF3) pp = 0x2; break; + } case 0xc4: // Vex 3-byte { @@ -1052,7 +1141,10 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio case 0x62: // Evex { - BYTE evex_mmm = address[1] & 0x7; + BYTE evex_p0 = address[1]; + BYTE evex_p1 = address[2]; + BYTE evex_p2 = address[3]; + BYTE evex_mmm = evex_p0 & 0x7; switch (evex_mmm) { case 0x1: @@ -1067,29 +1159,64 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio LOG((LF_CORDB, LL_INFO10000, "map:Evex0F3A ")); opCodeMap = EvexMap0F3A; break; + case 0x4: + LOG((LF_CORDB, LL_INFO10000, "map:Evex4 ")); + opCodeMap = EvexMap4; + break; default: _ASSERT(!"Unknown Evex 'mmm' bytes"); return; } - BYTE evex_w = address[2] & 0x80; + BYTE evex_w = evex_p1 & 0x80; if (evex_w != 0) { W = true; } - if ((address[2] & 0x10) != 0) + if (evex_mmm != 4) { - evex_b = true; - } + if ((evex_p2 & 0x10) != 0) + { + evex_b = true; + } - evex_LL = (address[2] >> 5) & 0x3; + evex_LL = (evex_p2 >> 5) & 0x3; + } - pp = address[1] & 0x3; + pp = evex_p1 & 0x3; address += 4; break; } + case 0xD5: // REX2 + { + BYTE rex2_byte1 = address[1]; + address += 2; + + BYTE rex2_w = rex2_byte1 & 0x08; + if (rex2_w != 0) + { + W = true; + } + + if (fPrefix66) + { + pp = 0x1; + } + + BYTE rex2_m0 = rex2_byte1 & 0x80; + if (rex2_m0 == 0) + { + 
opCodeMap = Primary; + } + else + { + opCodeMap = Secondary; + } + break; + } + default: opCodeMap = Primary; break; @@ -1105,7 +1232,7 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio switch (opCodeMap) { case Primary: - form = Amd64InstrDecode::instrFormPrimary[opCode]; + form = Amd64InstrDecode::instrFormPrimary[opCode]; // NOTE: instrFormPrimary is the only map that uses 'opCode', not 'opCodeExt'. break; case Secondary: form = Amd64InstrDecode::instrFormSecondary[opCodeExt]; @@ -1134,6 +1261,9 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio case EvexMap0F3A: form = Amd64InstrDecode::instrFormEvex_0F3A[opCodeExt]; break; + case EvexMap4: + form = Amd64InstrDecode::instrFormEvex_4[opCodeExt]; + break; default: _ASSERTE(false); } @@ -1227,4 +1357,3 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio } #endif // TARGET_AMD64 -