Skip to content

Commit 3aeb282

Browse files
committed
Fixed size register & mnemonic copying
1 parent f3e5b9c commit 3aeb282

File tree

7 files changed

+51
-28
lines changed

7 files changed

+51
-28
lines changed

disOps/disOps.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#
22
# disOps.py v 1.0.0
33
#
4-
# Copyright (C) 2003-2018 Gil Dabah, http://ragestorm.net/distorm/
4+
# Copyright (C) 2003-2020 Gil Dabah, http://ragestorm.net/distorm/
55
#
66
# disOps is a part of the diStorm project, but can be used for anything.
77
# The generated output is tightly coupled with diStorm data structures which can be found at instructions.h.
@@ -80,7 +80,8 @@ def CreateMnemonicsC(mnemonicsIds):
8080
s += "\"\\\\x%02x\" \"%s\\\\0\" " % (len(i[0]), i[0])
8181
if len(s) - s.rfind("\n") >= 76:
8282
s += "\\\\\n"
83-
s = s[:-1] + ";" # Ignore last space.
83+
s = s[:-1] # Ignore last space.
84+
s += " \\\\\\n\"" + "\\\\x00" * 20 + "\"; /* Sentinel mnemonic. */"
8485
# Return enum & mnemonics.
8586
return (opsEnum, s)
8687

@@ -133,7 +134,7 @@ def WriteMnemonicsC(mnemonicsIds):
133134
path = os.path.join("..", "src", "mnemonics.c")
134135
print("- Try rewriting mnemonics for %s." % path)
135136
old = open(path, "r").read()
136-
rePattern = "const unsigned char _MNEMONICS\[\] =.*?;"
137+
rePattern = "const unsigned char _MNEMONICS\[\] =.*?\*/"
137138
if re.compile(rePattern, reFlags).search(old) == None:
138139
raise Exception("Couldn't find matching mnemonics text block for substitution in " + path)
139140
new = re.sub(rePattern, m, old, 1, reFlags)

disOps/registers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
regsText += " "
3737
regsEnum += " "
3838
old = i
39-
regsText += "{%d, \"%s\"}\n};\n" % (len(old), old)
39+
regsText += "{%d, \"%s\"},\n\t{0, \"\"} /* There must be an empty last reg see, strcat_WSR. */\n};\n" % (len(old), old)
4040
regsEnum += "R_" + old + "\n} _RegisterType;\n"
4141

4242
print(regsEnum)

include/mnemonics.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ typedef struct WMnemonic {
2525

2626
typedef struct WRegister {
2727
unsigned int length;
28-
unsigned char p[6]; /* p is a null terminated string. */
28+
unsigned char p[12]; /* p is a null terminated string. */
2929
} _WRegister;
3030

3131
extern const unsigned char _MNEMONICS[];

src/distorm.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
192192
switch (di->ops[i].type)
193193
{
194194
case O_REG:
195-
strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
195+
strcat_WSR(str, &_REGISTERS[di->ops[i].index]);
196196
break;
197197
case O_IMM:
198198
/* If the instruction is 'push', show explicit size (except byte imm). */
@@ -218,7 +218,7 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
218218
distorm_format_size(str, di, i);
219219
chrcat_WS(str, OPEN_CHR);
220220
if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
221-
strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
221+
strcat_WSR(str, &_REGISTERS[SEGMENT_GET(di->segment)]);
222222
chrcat_WS(str, SEG_OFF_CHR);
223223
}
224224
tmpDisp64 = di->disp & addrMask;
@@ -254,11 +254,11 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
254254
case I_SCAS: isDefault = FALSE; break;
255255
}
256256
if (!isDefault && (segment != R_NONE)) {
257-
strcat_WS(str, (const _WString*)&_REGISTERS[segment]);
257+
strcat_WSR(str, &_REGISTERS[segment]);
258258
chrcat_WS(str, SEG_OFF_CHR);
259259
}
260260

261-
strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
261+
strcat_WSR(str, &_REGISTERS[di->ops[i].index]);
262262

263263
distorm_format_signed_disp(str, di, addrMask);
264264
chrcat_WS(str, CLOSE_CHR);
@@ -267,14 +267,14 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
267267
distorm_format_size(str, di, i);
268268
chrcat_WS(str, OPEN_CHR);
269269
if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
270-
strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
270+
strcat_WSR(str, &_REGISTERS[SEGMENT_GET(di->segment)]);
271271
chrcat_WS(str, SEG_OFF_CHR);
272272
}
273273
if (di->base != R_NONE) {
274-
strcat_WS(str, (const _WString*)&_REGISTERS[di->base]);
274+
strcat_WSR(str, &_REGISTERS[di->base]);
275275
chrcat_WS(str, PLUS_DISP_CHR);
276276
}
277-
strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
277+
strcat_WSR(str, &_REGISTERS[di->ops[i].index]);
278278
if (di->scale != 0) {
279279
chrcat_WS(str, '*');
280280
if (di->scale == 2) chrcat_WS(str, '2');
@@ -334,7 +334,12 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
334334
str->length = 0;
335335
}
336336

337-
memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1);
337+
/*
338+
* Always copy 16 bytes from the mnemonic; the mnemonics table ends with sentinel padding, so reading past a short mnemonic is safe.
339+
* This helps the compiler to remove the call to memcpy and therefore makes this copying much faster.
340+
* The longest mnemonic is exactly 16 chars long, so the 16-byte copy may not include its terminator; the string is null terminated explicitly below.
341+
*/
342+
memcpy((int8_t*)&str->p[str->length], mnemonic->p, 16);
338343
str->length += mnemonic->length;
339344

340345
if (suffixSize) {
@@ -347,6 +352,8 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
347352
}
348353
}
349354

355+
str->p[str->length] = 0;
356+
350357
result->offset = offset;
351358
result->size = size;
352359
}

src/insts.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ This library is licensed under the BSD license. See the file COPYING.
1515

1616

1717
/*
18-
* GENERATED BY disOps at Tue Apr 21 19:11:41 2020
18+
* GENERATED BY disOps at Tue Apr 21 23:47:49 2020
1919
*/
2020

2121
_InstInfo II_MOVSXD = /*II*/ {0x1d4, 10027};

src/mnemonics.c

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -291,22 +291,24 @@ const unsigned char _MNEMONICS[] =
291291
"\x08" "RDGSBASE\0" "\x07" "LDMXCSR\0" "\x08" "WRFSBASE\0" "\x08" "VLDMXCSR\0" \
292292
"\x07" "STMXCSR\0" "\x08" "WRGSBASE\0" "\x08" "VSTMXCSR\0" "\x07" "VMPTRLD\0" \
293293
"\x07" "VMCLEAR\0" "\x05" "VMXON\0" "\x06" "MOVSXD\0" "\x05" "PAUSE\0" \
294-
"\x04" "WAIT\0" "\x06" "RDRAND\0" "\x06" "_3DNOW\0";
294+
"\x04" "WAIT\0" "\x06" "RDRAND\0" "\x06" "_3DNOW\0" \
295+
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; /* Sentinel mnemonic. */
295296

296297
const _WRegister _REGISTERS[] = {
297-
{ 3, "RAX" }, { 3, "RCX" }, { 3, "RDX" }, { 3, "RBX" }, { 3, "RSP" }, { 3, "RBP" }, { 3, "RSI" }, { 3, "RDI" }, { 2, "R8" }, { 2, "R9" }, { 3, "R10" }, { 3, "R11" }, { 3, "R12" }, { 3, "R13" }, { 3, "R14" }, { 3, "R15" },
298-
{ 3, "EAX" }, { 3, "ECX" }, { 3, "EDX" }, { 3, "EBX" }, { 3, "ESP" }, { 3, "EBP" }, { 3, "ESI" }, { 3, "EDI" }, { 3, "R8D" }, { 3, "R9D" }, { 4, "R10D" }, { 4, "R11D" }, { 4, "R12D" }, { 4, "R13D" }, { 4, "R14D" }, { 4, "R15D" },
299-
{ 2, "AX" }, { 2, "CX" }, { 2, "DX" }, { 2, "BX" }, { 2, "SP" }, { 2, "BP" }, { 2, "SI" }, { 2, "DI" }, { 3, "R8W" }, { 3, "R9W" }, { 4, "R10W" }, { 4, "R11W" }, { 4, "R12W" }, { 4, "R13W" }, { 4, "R14W" }, { 4, "R15W" },
300-
{ 2, "AL" }, { 2, "CL" }, { 2, "DL" }, { 2, "BL" }, { 2, "AH" }, { 2, "CH" }, { 2, "DH" }, { 2, "BH" }, { 3, "R8B" }, { 3, "R9B" }, { 4, "R10B" }, { 4, "R11B" }, { 4, "R12B" }, { 4, "R13B" }, { 4, "R14B" }, { 4, "R15B" },
301-
{ 3, "SPL" }, { 3, "BPL" }, { 3, "SIL" }, { 3, "DIL" },
302-
{ 2, "ES" }, { 2, "CS" }, { 2, "SS" }, { 2, "DS" }, { 2, "FS" }, { 2, "GS" },
303-
{ 3, "RIP" },
304-
{ 3, "ST0" }, { 3, "ST1" }, { 3, "ST2" }, { 3, "ST3" }, { 3, "ST4" }, { 3, "ST5" }, { 3, "ST6" }, { 3, "ST7" },
305-
{ 3, "MM0" }, { 3, "MM1" }, { 3, "MM2" }, { 3, "MM3" }, { 3, "MM4" }, { 3, "MM5" }, { 3, "MM6" }, { 3, "MM7" },
306-
{ 4, "XMM0" }, { 4, "XMM1" }, { 4, "XMM2" }, { 4, "XMM3" }, { 4, "XMM4" }, { 4, "XMM5" }, { 4, "XMM6" }, { 4, "XMM7" }, { 4, "XMM8" }, { 4, "XMM9" }, { 5, "XMM10" }, { 5, "XMM11" }, { 5, "XMM12" }, { 5, "XMM13" }, { 5, "XMM14" }, { 5, "XMM15" },
307-
{ 4, "YMM0" }, { 4, "YMM1" }, { 4, "YMM2" }, { 4, "YMM3" }, { 4, "YMM4" }, { 4, "YMM5" }, { 4, "YMM6" }, { 4, "YMM7" }, { 4, "YMM8" }, { 4, "YMM9" }, { 5, "YMM10" }, { 5, "YMM11" }, { 5, "YMM12" }, { 5, "YMM13" }, { 5, "YMM14" }, { 5, "YMM15" },
308-
{ 3, "CR0" }, { 0, "" }, { 3, "CR2" }, { 3, "CR3" }, { 3, "CR4" }, { 0, "" }, { 0, "" }, { 0, "" }, { 3, "CR8" },
309-
{ 3, "DR0" }, { 3, "DR1" }, { 3, "DR2" }, { 3, "DR3" }, { 0, "" }, { 0, "" }, { 3, "DR6" }, { 3, "DR7" }
298+
{3, "RAX"}, {3, "RCX"}, {3, "RDX"}, {3, "RBX"}, {3, "RSP"}, {3, "RBP"}, {3, "RSI"}, {3, "RDI"}, {2, "R8"}, {2, "R9"}, {3, "R10"}, {3, "R11"}, {3, "R12"}, {3, "R13"}, {3, "R14"}, {3, "R15"},
299+
{3, "EAX"}, {3, "ECX"}, {3, "EDX"}, {3, "EBX"}, {3, "ESP"}, {3, "EBP"}, {3, "ESI"}, {3, "EDI"}, {3, "R8D"}, {3, "R9D"}, {4, "R10D"}, {4, "R11D"}, {4, "R12D"}, {4, "R13D"}, {4, "R14D"}, {4, "R15D"},
300+
{2, "AX"}, {2, "CX"}, {2, "DX"}, {2, "BX"}, {2, "SP"}, {2, "BP"}, {2, "SI"}, {2, "DI"}, {3, "R8W"}, {3, "R9W"}, {4, "R10W"}, {4, "R11W"}, {4, "R12W"}, {4, "R13W"}, {4, "R14W"}, {4, "R15W"},
301+
{2, "AL"}, {2, "CL"}, {2, "DL"}, {2, "BL"}, {2, "AH"}, {2, "CH"}, {2, "DH"}, {2, "BH"}, {3, "R8B"}, {3, "R9B"}, {4, "R10B"}, {4, "R11B"}, {4, "R12B"}, {4, "R13B"}, {4, "R14B"}, {4, "R15B"},
302+
{3, "SPL"}, {3, "BPL"}, {3, "SIL"}, {3, "DIL"},
303+
{2, "ES"}, {2, "CS"}, {2, "SS"}, {2, "DS"}, {2, "FS"}, {2, "GS"},
304+
{3, "RIP"},
305+
{3, "ST0"}, {3, "ST1"}, {3, "ST2"}, {3, "ST3"}, {3, "ST4"}, {3, "ST5"}, {3, "ST6"}, {3, "ST7"},
306+
{3, "MM0"}, {3, "MM1"}, {3, "MM2"}, {3, "MM3"}, {3, "MM4"}, {3, "MM5"}, {3, "MM6"}, {3, "MM7"},
307+
{4, "XMM0"}, {4, "XMM1"}, {4, "XMM2"}, {4, "XMM3"}, {4, "XMM4"}, {4, "XMM5"}, {4, "XMM6"}, {4, "XMM7"}, {4, "XMM8"}, {4, "XMM9"}, {5, "XMM10"}, {5, "XMM11"}, {5, "XMM12"}, {5, "XMM13"}, {5, "XMM14"}, {5, "XMM15"},
308+
{4, "YMM0"}, {4, "YMM1"}, {4, "YMM2"}, {4, "YMM3"}, {4, "YMM4"}, {4, "YMM5"}, {4, "YMM6"}, {4, "YMM7"}, {4, "YMM8"}, {4, "YMM9"}, {5, "YMM10"}, {5, "YMM11"}, {5, "YMM12"}, {5, "YMM13"}, {5, "YMM14"}, {5, "YMM15"},
309+
{3, "CR0"}, {0, ""}, {3, "CR2"}, {3, "CR3"}, {3, "CR4"}, {0, ""}, {0, ""}, {0, ""}, {3, "CR8"},
310+
{3, "DR0"}, {3, "DR1"}, {3, "DR2"}, {3, "DR3"}, {0, ""}, {0, ""}, {3, "DR6"}, {3, "DR7"},
311+
{0, ""} /* There must be an empty last reg see, strcat_WSR. */
310312
};
311313

312314
#endif /* DISTORM_LIGHT */

src/wstring.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ This library is licensed under the BSD license. See the file COPYING.
1313
#define WSTRING_H
1414

1515
#include "config.h"
16+
#include "../include/mnemonics.h"
1617

1718
#ifndef DISTORM_LIGHT
1819

@@ -22,6 +23,18 @@ void strcpylen_WS(_WString* s, const int8_t* buf, unsigned int len);
2223
void strcatlen_WS(_WString* s, const int8_t* buf, unsigned int len);
2324
void strcat_WS(_WString* s, const _WString* s2);
2425

26+
_INLINE_ void strcat_WSR(_WString* str, const _WRegister* reg)
27+
{
28+
/*
29+
* Longest register name is YMM15 - 5 characters,
30+
* copy 8 so compiler can do a QWORD move.
31+
* The copied bytes include the nul terminator and the length is advanced only by the real name length, so it's okay to copy extra bytes into the output buffer.
32+
* There's a sentinel register to make sure we don't read past the end of the registers table.
33+
*/
34+
memcpy((int8_t*)&str->p[str->length], (const int8_t*)reg->p, 8);
35+
str->length += reg->length;
36+
}
37+
2538
/*
2639
* Warning, this macro should be used only when the compiler knows the size of string in advance!
2740
* This macro is used in order to spare the call to strlen when the strings are known already.

0 commit comments

Comments
 (0)