Skip to content

Commit f3e5b9c

Browse files
committed
Instruction formatting works in-place and spares a memcpy
1 parent 589ecbb commit f3e5b9c

File tree

4 files changed

+81
-77
lines changed

4 files changed

+81
-77
lines changed

examples/cs/distorm-net/diStorm3.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@ public struct _WString
3535
[StructLayout(LayoutKind.Sequential, Pack = 8)]
3636
public struct _DecodedInst
3737
{
38+
public IntPtr offset; /* Start offset of the decoded instruction. */
39+
public uint size; /* Size of decoded instruction. */
3840
public _WString mnemonic; /* Mnemonic of decoded instruction, prefixed if required by REP, LOCK etc. */
3941
public _WString operands; /* Operands of the decoded instruction, up to 3 operands, comma-separated. */
4042
public _WString instructionHex; /* Hex dump - little endian, including prefixes. */
41-
public uint size; /* Size of decoded instruction. */
42-
public IntPtr offset; /* Start offset of the decoded instruction. */
4343
};
4444

4545

@@ -199,11 +199,11 @@ private static unsafe void Free(void* mem)
199199
private static unsafe DecodedInst CreateDecodedInstObj(_DecodedInst* inst)
200200
{
201201
return new DecodedInst {
202+
Offset = inst->offset,
203+
Size = inst->size,
202204
Mnemonic = new String(inst->mnemonic.p),
203205
Operands = new String(inst->operands.p),
204-
Hex = new string(inst->instructionHex.p),
205-
Size = inst->size,
206-
Offset = inst->offset
206+
Hex = new string(inst->instructionHex.p)
207207
};
208208
}
209209

include/distorm.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,11 +271,11 @@ typedef struct {
271271
* This structure holds all information the disassembler generates per instruction.
272272
*/
273273
typedef struct {
274+
_OffsetType offset; /* Start offset of the decoded instruction. */
275+
unsigned int size; /* Size of decoded instruction in bytes. */
274276
_WString mnemonic; /* Mnemonic of decoded instruction, prefixed if required by REP, LOCK etc. */
275277
_WString operands; /* Operands of the decoded instruction, up to 3 operands, comma-separated. */
276278
_WString instructionHex; /* Hex dump - little endian, including prefixes. */
277-
unsigned int size; /* Size of decoded instruction in bytes. */
278-
_OffsetType offset; /* Start offset of the decoded instruction. */
279279
} _DecodedInst;
280280

281281
#endif /* DISTORM_LIGHT */

python/distorm3/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,11 @@ class _CodeInfo (Structure):
101101

102102
class _DecodedInst (Structure):
103103
_fields_ = [
104+
('offset', _OffsetType),
105+
('size', c_uint),
104106
('mnemonic', _WString),
105107
('operands', _WString),
106-
('instructionHex', _WString),
107-
('size', c_uint),
108-
('offset', _OffsetType),
108+
('instructionHex', _WString)
109109
]
110110

111111
# _OperandType enum

src/distorm.c

Lines changed: 71 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,17 @@ This library is licensed under the BSD license. See the file COPYING.
2929
return DECRES_SUCCESS;
3030
}
3131

32-
/* DECRES_SUCCESS still may indicate we may have something in the result, so zero it first thing. */
33-
*usedInstructionsCount = 0;
34-
3532
if ((ci == NULL) ||
3633
(ci->codeLen < 0) ||
37-
((ci->dt != Decode16Bits) && (ci->dt != Decode32Bits) && (ci->dt != Decode64Bits)) ||
34+
((unsigned)ci->dt > (unsigned)Decode64Bits) ||
3835
(ci->code == NULL) ||
3936
(result == NULL) ||
37+
(maxInstructions == 0) ||
4038
((ci->features & (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) == (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)))
4139
{
4240
return DECRES_INPUTERR;
4341
}
4442

45-
/* Assume length=0 is success. */
46-
if (ci->codeLen == 0) {
47-
return DECRES_SUCCESS;
48-
}
49-
5043
return decode_internal(ci, FALSE, result, maxInstructions, usedInstructionsCount);
5144
}
5245

@@ -128,6 +121,7 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
128121
}
129122
}
130123

124+
/* WARNING: This function is written carefully to be able to work with the same input and output buffer in-place! */
131125
#ifdef SUPPORT_64BIT_OFFSET
132126
_DLLEXPORT_ void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
133127
#else
@@ -140,23 +134,27 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
140134
uint64_t addrMask = (uint64_t)-1;
141135
uint8_t segment;
142136
const _WMnemonic* mnemonic;
137+
unsigned int suffixSize = 0;
143138

144139
/* Set address mask, when default is for 64bits addresses. */
145140
if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff;
146141
else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff;
147142

148-
/* Copy other fields. */
149-
result->size = di->size;
150-
result->offset = di->addr;
151-
152143
if (di->flags == FLAG_NOT_DECODABLE) {
144+
/* In-place considerations: DI is RESULT. Deref fields first. */
145+
unsigned int size = di->size;
146+
unsigned int byte = di->imm.byte;
147+
_OffsetType offset = di->addr & addrMask;
148+
153149
str = &result->mnemonic;
154-
result->offset &= addrMask;
150+
strclear_WS(&result->instructionHex);
151+
str_hex_b(&result->instructionHex, byte);
152+
153+
result->size = size;
154+
result->offset = offset;
155155
strclear_WS(&result->operands);
156156
strcpy_WSN(str, "DB ");
157-
str_code_hb(str, di->imm.byte);
158-
strclear_WS(&result->instructionHex);
159-
str_hex_b(&result->instructionHex, di->imm.byte);
157+
str_code_hb(str, byte);
160158
return; /* Skip to next instruction. */
161159
}
162160

@@ -166,33 +164,6 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
166164
for (i = 0; i < di->size; i++)
167165
str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]);
168166

169-
/* Truncate address now. */
170-
result->offset &= addrMask;
171-
172-
str = &result->mnemonic;
173-
switch (FLAG_GET_PREFIX(di->flags))
174-
{
175-
case FLAG_LOCK:
176-
strcpy_WSN(str, "LOCK ");
177-
break;
178-
case FLAG_REP:
179-
/* REP prefix for CMPS and SCAS is really a REPZ. */
180-
if ((di->opcode == I_CMPS) || (di->opcode == I_SCAS)) strcpy_WSN(str, "REPZ ");
181-
else strcpy_WSN(str, "REP ");
182-
break;
183-
case FLAG_REPNZ:
184-
strcpy_WSN(str, "REPNZ ");
185-
break;
186-
default:
187-
/* Init mnemonic string, cause next touch is concatenation. */
188-
strclear_WS(str);
189-
break;
190-
}
191-
192-
mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode];
193-
memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1);
194-
str->length += mnemonic->length;
195-
196167
/* Format operands: */
197168
str = &result->operands;
198169
strclear_WS(str);
@@ -211,15 +182,8 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
211182
* to indicate size of operation and continue to next instruction.
212183
*/
213184
if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) {
214-
str = &result->mnemonic;
215-
switch (di->ops[0].size)
216-
{
217-
case 8: chrcat_WS(str, 'B'); break;
218-
case 16: chrcat_WS(str, 'W'); break;
219-
case 32: chrcat_WS(str, 'D'); break;
220-
case 64: chrcat_WS(str, 'Q'); break;
221-
}
222-
return;
185+
suffixSize = di->ops[0].size / 8;
186+
goto skipOperands;
223187
}
224188
}
225189

@@ -338,6 +302,54 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
338302

339303
if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN");
340304
else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN");
305+
306+
skipOperands:
307+
{
308+
/* In-place considerations: DI is RESULT. Deref fields first. */
309+
unsigned int size = di->size;
310+
_OffsetType offset = di->addr & addrMask;
311+
unsigned int prefix = FLAG_GET_PREFIX(di->flags);
312+
313+
mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode];
314+
315+
str = &result->mnemonic;
316+
if (prefix) {
317+
switch (prefix)
318+
{
319+
case FLAG_LOCK:
320+
strcpy_WSN(str, "LOCK ");
321+
break;
322+
case FLAG_REP:
323+
/* REP prefix for CMPS and SCAS is really a REPZ. */
324+
if ((di->opcode == I_CMPS) || (di->opcode == I_SCAS)) strcpy_WSN(str, "REPZ ");
325+
else strcpy_WSN(str, "REP ");
326+
break;
327+
case FLAG_REPNZ:
328+
strcpy_WSN(str, "REPNZ ");
329+
break;
330+
}
331+
}
332+
else {
333+
/* Init mnemonic string. */
334+
str->length = 0;
335+
}
336+
337+
memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1);
338+
str->length += mnemonic->length;
339+
340+
if (suffixSize) {
341+
switch (suffixSize)
342+
{
343+
case 1: chrcat_WS(str, 'B'); break;
344+
case 2: chrcat_WS(str, 'W'); break;
345+
case 4: chrcat_WS(str, 'D'); break;
346+
case 8: chrcat_WS(str, 'Q'); break;
347+
}
348+
}
349+
350+
result->offset = offset;
351+
result->size = size;
352+
}
341353
}
342354

343355
#ifdef SUPPORT_64BIT_OFFSET
@@ -347,7 +359,6 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
347359
#endif
348360
{
349361
_DecodeResult res;
350-
_DInst di;
351362
_CodeInfo ci;
352363
unsigned int instsCount = 0, i;
353364

@@ -358,19 +369,15 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
358369
return DECRES_INPUTERR;
359370
}
360371

361-
if ((dt != Decode16Bits) && (dt != Decode32Bits) && (dt != Decode64Bits)) {
372+
if ((unsigned)dt > (unsigned)Decode64Bits) {
362373
return DECRES_INPUTERR;
363374
}
364375

365-
if (code == NULL || result == NULL) {
376+
/* Make sure there's at least one instruction in the result buffer. */
377+
if ((code == NULL) || (result == NULL) || (maxInstructions == 0)) {
366378
return DECRES_INPUTERR;
367379
}
368380

369-
/* Assume length=0 is success. */
370-
if (codeLen == 0) {
371-
return DECRES_SUCCESS;
372-
}
373-
374381
/*
375382
* We have to format the result into text. But the internal decoder works with the new structure of _DInst.
376383
* Therefore, we will pass the result array(!) from the caller and the internal decoder will fill it in with _DInst's.
@@ -390,14 +397,11 @@ static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t
390397

391398
res = decode_internal(&ci, TRUE, (_DInst*)result, maxInstructions, &instsCount);
392399
for (i = 0; i < instsCount; i++) {
393-
if ((*usedInstructionsCount + i) >= maxInstructions) return DECRES_MEMORYERR;
394-
395-
/* Copy the current decomposed result to a temp structure, so we can override the result with text. */
396-
memcpy(&di, (char*)result + (i * sizeof(_DecodedInst)), sizeof(_DInst));
400+
/* distorm_format is optimized and can work with the same input/output buffer in-place. */
397401
#ifdef SUPPORT_64BIT_OFFSET
398-
distorm_format64(&ci, &di, &result[i]);
402+
distorm_format64(&ci, (_DInst*)&result[i], &result[i]);
399403
#else
400-
distorm_format32(&ci, &di, &result[i]);
404+
distorm_format32(&ci, (_DInst*)&result[i], &result[i]);
401405
#endif
402406
}
403407

0 commit comments

Comments
 (0)