Skip to content

Commit

Permalink
Modify irbloc destination mechanism. Rework API accordingly.
Browse files Browse the repository at this point in the history
Fat patch here: some APIs have changed.

Each irbloc now assigns a special "IRDst" register which is used to
describe the destination irbloc. This allows a simple description of
architectures using delay slots. Architecture semantics and the tcc/python
jitters are modified accordingly. The LLVM jitter is disabled for now,
but should be patched soon.
  • Loading branch information
serpilliere committed Sep 5, 2014
1 parent e8d0fcf commit 6e09df7
Show file tree
Hide file tree
Showing 50 changed files with 1,195 additions and 855 deletions.
60 changes: 41 additions & 19 deletions example/asm_x86.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,49 @@ def my_ast_id2expr(t):

blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, '''
main:
PUSH EBP
MOV EBP, ESP
SUB ESP, 0x100
MOV EAX, 0x1337
LEA ESI, DWORD PTR [mystr]
CALL toto
toto:
POP EDI
PUSH 0
FLD1
FLD1
FADD ST, ST(1)
FIST DWORD PTR [ESP]
POP EAX
MOV ESP, EBP
POP EBP
RET
PUSH EBP
MOV EBP, ESP
SUB ESP, 0x100
MOV EAX, 0x1337
; test ptr manip
LEA ESI, DWORD PTR [mystr^toto]
CALL toto
mystr:
.string "test string"
toto:
POP EDI
PUSH EDI
; test scasb
XOR EAX, EAX
XOR ECX, ECX
DEC ECX
REPNE SCASB
NOT ECX
DEC ECX
; test movsb
POP ESI
LEA EDI, DWORD PTR [EBP-0x100]
REPE MOVSB
; test float
PUSH 0
FLD1
FLD1
FADD ST, ST(1)
FIST DWORD PTR [ESP]
POP EAX
; test cond mnemo
NOP
NOP
CMOVZ EAX, EBX
MOV ESP, EBP
POP EBP
RET
''')

# fix shellcode addr
Expand Down
10 changes: 5 additions & 5 deletions example/expression/manip_expression2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@
"""

arch = mn_x86
my_ir = ir_a_x86_32()
ir_arch = ir_a_x86_32()

l = arch.fromstring('LODSB', 32)
l.offset, l.l = 0, 15
my_ir.add_instr(l)
ir_arch.add_instr(l)

print '*' * 80
for lbl, b in my_ir.blocs.items():
for lbl, b in ir_arch.blocs.items():
print b
for irs in b.irs:
o_r, o_w = get_rw(irs)
print 'read: ', [str(x) for x in o_r]
print 'written:', [str(x) for x in o_w]
print
my_ir.gen_graph()
g = my_ir.graph()
ir_arch.gen_graph()
g = ir_arch.graph()
open('graph_instr.txt', 'w').write(g)
34 changes: 17 additions & 17 deletions example/expression/manip_expression4.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ def get_modified_symbols(sb):
return out


def intra_bloc_flow_symb(my_ir, flow_graph, irbloc):
def intra_bloc_flow_symb(ir_arch, flow_graph, irbloc):
symbols_init = {}
for i, r in enumerate(all_regs_ids):
symbols_init[r] = all_regs_ids_init[i]
sb = symbexec(mn_x86, symbols_init)
sb = symbexec(ir_arch, symbols_init)
sb.emulbloc(irbloc)
print '*' * 40
print irbloc
Expand Down Expand Up @@ -138,19 +138,19 @@ def node2str(self, n):
return out


def gen_bloc_data_flow_graph(my_ir, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool):
def gen_bloc_data_flow_graph(ir_arch, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool):
out_str = ""

# my_ir = ir_x86_32(symbol_pool)
# ir_arch = ir_x86_32(symbol_pool)

for irbloc in my_ir.blocs.values():
for irbloc in ir_arch.blocs.values():
print irbloc

my_ir.gen_graph()
my_ir.dead_simp()
ir_arch.gen_graph()
ir_arch.dead_simp()

irbloc_0 = None
for irbloc in my_ir.blocs.values():
for irbloc in ir_arch.blocs.values():
if irbloc.label.offset == ad:
irbloc_0 = irbloc
break
Expand All @@ -162,17 +162,17 @@ def gen_bloc_data_flow_graph(my_ir, in_str, ad): # arch, attrib, pool_bin, bloc

bloc2w = {}

for irbloc in my_ir.blocs.values():
intra_bloc_flow_raw(my_ir, flow_graph, irbloc)
# intra_bloc_flow_symb(my_ir, flow_graph, irbloc)
for irbloc in ir_arch.blocs.values():
intra_bloc_flow_raw(ir_arch, flow_graph, irbloc)
# intra_bloc_flow_symb(ir_arch, flow_graph, irbloc)

for irbloc in my_ir.blocs.values():
for irbloc in ir_arch.blocs.values():
print irbloc
print 'IN', [str(x) for x in irbloc.in_nodes]
print 'OUT', [str(x) for x in irbloc.out_nodes]

print '*' * 20, 'interbloc', '*' * 20
inter_bloc_flow(my_ir, flow_graph, irbloc_0.label)
inter_bloc_flow(ir_arch, flow_graph, irbloc_0.label)

# sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core')
# from graph_qt import graph_qt
Expand All @@ -191,19 +191,19 @@ def gen_bloc_data_flow_graph(my_ir, in_str, ad): # arch, attrib, pool_bin, bloc


print 'generating dataflow graph for:'
my_ir = ir_a_x86_32(mdis.symbol_pool)
ir_arch = ir_a_x86_32(mdis.symbol_pool)

blocs = ab
for bloc in blocs:
print bloc
my_ir.add_bloc(bloc)
for irbloc in my_ir.blocs.values():
ir_arch.add_bloc(bloc)
for irbloc in ir_arch.blocs.values():
print irbloc
if irbloc.label.offset != 0:
continue


out_str = gen_bloc_data_flow_graph(my_ir, mdis.bs, ad)
out_str = gen_bloc_data_flow_graph(ir_arch, mdis.bs, ad)

print '*' * 40
print """
Expand Down
16 changes: 8 additions & 8 deletions example/expression/manip_expression6.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,26 @@ def my_ast_int2expr(a):
resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, 32, blocs, symbol_pool)

# Translate to IR
my_ir = ir_a_x86_32(symbol_pool)
ir_arch = ir_a_x86_32(symbol_pool)
for b in blocs:
print 'add bloc'
print b
my_ir.add_bloc(b)
ir_arch.add_bloc(b)

# Display IR
for lbl, b in my_ir.blocs.items():
for lbl, b in ir_arch.blocs.items():
print b

# Dead propagation
my_ir.gen_graph()
out = my_ir.graph()
ir_arch.gen_graph()
out = ir_arch.graph()
open('graph.txt', 'w').write(out)
print '*' * 80
my_ir.dead_simp()
out2 = my_ir.graph()
ir_arch.dead_simp()
out2 = ir_arch.graph()
open('graph2.txt', 'w').write(out2)

# Display new IR
print 'new ir blocs'
for lbl, b in my_ir.blocs.items():
for lbl, b in ir_arch.blocs.items():
print b
37 changes: 20 additions & 17 deletions example/expression/solve_condition_stp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine

from pdb import pm


filename = os.environ.get('PYTHONSTARTUP')
if filename and os.path.isfile(filename):
Expand All @@ -35,34 +37,34 @@
sys.exit(0)


def get_bloc(my_ir, mdis, ad):
def get_bloc(ir_arch, mdis, ad):
if isinstance(ad, asmbloc.asm_label):
l = ad
else:
l = mdis.symbol_pool.getby_offset_create(ad)
if not l in my_ir.blocs:
if not l in ir_arch.blocs:
ad = l.offset
b = mdis.dis_bloc(ad)
my_ir.add_bloc(b)
b = my_ir.get_bloc(l)
ir_arch.add_bloc(b)
b = ir_arch.get_bloc(l)
if b is None:
raise LookupError('no bloc found at that address: %s' % l)
return b


def emul_symb(my_ir, mdis, states_todo, states_done):
def emul_symb(ir_arch, mdis, states_todo, states_done):
while states_todo:
ad, symbols, conds = states_todo.pop()
print '*' * 40, "addr", ad, '*' * 40
if (ad, symbols, conds) in states_done:
print 'skip', ad
continue
states_done.add((ad, symbols, conds))
sb = symbexec(mn, {})
sb = symbexec(ir_arch, {})
sb.symbols = symbols.copy()
if my_ir.pc in sb.symbols:
del(sb.symbols[my_ir.pc])
b = get_bloc(my_ir, mdis, ad)
if ir_arch.pc in sb.symbols:
del(sb.symbols[ir_arch.pc])
b = get_bloc(ir_arch, mdis, ad)

print 'run bloc'
print b
Expand Down Expand Up @@ -156,35 +158,36 @@ def my_ast_id2expr(t):
my_symbols = dict([(x.name, x) for x in my_symbols])
my_symbols.update(mn_x86.regs.all_regs_ids_byname)

sb = symbexec(mn, symbols_init)
ir_arch = ir_x86_32(mdis.symbol_pool)

sb = symbexec(ir_arch, symbols_init)

blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, '''
PUSH argv
PUSH argc
PUSH ret_addr
''')

my_ir = ir_x86_32(mdis.symbol_pool)

b = blocs[0][0]
print b
# add fake address and len to parsed instructions
for i, l in enumerate(b.lines):
l.offset, l.l = i, 1
my_ir.add_bloc(b)
irb = get_bloc(my_ir, mdis, 0)
ir_arch.add_bloc(b)
irb = get_bloc(ir_arch, mdis, 0)
sb.emulbloc(irb)
sb.dump_mem()

# reset my_ir blocs
my_ir.blocs = {}
# reset ir_arch blocs
ir_arch.blocs = {}

states_todo = set()
states_done = set()
states_todo.add((uint32(ad), sb.symbols, ()))

# emul blocs, propagate states
emul_symb(my_ir, mdis, states_todo, states_done)
emul_symb(ir_arch, mdis, states_todo, states_done)

all_info = []

Expand All @@ -199,7 +202,7 @@ def my_ast_id2expr(t):

all_cases = set()

sb = symbexec(mn, symbols_init)
sb = symbexec(ir_arch, symbols_init)
for ad, reqs_cond in all_info:
all_ids = set()
for k, v in reqs_cond:
Expand Down
2 changes: 1 addition & 1 deletion example/symbol_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
symbols_init = {}
for i, r in enumerate(all_regs_ids):
symbols_init[r] = all_regs_ids_init[i]
symb = symbexec(mn_x86, symbols_init)
symb = symbexec(ir, symbols_init)

block = ir.get_bloc(0)

Expand Down
12 changes: 6 additions & 6 deletions example/test_dis.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,19 +223,19 @@
if options.gen_ir:
log.info("generating IR")

my_ir = ira(mdis.symbol_pool)
my_ir.blocs = {}
ir_arch = ira(mdis.symbol_pool)
ir_arch.blocs = {}
for ad, all_bloc in all_funcs_blocs.items():
log.info("generating IR... %x" % ad)
for b in all_bloc:
my_ir.add_bloc(b)
ir_arch.add_bloc(b)

log.info("Gen Graph... %x" % ad)

my_ir.gen_graph()
ir_arch.gen_graph()

if options.simplify:
my_ir.dead_simp()
ir_arch.dead_simp()

out = my_ir.graph()
out = ir_arch.graph()
open('graph_irflow.txt', 'w').write(out)
Loading

0 comments on commit 6e09df7

Please sign in to comment.