From 07bdc17e2ecfbd1e5be36d0122e227692055299d Mon Sep 17 00:00:00 2001 From: Xiaozhu Meng Date: Thu, 25 Oct 2018 09:06:21 -0500 Subject: [PATCH] 1. Fix inconsistent block splits 2. Fix non-returning function analysis for PLT stubs, where a PLT stub may first be set to RETURN and then set to NORETURN. 3. When parsing a call fallthrough edge, the corresponding call edge may still point to the sink (not handled yet), which causes the code to believe it is an indirect call. So, change the code to look up the callee by using the callee entry address. --- parseAPI/src/Function.C | 4 +- parseAPI/src/Parser.C | 116 ++++++++++++++++++++++++----------- parseAPI/src/ParserDetails.C | 2 +- 3 files changed, 84 insertions(+), 38 deletions(-) diff --git a/parseAPI/src/Function.C b/parseAPI/src/Function.C index e570c3e2d9..92cde08894 100644 --- a/parseAPI/src/Function.C +++ b/parseAPI/src/Function.C @@ -501,7 +501,9 @@ void Function::set_retstatus(FuncReturnStatus rs) // But on powerpc, the function contains a BLR instruction, // looking like a return instruction, but actually is not. 
if (obj()->cs()->nonReturning(_name) && rs != NORETURN) return; - + assert(!(_rs == RETURN && rs == NORETURN)); + assert(!(_rs == NORETURN && rs == RETURN)); + // If we are changing the return status, update prev counter if (_rs != UNSET) { if (_rs == NORETURN) { diff --git a/parseAPI/src/Parser.C b/parseAPI/src/Parser.C index 8a80733622..ff7b6e712d 100644 --- a/parseAPI/src/Parser.C +++ b/parseAPI/src/Parser.C @@ -1275,6 +1275,18 @@ namespace { } return NULL; } + inline ParseWorkElem * bundle_call_elem(ParseWorkBundle * b) + { + if(!b) return NULL; + + vector const& elems = b->elems(); + vector::const_iterator it = elems.begin(); + for( ; it != elems.end(); ++it) { + if((*it)->edge()->type() == CALL) + return (*it); + } + return NULL; + } /* * Look up the next block for detection of straight-line @@ -1438,8 +1450,8 @@ Parser::parse_frame_one_iteration(ParseFrame &frame, bool recursive) { continue; } else if (work->order() == ParseWorkElem::call_fallthrough) { // check associated call edge's return status - Edge * ce = bundle_call_edge(work->bundle()); - if (!ce) { + ParseWorkElem * call_elem = bundle_call_elem(work->bundle()); + if (!call_elem) { // odd; no call edge in this bundle parsing_printf("[%s] unexpected missing call edge at %lx\n", FILE__,work->edge()->src()->lastInsnAddr()); @@ -1479,9 +1491,12 @@ Parser::parse_frame_one_iteration(ParseFrame &frame, bool recursive) { factory().destroy_edge(remove, destroyed_noreturn); continue; } - } else if (ce->trg()) { - Address target = ce->trg_addr(); + } else if (call_elem->target() > 0) { + // For indirect calls, since we do not know the callee, + // the call fallthrough edges are assumed to exist + Address target = call_elem->target(); Function * ct = _parse_data->findFunc(frame.codereg,target); + assert(ct); bool is_plt = false; // check if associated call edge's return status is still unknown @@ -1551,7 +1566,7 @@ Parser::parse_frame_one_iteration(ParseFrame &frame, bool recursive) { continue; } else // 
Invalidate cache_valid for all sharing functions - invalidateContainingFuncs(func, ce->src()); + invalidateContainingFuncs(func, work->edge()->src()); } } } else if (work->order() == ParseWorkElem::seed_addr) { @@ -1707,33 +1722,7 @@ Parser::parse_frame_one_iteration(ParseFrame &frame, bool recursive) { false) ); break; - } else if (curAddr > nextBlockAddr) { - parsing_printf("[%s:%d] inconsistent instruction stream: " - "%lx is within [%lx,%lx)\n", - FILE__,__LINE__,curAddr, - nextBlock->start(),nextBlock->end()); - Address prev_insn; - if (nextBlock->consistent(curAddr, prev_insn)) { - // The two overlapping blocks aligned. - // We need to split the large block, and create new edge to the later block - Block* new_block = split_block(frame.func, nextBlock, curAddr, prev_insn); - leadersToBlock[curAddr] = new_block; - visited[curAddr] = true; - ah->retreat(); - end_block(cur, ah); - if (!set_edge_parsing_status(frame ,cur->last(), cur)) break; - add_edge(frame, frame.func, cur, ah->getAddr(), curAddr, FALLTHROUGH, NULL); - // We break from this loop because no need more stright-line parsing - break; - } - - // NB "cur" hasn't ended, so its range may - // not look like it overlaps with nextBlock - _pcb.overlapping_blocks(cur,nextBlock); - - tie(nextBlockAddr,nextBlock) = - func->get_next_block(frame.curAddr, frame.codereg); - } + } // per-instruction callback notification ParseCallback::insn_details insn_det; @@ -2593,6 +2582,8 @@ bool Parser::set_edge_parsing_status(ParseFrame& frame, Address addr, Block* b) } assert(A->end() == B->end()); Address prev_insn; + bool inconsistent = false; + region_data::edge_data_map::accessor a2; if (A->consistent(B->start(), prev_insn)) { // The edge should stay with the shorter block move_edges_consistent_blocks(A,B); @@ -2600,7 +2591,6 @@ bool Parser::set_edge_parsing_status(ParseFrame& frame, Address addr, Block* b) a1->second.b = B; A->updateEnd(B->start()); A->_lastInsn = prev_insn; - region_data::edge_data_map::accessor a2; 
bool cont = true; // Iteratively split the block while (!edm->insert(a2, A->last())) { @@ -2628,9 +2618,7 @@ bool Parser::set_edge_parsing_status(ParseFrame& frame, Address addr, Block* b) } } if (!cont) { - // This can happen when the two blocks - // contain overlapping instructions. - // Here, we only handle consistent block split. + inconsistent = true; break; } } @@ -2640,6 +2628,62 @@ bool Parser::set_edge_parsing_status(ParseFrame& frame, Address addr, Block* b) a2->second.f = fA; a2->second.b = A; } + } else { + inconsistent = true; + } + if (inconsistent) { + Block::Insns A_insns, B_insns; + A->getInsns(A_insns); + B->getInsns(B_insns); + for (auto iit = B_insns.begin(); iit != B_insns.end(); ++iit) { + auto ait = A_insns.find(iit->first); + if (ait != A_insns.end()) { + Address addr = iit->first; + --ait; + --iit; + + Block * ret = factory()._mkblock(fA, b->region(),addr); + ret->updateEnd(B->end()); + ret->_lastInsn = B->_lastInsn; + ret->_parsed = true; + + Block * exist = record_block(ret); + bool block_exist = false; + if (exist != ret) { + block_exist = true; + ret = exist; + } + + move_edges_consistent_blocks(A, ret); + move_edges_consistent_blocks(B, ret); + + A->updateEnd(addr); + A->_lastInsn = ait->first; + B->updateEnd(addr); + B->_lastInsn = iit->first; + + if (a2.empty()) { + a1->second.f = fA; + a1->second.b = ret; + } else { + a2->second.f = fA; + a2->second.b = ret; + } + + link_block(A,ret,FALLTHROUGH,false); + link_block(B,ret,FALLTHROUGH,false); + + region_data::edge_data_map::accessor a3; + assert(edm->insert(a3, A->last())); + a3->second.f = fA; + a3->second.b = A; + region_data::edge_data_map::accessor a4; + assert(edm->insert(a4, B->last())); + a4->second.f = fB; + a4->second.b = B; + break; + } + } } return false; } diff --git a/parseAPI/src/ParserDetails.C b/parseAPI/src/ParserDetails.C index fe2d39cffc..fd8af7f635 100644 --- a/parseAPI/src/ParserDetails.C +++ b/parseAPI/src/ParserDetails.C @@ -472,7 +472,7 @@ void 
Parser::ProcessCFInsn( insn_ret = ah->getReturnStatus(frame.func, frame.num_insns); // Update function return status if possible - if (unlikely(insn_ret != UNSET && frame.func->_rs < RETURN)) { + if (unlikely(insn_ret != UNSET && frame.func->_rs < RETURN) && !HASHDEF(plt_entries, frame.func->addr())) { // insn_ret can only be UNSET, UNKNOWN, or RETURN // UNKNOWN means that there is an unresolved undirect control flow, // such as unresolve jump tables or indirect tail calls.