diff --git a/dataflowAPI/h/slicing.h b/dataflowAPI/h/slicing.h index 2a6c84f01e..02172d4044 100644 --- a/dataflowAPI/h/slicing.h +++ b/dataflowAPI/h/slicing.h @@ -298,7 +298,7 @@ class Slicer { // This function allows users to inspect the current slice graph and determine which abslocs // need further slicing and which abslocs are no longer interesting, by modifying the current // SliceFrame. - DATAFLOW_EXPORT virtual bool modifyCurrentFrame(SliceFrame &, GraphPtr) {return true;} + DATAFLOW_EXPORT virtual bool modifyCurrentFrame(SliceFrame &, GraphPtr, Slicer*) {return true;} DATAFLOW_EXPORT Predicates() : clearCache(false), controlFlowDep(false) {} }; @@ -666,8 +666,11 @@ class Slicer { void getInsns(Location &loc); +public: void getInsnsBackward(Location &loc); +private: + void setAliases(Assignment::Ptr, Element &); SliceNode::Ptr createNode(Element const&); diff --git a/dataflowAPI/src/slicing.C b/dataflowAPI/src/slicing.C index 73087caa4b..86e4136278 100644 --- a/dataflowAPI/src/slicing.C +++ b/dataflowAPI/src/slicing.C @@ -185,7 +185,7 @@ Slicer::sliceInternal( // add to graph insertInitialNode(ret, dir, aP); - if (p.addNodeCallback(a_,visitedEdges) && p.modifyCurrentFrame(initFrame, ret)) { + if (p.addNodeCallback(a_,visitedEdges) && p.modifyCurrentFrame(initFrame, ret, this)) { // initialize slice stack and set for loop detection. // the set may be redundant, but speeds up the loopless case. 
addrStack.push_back(initFrame.addr()); @@ -446,7 +446,7 @@ bool Slicer::updateAndLink( cand.active[matches[i].reg].push_back(matches[i]); } } - return p.modifyCurrentFrame(cand, g); + return p.modifyCurrentFrame(cand, g, this); } // similar to updateAndLink, but this version only looks at the diff --git a/parseAPI/h/CodeObject.h b/parseAPI/h/CodeObject.h index c8af4d7908..a294765947 100644 --- a/parseAPI/h/CodeObject.h +++ b/parseAPI/h/CodeObject.h @@ -108,6 +108,11 @@ class CodeObject { PARSER_EXPORT int findFuncs(CodeRegion * cr, Address start, Address end, std::set & funcs); + PARSER_EXPORT int findCurrentFuncs(CodeRegion * cr, + Address addr, + std::set & funcs); + + PARSER_EXPORT const funclist & funcs() { return flist; } // blocks diff --git a/parseAPI/src/BoundFactData.C b/parseAPI/src/BoundFactData.C index d18412899f..4883b88a0f 100644 --- a/parseAPI/src/BoundFactData.C +++ b/parseAPI/src/BoundFactData.C @@ -153,7 +153,7 @@ void StridedInterval::Sub(const StridedInterval& minuend) { void StridedInterval::And(const StridedInterval &rhs) { // Currently only consider the case where at least one of them is constant if (stride == 0) { - // CONSTANT and any thing ==> 1[1, CONSTANT] + // CONSTANT and any thing ==> 1[0, CONSTANT] low = 0; stride = 1; } else if (rhs.stride == 0) { diff --git a/parseAPI/src/CodeObject.C b/parseAPI/src/CodeObject.C index 2ab2e56205..da33a1452d 100644 --- a/parseAPI/src/CodeObject.C +++ b/parseAPI/src/CodeObject.C @@ -147,6 +147,11 @@ int CodeObject::findCurrentBlocks(CodeRegion * cr, Address addr, set & b return parser->findCurrentBlocks(cr,addr,blocks); } +int CodeObject::findCurrentFuncs(CodeRegion * cr, Address addr, set & funcs) +{ + return parser->findCurrentFuncs(cr,addr,funcs); +} + void CodeObject::parse() { if(!parser) { diff --git a/parseAPI/src/IndirectASTVisitor.C b/parseAPI/src/IndirectASTVisitor.C index 2927db92da..c71339e103 100644 --- a/parseAPI/src/IndirectASTVisitor.C +++ b/parseAPI/src/IndirectASTVisitor.C @@ -57,6 
+57,7 @@ AST::Ptr BoundCalcVisitor::visit(DataflowAPI::RoseAST *ast) { // a cmp bound not found yet. So we only apply and // bound when this is the last attempt if (handleOneByteRead) { + parsing_printf("\tTry to generate bound for AND\n"); StridedInterval *val = NULL; if (IsResultBounded(ast->child(0))) val = new StridedInterval(*GetResultBound(ast->child(0))); @@ -136,6 +137,7 @@ AST::Ptr BoundCalcVisitor::visit(DataflowAPI::ConstantAST *ast) { // and change it to a negative number value = -(((~value) & ((1ULL << v.size) - 1)) + 1); } + parsing_printf("\t\tGet a constant %ld\n", value); bound.insert(make_pair(ast, new StridedInterval(value))); return AST::Ptr(); } diff --git a/parseAPI/src/IndirectAnalyzer.C b/parseAPI/src/IndirectAnalyzer.C index 2d178f3977..7218260c2b 100644 --- a/parseAPI/src/IndirectAnalyzer.C +++ b/parseAPI/src/IndirectAnalyzer.C @@ -23,7 +23,7 @@ bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vectorlast()); parsing_printf("Looking for thunk\n"); - if (block->last() == 0x4f709c) dyn_debug_parsing=1; else dyn_debug_parsing=0; +// if (block->last() == 0x4c6dcc) dyn_debug_parsing=1; else dyn_debug_parsing=0; // Find all blocks that reach the block containing the indirect jump // This is a prerequisit for finding thunks @@ -51,7 +51,7 @@ bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vectorname().c_str(), block->last(), jtfp.format().c_str(), jtfp.indexLoc ? jtfp.indexLoc->format().c_str() : ""); + parsing_printf("In function %s, Address %lx, jump target format %s, index loc %s, index variable %s", func->name().c_str(), block->last(), jtfp.format().c_str(), jtfp.indexLoc ? 
jtfp.indexLoc->format().c_str() : "" , jtfp.index.format().c_str() ); if (!jtfp.isJumpTableFormat()) { parsing_printf(" not jump table\n"); @@ -63,7 +63,9 @@ bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vectorobj()->cs()->getArch() != Arch_aarch64) { +// if (!jtip.findBound && block->obj()->cs()->getArch() != Arch_aarch64) { + if (!jtip.findBound ) { + // After the slicing is done, we do one last check to // see if we can resolve the indirect jump by assuming // one byte read is in bound [0,255] @@ -208,6 +210,14 @@ void IndirectControlFlowAnalyzer::ReadTable(AST::Ptr jumpTargetExpr, } } } + set funcs; + block->obj()->findCurrentFuncs(block->region(), jtrv.targetAddress, funcs); + for (auto fit = funcs.begin(); fit != funcs.end(); ++fit) { + if (*fit != func) { + overlap = true; + parsing_printf("WARNING: resolving jump tables leads to address %lx in another function at %lx\n", jtrv.targetAddress, (*fit)->addr()); + } + } if (overlap) break; jumpTargets.insert(jtrv.targetAddress); } else { diff --git a/parseAPI/src/JumpTableFormatPred.C b/parseAPI/src/JumpTableFormatPred.C index e4bcc89ac6..1d65c2b264 100644 --- a/parseAPI/src/JumpTableFormatPred.C +++ b/parseAPI/src/JumpTableFormatPred.C @@ -16,7 +16,7 @@ static int CountInDegree(SliceNode::Ptr n) { return count; } -bool JumpTableFormatPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g) { +bool JumpTableFormatPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g, Slicer* s) { if (!jumpTableFormat) return false; if (unknownInstruction) return false; @@ -47,10 +47,10 @@ bool JumpTableFormatPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::P firstMemoryRead = false; frame.active.erase(rit); } else { - // For a later memory read, if we have not disqualified this indirect, + // For a later memory read, if we have not disqualified this indirect jump, // it is likely to be a jump table. This memory read is assumed - // and likely to be a spill for a certain register. 
- // We keep slicing on the register. + // and likely to be a spill for a certain register. We syntactically find the location + // where the memory is written and keep slicing on the source register SliceNode::Ptr readNode; parsing_printf("\t\tfind another memory read %s %s\n", rit->first.format().c_str(), rit->second[0].ptr->format().c_str()); if (!findSpillRead(g, readNode)) { @@ -58,9 +58,16 @@ bool JumpTableFormatPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::P jumpTableFormat = false; return false; } - // We then delete all absregions introduced by this read node from the active map - // and add back the original absregion - adjustActiveMap(frame, readNode); + // We then do the following things + // 1. delete all absregions introduced by this read node from the active map + // 2. search for the closest instruction that writes the same memory location, + // through memory operand ast matching + // 3. change the slicing location and add back the source + if (!adjustSliceFrame(frame, readNode, s)) { + parsing_printf("Cannot track through the memory read\n"); + jumpTableFormat = false; + return false; + } g->deleteNode(readNode); return true; @@ -124,6 +131,12 @@ bool JumpTableFormatPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::P // We start plug in ASTs from predecessors n->ins(nbegin, nend); map inputs; + if (aliases.find(n->assign()) != aliases.end()) { + inputs.insert(aliases[n->assign()]); + parsing_printf("\t Replacing %s with %s\n", aliases[n->assign()].first->format().c_str(),aliases[n->assign()].second->format().c_str()); + exp = SymbolicExpression::SubstituteAnAST(exp, inputs); + inputs.clear(); + } for (; nbegin != nend; ++nbegin) { SliceNode::Ptr p = boost::static_pointer_cast(*nbegin); if (exprs.find(p->assign()) == exprs.end()) { @@ -253,20 +266,123 @@ bool JumpTableFormatPred::findSpillRead(Graph::Ptr g, SliceNode::Ptr &readNode) return false; } -void JumpTableFormatPred::adjustActiveMap(Slicer::SliceFrame &frame,
SliceNode::Ptr n) { +static Assignment::Ptr SearchForWrite(SliceNode::Ptr n, AbsRegion &src, Slicer::Location &loc, Slicer *s) { + + + queue workingList; + set inQueue; + workingList.push(n->block()); + inQueue.insert(n->block()); + + set memReads; + n->assign()->insn()->getMemoryReadOperands(memReads); + if (memReads.size() != 1) { + parsing_printf("\tThe instruction has %d memory read operands, Should have only one\n", memReads.size()); + return Assignment::Ptr(); + } + Expression::Ptr memRead = *memReads.begin(); + parsing_printf("\tsearch for memory operand %s\n", memRead->format().c_str()); + Block* targetBlock = NULL; + Instruction::Ptr targetInsn; + Address targetAddr; + + while (!workingList.empty() && targetBlock == NULL) { + Block* curBlock = workingList.front(); + workingList.pop(); + // If the current block is the starting block, + // we need to make sure we only inspect instructions before the starting instruction + Address addr = 0; + if (curBlock == n->block()) { + addr = n->addr(); + } + + Block::Insns insns; + curBlock->getInsns(insns); + + for (auto iit = insns.rbegin(); iit != insns.rend(); ++iit) { + if (addr > 0 && iit->first > addr) continue; + Instruction::Ptr i = iit->second; + // We find the first instruction that only writes to memory + // and the memory operand has the exact same AST as the memory read + if (!i->readsMemory() && i->writesMemory()) { + set memWrites; + i->getMemoryWriteOperands(memWrites); + if (memWrites.size() == 1 && *memRead == *(*memWrites.begin())) { + targetBlock = curBlock; + targetInsn = i; + targetAddr = iit->first; + parsing_printf("\t\tFind matching at %lx\n", targetAddr); + + // Now we try to identify the source register + std::vector ops; + i->getOperands(ops); + for (auto oit = ops.begin(); oit != ops.end(); ++oit) { + if (!(*oit).writesMemory() && !(*oit).readsMemory()) { + std::set regsRead; + oit->getReadSet(regsRead); + src = AbsRegion(Absloc( (*regsRead.begin())->getID() )); + parsing_printf("\t\tContinue
to slice on %s\n", src.format().c_str()); + break; + } + } + + loc.block = curBlock; + s->getInsnsBackward(loc); + while (loc.addr() > targetAddr) { + loc.rcurrent++; + } + break; + } + } + } + + for (auto eit = curBlock->sources().begin(); eit != curBlock->sources().end(); ++eit) { + ParseAPI::Edge *e = *eit; + if (e->interproc()) continue; + if (e->type() == CATCH) continue; + if (inQueue.find(e->src()) != inQueue.end()) continue; + inQueue.insert(e->src()); + workingList.push(e->src()); + } + } + + if (targetBlock == NULL) { + parsing_printf("\t\t Cannot find match\n"); + return Assignment::Ptr(); + } + + AssignmentConverter ac(true, false); + vector assignments; + ac.convert(targetInsn, targetAddr, n->func(), targetBlock, assignments); + return assignments[0]; +} + +bool JumpTableFormatPred::adjustSliceFrame(Slicer::SliceFrame &frame, SliceNode::Ptr n, Slicer* s) { + // Delete all active regions introduced by this memory read, + // such as memory region, stack pointer, frame pointer std::vector& inputs = n->assign()->inputs(); for (auto iit = inputs.begin(); iit != inputs.end(); ++iit) { parsing_printf("\tdelete %s from active map\n", iit->format().c_str()); frame.active.erase(*iit); } + // Search backward for the instruction that writes to the memory location + AbsRegion src; + Assignment::Ptr assign = SearchForWrite(n, src, frame.loc, s); + if (!assign) return false; + NodeIterator nbegin, nend; n->outs(nbegin, nend); - parsing_printf("\tadd %s to active map\n", n->assign()->out().format().c_str()); + parsing_printf("\tadd %s to active map\n", src.format().c_str()); for (; nbegin != nend; ++nbegin) { SliceNode::Ptr next = boost::static_pointer_cast(*nbegin); - frame.active[n->assign()->out()].push_back(Slicer::Element(next->block(), next->func(), n->assign()->out(), next->assign())); + frame.active[src].push_back(Slicer::Element(next->block(), next->func(), src, next->assign())); + if (n->assign()->out() != src) { + aliases[next->assign()] =
make_pair(VariableAST::create(Variable(n->assign()->out())), VariableAST::create(Variable(src))); + } + } + return true; } diff --git a/parseAPI/src/JumpTableFormatPred.h b/parseAPI/src/JumpTableFormatPred.h index 0fab4753ab..282362039a 100644 --- a/parseAPI/src/JumpTableFormatPred.h +++ b/parseAPI/src/JumpTableFormatPred.h @@ -29,6 +29,7 @@ class JumpTableFormatPred : public Slicer::Predicates { AST::Ptr jumpTargetExpr; set
constAddr; + dyn_hash_map, Assignment::AssignmentPtrHasher> aliases; JumpTableFormatPred(ParseAPI::Function *f, ParseAPI::Block *b, @@ -42,11 +43,11 @@ class JumpTableFormatPred : public Slicer::Predicates { firstMemoryRead = true; } - virtual bool modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g); + virtual bool modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g, Slicer*); std::string format(); bool isJumpTableFormat() { return jumpTableFormat && findIndex && jumpTargetExpr;} bool findSpillRead(Graph::Ptr g, SliceNode::Ptr &); - void adjustActiveMap(Slicer::SliceFrame &frame, SliceNode::Ptr); + bool adjustSliceFrame(Slicer::SliceFrame &frame, SliceNode::Ptr, Slicer*); }; #endif diff --git a/parseAPI/src/JumpTableIndexPred.C b/parseAPI/src/JumpTableIndexPred.C index a634eab8d3..19126b9944 100644 --- a/parseAPI/src/JumpTableIndexPred.C +++ b/parseAPI/src/JumpTableIndexPred.C @@ -292,7 +292,9 @@ bool JumpTableIndexPred::MatchReadAST(Assignment::Ptr a) { return false; } -bool JumpTableIndexPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g) { +bool JumpTableIndexPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g, Slicer *) { + parsing_printf("\tIn JumpTableIndexPred::modifyCurrentFrame, size %d\n", g->size()); + if (g->size() == 1) { /* This is the start of the jump table index slice. 
* As the slicing interface only works with an assignment, @@ -301,6 +303,7 @@ bool JumpTableIndexPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Pt Slicer::SliceFrame::ActiveMap::iterator it1, it2; it1 = frame.active.begin(); while (it1 != frame.active.end()) { + parsing_printf("\t\tactive region %s\n", it1->first.format().c_str()); if (it1->first != index) { it2 = it1; ++it2; diff --git a/parseAPI/src/JumpTableIndexPred.h b/parseAPI/src/JumpTableIndexPred.h index 18306556df..fd241c09d7 100644 --- a/parseAPI/src/JumpTableIndexPred.h +++ b/parseAPI/src/JumpTableIndexPred.h @@ -27,7 +27,7 @@ class JumpTableIndexPred : public Slicer::Predicates { StridedInterval bound; std::set currentAssigns; virtual bool addNodeCallback(AssignmentPtr ap, std::set &visitedEdges); - virtual bool modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g); + virtual bool modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g, Slicer*); GraphPtr BuildAnalysisGraph(std::set &visitedEdges); bool IsIndexBounded(GraphPtr slice, BoundFactsCalculator &bfc, StridedInterval &target); bool FillInOutEdges(StridedInterval &target, std::vector >& outEdges); diff --git a/parseAPI/src/Parser.C b/parseAPI/src/Parser.C index eb753c0715..3c2a5d2d05 100644 --- a/parseAPI/src/Parser.C +++ b/parseAPI/src/Parser.C @@ -1813,6 +1813,10 @@ int Parser::findCurrentBlocks(CodeRegion* cr, Address addr, return _parse_data->findBlocks(cr, addr, blocks); } +int Parser::findCurrentFuncs(CodeRegion * cr, Address addr, std::set &funcs) { + return _parse_data->findFuncs(cr, addr, funcs); +} + Edge* Parser::link(Block *src, Block *dst, EdgeTypeEnum et, bool sink) { diff --git a/parseAPI/src/Parser.h b/parseAPI/src/Parser.h index 999b305d42..9dc65e427c 100644 --- a/parseAPI/src/Parser.h +++ b/parseAPI/src/Parser.h @@ -126,6 +126,8 @@ class Parser { int findBlocks(CodeRegion * cr, Address addr, set & blocks); // returns current blocks without parsing. 
int findCurrentBlocks(CodeRegion* cr, Address addr, std::set& blocks); + int findCurrentFuncs(CodeRegion * cr, Address addr, set & funcs); + Block * findNextBlock(CodeRegion * cr, Address addr); void parse();