Skip to content

Commit

Permalink
1. Add instruction semantics for powerpc ld
Browse files Browse the repository at this point in the history
2. Properly use xlatetom to handle endianness
3. Improve identification of TOC base.
4. Allow reading jump table location from memory location
  • Loading branch information
mxz297 committed Aug 15, 2017
1 parent f4390bb commit 74d71a7
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 31 deletions.
1 change: 1 addition & 0 deletions dataflowAPI/rose/semantics/DispatcherPowerpc.C
Expand Up @@ -1047,6 +1047,7 @@ DispatcherPowerpc::iproc_init()
iproc_set(powerpc_lbzu, new Powerpc::IP_lbzu);
iproc_set(powerpc_lbzux, new Powerpc::IP_lbzu);
iproc_set(powerpc_lbzx, new Powerpc::IP_lbz);
iproc_set(powerpc_ld, new Powerpc::IP_move);
iproc_set(powerpc_lha, new Powerpc::IP_lha);
iproc_set(powerpc_lhax, new Powerpc::IP_lha);
iproc_set(powerpc_lhz, new Powerpc::IP_lhz);
Expand Down
8 changes: 1 addition & 7 deletions instructionAPI/src/InstructionDecoder-power.C
Expand Up @@ -247,13 +247,7 @@ namespace Dyninst
isRAWritten = false;
isFPInsn = false;
bcIsConditional = false;
#if !defined(arch_ppc_little_endian)
insn = b.start[0] << 24 | b.start[1] << 16 |
b.start[2] << 8 | b.start[3];
#else
insn = b.start[0] | b.start[1] << 8 |
b.start[2] << 16 | b.start[3] << 24;
#endif
insn = *((const uint32_t*)b.start);
#if defined(DEBUG_RAW_INSN)
cout.width(0);
cout << "0x";
Expand Down
4 changes: 3 additions & 1 deletion parseAPI/src/IndirectAnalyzer.C
Expand Up @@ -58,7 +58,8 @@ static bool IsVariableArgumentFormat(AST::Ptr t, AbsRegion &index) {
}

bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
// if (block->last() == 0x100813b4) dyn_debug_parsing=1; else dyn_debug_parsing=0;
// if (block->last() == 0x15890bc) dyn_debug_parsing=1; else dyn_debug_parsing=0;

parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
parsing_printf("Looking for thunk\n");

Expand All @@ -83,6 +84,7 @@ bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Ad
Slicer formatSlicer(assignments[0], block, func, false, false);

SymbolicExpression se;
se.cs = block->obj()->cs();
JumpTableFormatPred jtfp(func, block, rf, thunks, se);
GraphPtr slice = formatSlicer.backwardSlice(jtfp);
//parsing_printf("\tJump table format: %s\n", jtfp.format().c_str());
Expand Down
73 changes: 53 additions & 20 deletions parseAPI/src/JumpTableFormatPred.C
Expand Up @@ -27,24 +27,32 @@ JumpTableFormatPred::JumpTableFormatPred(ParseAPI::Function *f,
}

void JumpTableFormatPred::FindTOC() {
parsing_printf("Try to find TOC address in R2\n");
Address entry = 0;
if (func->src() == HINT) {
entry = func->addr();
} else if (func->src() == RT) {
entry = func->addr() - 8;
} else {
parsing_printf("\tUnhandled type of function for getting TOC address\n");
return;
toc_address = block->obj()->cs()->getTOC(func->addr());
if (!toc_address) {
// Little-endian PowerPC uses a different ABI that has no .opd section; functions often load R2 at their entry instead
Address entry = 0;
if (func->src() == HINT) {
entry = func->addr();
} else if (func->src() == RT) {
entry = func->addr() - 8;
} else {
parsing_printf("\tUnhandled type of function for getting TOC address\n");
return;
}
const uint32_t * buf = (const uint32_t*) block->obj()->cs()->getPtrToInstruction(entry);
if (buf == NULL) return;
if ((buf[0] >> 16) != 0x3c40 || (buf[1] >> 16) != 0x3842) return;
if (buf[0] & 0x8000) {
toc_address = (buf[0] & 0xffff) | SIGNEX_64_16;
} else {
toc_address = buf[0] & 0xffff;
}
if (buf[1] & 0x8000) {
toc_address = (toc_address << 16) + ((buf[1] & 0xffff) | SIGNEX_64_16);
} else {
toc_address = (toc_address << 16) + (buf[1] & 0xffff);
}
}
parsing_printf("\tLook at address %x\n", entry);
const unsigned char * buf = (const unsigned char*) block->obj()->cs()->getPtrToInstruction(entry);
if (buf == NULL) return;
if (buf[2] != 0x40 || buf[3] != 0x3c || buf[6] != 0x42 || buf[7] != 0x38) return;
toc_address = buf[1];
toc_address = (toc_address << 8) | buf[0];
toc_address = (toc_address << 8) | buf[5];
toc_address = (toc_address << 8) | buf[4];
parsing_printf("\t TOC address %lx in R2\n", toc_address);
}

Expand Down Expand Up @@ -88,16 +96,21 @@ bool JumpTableFormatPred::modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::P
frame.active.erase(rit);
} else {
// For a later memory read, if we have not disqualified this indirect jump,
// it is likely to be a jump table. This memory read is assumed
// it is likely to be a jump table. There are two cases to be handled:
// 1) On ppc, this could be a read from TOC (read from a constant address)
// 2) This memory read is assumed
// and likely to be a spill for a certain register. We syntactically find the location
// where the memory is written and keep slicing on the source register
SliceNode::Ptr readNode;
parsing_printf("\t\tfind another memory read %s %s\n", rit->first.format().c_str(), rit->second[0].ptr->format().c_str());
if (!findSpillRead(g, readNode)) {
if (!findRead(g, readNode)) {
parsing_printf("\tWARNING: a potential memory spill cannot be handled.\n");
jumpTableFormat = false;
return false;
}
if (isTOCRead(frame, readNode)) {
break;
}
// We then do the following things
// 1. delete all absregions introduced by this read node from the active map
// 2. search for the closest instruction that writes the same memory location,
Expand Down Expand Up @@ -295,7 +308,7 @@ string JumpTableFormatPred::format() {
return string("");
}

bool JumpTableFormatPred::findSpillRead(Graph::Ptr g, SliceNode::Ptr &readNode) {
bool JumpTableFormatPred::findRead(Graph::Ptr g, SliceNode::Ptr &readNode) {
NodeIterator gbegin, gend;
g->allNodes(gbegin, gend);
for (; gbegin != gend; ++gbegin) {
Expand Down Expand Up @@ -433,4 +446,24 @@ bool JumpTableFormatPred::adjustSliceFrame(Slicer::SliceFrame &frame, SliceNode:
return true;
}

// Reports whether the memory read performed by slice node `n` takes the
// PowerPC r2 register (32-bit or 64-bit flavour) as one of its inputs.
//
// When it does, every input region of the read's assignment is erased from
// frame.active and the function returns true. When r2 is not among the
// inputs, the frame is left untouched and the function returns false.
bool JumpTableFormatPred::isTOCRead(Slicer::SliceFrame &frame, SliceNode::Ptr n) {
    std::vector<AbsRegion>& readInputs = n->assign()->inputs();

    // Scan the inputs for either r2 variant.
    AbsRegion r2For32(Absloc(ppc32::r2));
    AbsRegion r2For64(Absloc(ppc64::r2));
    bool usesR2 = false;
    for (AbsRegion &region : readInputs) {
        if (region == r2For32 || region == r2For64) {
            usesR2 = true;
            break;
        }
    }
    if (!usesR2) {
        return false;
    }

    parsing_printf("\tTOC Read\n");
    // Drop every region this read introduced from the active map.
    for (AbsRegion &region : readInputs) {
        parsing_printf("\tdelete %s from active map\n", region.format().c_str());
        frame.active.erase(region);
    }
    return true;
}


3 changes: 2 additions & 1 deletion parseAPI/src/JumpTableFormatPred.h
Expand Up @@ -38,8 +38,9 @@ class JumpTableFormatPred : public Slicer::Predicates {
virtual bool modifyCurrentFrame(Slicer::SliceFrame &frame, Graph::Ptr g, Slicer*);
std::string format();
bool isJumpTableFormat() { return jumpTableFormat && findIndex && findTableBase;}
bool findSpillRead(Graph::Ptr g, SliceNode::Ptr &);
bool findRead(Graph::Ptr g, SliceNode::Ptr &);
bool adjustSliceFrame(Slicer::SliceFrame &frame, SliceNode::Ptr, Slicer*);
bool isTOCRead(Slicer::SliceFrame &frame, SliceNode::Ptr);
void FindTOC();
JumpTableFormatPred(ParseAPI::Function *f,
ParseAPI::Block *b,
Expand Down
47 changes: 47 additions & 0 deletions parseAPI/src/SymbolicExpression.C
Expand Up @@ -10,6 +10,43 @@ using namespace std;
using namespace Dyninst;
using namespace Dyninst::ParseAPI;
using namespace Dyninst::DataflowAPI;

// Code source that ReadMemory() queries for the address width, the code/data
// membership checks and the raw bytes. It starts out NULL, so it has to be
// assigned before ReadMemory() can be used.
CodeSource* SymbolicExpression::cs = NULL;

// Reads a 64-bit value from the binary backing SymbolicExpression::cs.
//
// addr  Address to read. It is truncated to 32 bits when the code source
//       reports a 4-byte address width; on Windows builds the load address
//       is subtracted before the lookup.
// v     Receives the value on success; left unmodified on failure.
// (int) Requested read size. It is currently ignored: every successful read
//       is 8 bytes wide.
//
// Returns true when a value was read. Returns false when no code source has
// been set, when the address lies in neither code nor data, or when the code
// source has no bytes backing the address.
bool SymbolicExpression::ReadMemory(Address addr, uint64_t &v, int /* size */) {
    // cs is a static that starts out NULL; refuse to read rather than crash.
    if (cs == NULL) return false;

    const int addressWidth = cs->getAddressWidth();
    if (addressWidth == 4) {
        addr &= 0xffffffff;
    }

#if defined(os_windows)
    addr -= cs->loadAddress();
#endif
    if (!cs->isCode(addr) && !cs->isData(addr)) return false;

    // getPtrToInstruction can return NULL (FindTOC guards against the same
    // case), so check before dereferencing.
    const void *raw = cs->getPtrToInstruction(addr);
    if (raw == NULL) return false;

    // NOTE(review): the read is always 8 bytes regardless of the requested
    // size; a size-specific variant (1/2/4/8 bytes) was drafted but left
    // disabled. An 8-byte read close to the end of a region may run past the
    // backing bytes -- confirm against the CodeSource implementation.
    v = *(const uint64_t *) raw;
    return true;
}

AST::Ptr SymbolicExpression::SimplifyRoot(AST::Ptr ast, Address addr) {
if (ast->getID() == AST::V_RoseAST) {
RoseAST::Ptr roseAST = boost::static_pointer_cast<RoseAST>(ast);
Expand Down Expand Up @@ -141,6 +178,14 @@ AST::Ptr SymbolicExpression::SimplifyRoot(AST::Ptr ast, Address addr) {
// Any 8-bit value is bounded in [0,255].
// Need to keep the length of the dereference if it is 8-bit.
// However, dereference longer than 8-bit should be regarded the same.
if (roseAST->child(0)->getID() == AST::V_ConstantAST) {
uint64_t val = 0;
ConstantAST::Ptr c = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
Address addr = c->val().val;
if (ReadMemory(addr, val, roseAST->val().size / 8)) {
return ConstantAST::create(Constant(val, 64));
}
}
if (roseAST->val().size == 8)
return ast;
else
Expand All @@ -153,10 +198,12 @@ AST::Ptr SymbolicExpression::SimplifyRoot(AST::Ptr ast, Address addr) {
ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
return ConstantAST::create(Constant(child0->val().val << child1->val().val, 64));
}
/*
if (roseAST->child(1)->getID() == AST::V_ConstantAST) {
ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
if (child1->val().val == 0) return roseAST->child(0);
}
*/
break;
case ROSEOperation::andOp:
if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
Expand Down
4 changes: 3 additions & 1 deletion parseAPI/src/SymbolicExpression.h
Expand Up @@ -3,6 +3,7 @@

#include "DynAST.h"
#include "Absloc.h"
#include "CodeSource.h"
#include <map>
using Dyninst::AST;
using namespace Dyninst;
Expand All @@ -19,7 +20,8 @@ class SymbolicExpression {
static AST::Ptr SubstituteAnAST(AST::Ptr ast, const std::map<AST::Ptr, AST::Ptr>& aliasMap);
static AST::Ptr DeepCopyAnAST(AST::Ptr ast);
static bool ContainAnAST(AST::Ptr root, AST::Ptr check);

static bool ReadMemory(Address addr, uint64_t &val, int size);
static ParseAPI::CodeSource* cs;
std::pair<AST::Ptr, bool> ExpandAssignment(Assignment::Ptr);

//On x86 and x86-64, the value of PC is post-instruction,
Expand Down
8 changes: 7 additions & 1 deletion symtabAPI/src/Object-elf.C
Expand Up @@ -622,11 +622,17 @@ bool Object::loaded_elf(Offset& txtaddr, Offset& dataddr,
if (!scn.isFromDebugFile()) {
allRegionHdrs.push_back(&scn);
Elf_X_Data data = scn.get_data();
if(strcmp(name, OPD_NAME) == 0)
if(strcmp(name, OPD_NAME) == 0 || strcmp(name, GOT_NAME) == 0)
{
data.d_type(ELF_T_XWORD);
data.xlatetom(elfHdr->e_endian() ? ELFDATA2MSB : ELFDATA2LSB);
}
if(strcmp(name, TEXT_NAME) == 0 || strcmp(name, ".rodata") == 0)
{
data.d_type(ELF_T_WORD);
data.xlatetom(elfHdr->e_endian() ? ELFDATA2MSB : ELFDATA2LSB);
}

if(scn.sh_flags() & SHF_ALLOC) {
// .bss, etc. have a disk size of 0
unsigned long diskSize = (scn.sh_type() == SHT_NOBITS) ? 0 : scn.sh_size();
Expand Down

0 comments on commit 74d71a7

Please sign in to comment.