Skip to content

Commit

Permalink
Fix multi-unquicken bug for same CodeItems
Browse files Browse the repository at this point in the history
The newly introduced CompactDex deduplicates not only String data but all types
of items in the data section, including CodeItems. Therefore, if the same CodeItem
is linked to more than one method (e.g. simple constructors), the Cdex uses the
same code offset for all of them, and that CodeItem should be decompiled only once.

So we need to maintain a history of all visited CodeItems, so that we can check
whether a CodeItem has already been decompiled. Otherwise, the QuickenData streams
will get corrupted, since they will be consumed at the wrong offsets.

Currently this is achieved via a simple hashset implementation that was forked
from https://github.com/avsej/hashset.c. Hashing the data pointers is good enough
for now, since we always operate against the already mapped file (all CodeItem
offsets are calculated from the loaded file's virtual address).

Signed-off-by: Anestis Bechtsoudis <anestis@census-labs.com>
  • Loading branch information
anestisb committed Aug 30, 2018
1 parent 7ad48d6 commit 254fb3b
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 18 deletions.
13 changes: 13 additions & 0 deletions src/dex.c
Original file line number Diff line number Diff line change
Expand Up @@ -1281,5 +1281,18 @@ void dex_DecodeCDexFields(cdexCode *pCdexCode,
}
}

// Look up the CodeItem referenced by pDexMethod->codeOff (relative to the
// address returned by dex_getDataAddr) and report, through the out
// parameters, a pointer to its instruction array (*pCode) and its code size
// (*codeSize). StandardDex and CompactDex use different CodeItem layouts, so
// each is handled separately.
void dex_getCodeItemInfo(const u1 *dexFileBuf, dexMethod *pDexMethod, u2 **pCode, u4 *codeSize) {
  if (dex_checkType(dexFileBuf) != kNormalDex) {
    // CompactDex: the size is not read from a plain struct member, it is
    // obtained from dex_DecodeCDexFields (all other outputs are not requested)
    cdexCode *pCompactItem = (cdexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff);
    *pCode = pCompactItem->insns;
    dex_DecodeCDexFields(pCompactItem, codeSize, NULL, NULL, NULL, NULL, true);
    return;
  }

  // StandardDex: both values are plain members of the code item
  dexCode *pStandardItem = (dexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff);
  *pCode = pStandardItem->insns;
  *codeSize = pStandardItem->insnsSize;
}

// Store the requested disassembler on/off state in the file-level flag.
void dex_setDisassemblerStatus(bool status) {
  enableDisassembler = status;
}
// Report the current value of the file-level disassembler flag.
bool dex_getDisassemblerStatus(void) {
  return enableDisassembler;
}
3 changes: 3 additions & 0 deletions src/dex.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,4 +388,7 @@ char *dex_descriptorClassToDot(const char *);
// decodeOnlyInsrCnt is specified then only the instruction count is decoded.
void dex_DecodeCDexFields(cdexCode *, u4 *, u2 *, u2 *, u2 *, u2 *, bool);

// Get CodeItem information from a DexMethod. The CodeItem is located via the
// method's codeOff; the address of its instruction array is stored through the
// u2 ** argument and its code size through the u4 * argument. Both StandardDex
// and CompactDex code items are handled.
void dex_getCodeItemInfo(const u1 *, dexMethod *, u2 **, u4 *);

#endif
133 changes: 133 additions & 0 deletions src/hashset/hashset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright 2012 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "hashset.h"
#include <assert.h>

/* Multiplier applied to an item's value to pick its first slot (then masked). */
static const unsigned int prime_1 = 73;
/* Step added to the slot index on every collision while probing (then masked). */
static const unsigned int prime_2 = 5009;

hashset_t hashset_create() {
hashset_t set = calloc(1, sizeof(struct hashset_st));

if (set == NULL) {
return NULL;
}
set->nbits = 3;
set->capacity = (size_t)(1 << set->nbits);
set->mask = set->capacity - 1;
set->items = calloc(set->capacity, sizeof(size_t));
if (set->items == NULL) {
hashset_destroy(set);
return NULL;
}
set->nitems = 0;
set->n_deleted_items = 0;
return set;
}

/* Number of live items currently stored in the set (set must be non-NULL). */
size_t hashset_num_items(hashset_t set) {
  const size_t live_items = set->nitems;
  return live_items;
}

/* Release the slot array and the set itself. A NULL set is accepted. */
void hashset_destroy(hashset_t set) {
  if (set == NULL) {
    return;
  }

  free(set->items);
  free(set);
}

/*
 * Insert item into the slot table without triggering a resize.
 *
 * Slot values 0 and 1 are reserved markers (0 = never used, 1 = deleted), so
 * those two values cannot be stored.
 *
 * Returns  1 if the item was inserted,
 *          0 if the item was already present,
 *         -1 if the item is one of the reserved values, or if no usable slot
 *            exists (table completely filled with live entries).
 */
static int hashset_add_member(hashset_t set, void *item) {
  size_t value = (size_t)item;
  size_t ii;
  size_t probes;
  size_t insert_at = 0;
  int have_slot = 0;

  if (value == 0 || value == 1) {
    return -1;
  }

  ii = set->mask & (prime_1 * value);

  /*
   * The probe step is odd and the capacity is a power of two, so `capacity`
   * probes visit every slot exactly once; the bound guarantees termination.
   */
  for (probes = 0; probes < set->capacity; probes++) {
    size_t slot = set->items[ii];

    if (slot == value) {
      return 0;
    }
    if (slot == 0) {
      /* end of the probe chain: the value is definitely not stored */
      if (!have_slot) {
        insert_at = ii;
        have_slot = 1;
      }
      break;
    }
    if (slot == 1 && !have_slot) {
      /*
       * Remember the first deleted slot for reuse, but keep probing: the
       * value may still live further down the chain, and inserting here
       * right away would store it twice.
       */
      insert_at = ii;
      have_slot = 1;
    }
    ii = set->mask & (ii + prime_2);
  }

  if (!have_slot) {
    return -1;
  }

  set->nitems++;
  if (set->items[insert_at] == 1) {
    set->n_deleted_items--;
  }
  set->items[insert_at] = value;
  return 1;
}

/*
 * Double the table and re-insert every live item once used slots (live plus
 * deleted) reach 85% of the capacity. Deleted markers are dropped in the
 * process.
 *
 * The new table is allocated before the set is modified. Allocation failure
 * is fatal: this function has no way to report it, and continuing with an
 * over-full table would break the probing loops.
 */
static void maybe_rehash(hashset_t set) {
  size_t *old_items;
  size_t *new_items;
  size_t old_capacity, new_nbits, new_capacity, ii;

  if (set->nitems + set->n_deleted_items < (double)set->capacity * 0.85) {
    return;
  }

  new_nbits = set->nbits + 1;
  /*
   * Shift a size_t rather than an int. calloc() rejects element counts whose
   * byte size overflows size_t, which happens before new_nbits can reach the
   * width of size_t.
   */
  new_capacity = (size_t)1 << new_nbits;
  new_items = calloc(new_capacity, sizeof *new_items);
  if (new_items == NULL) {
    assert(new_items != NULL); /* debug builds: report the failing location */
    abort();                   /* NDEBUG builds: still fail fast, no NULL deref */
  }

  old_items = set->items;
  old_capacity = set->capacity;

  set->nbits = new_nbits;
  set->capacity = new_capacity;
  set->mask = new_capacity - 1;
  set->items = new_items;
  set->nitems = 0;
  set->n_deleted_items = 0;

  for (ii = 0; ii < old_capacity; ii++) {
    /* 0 and 1 are the empty / deleted markers, not stored items */
    if (old_items[ii] > 1) {
      hashset_add_member(set, (void *)old_items[ii]);
    }
  }
  free(old_items);
}

/*
 * Public insert: store the item, then give the table a chance to grow.
 * The result of the insertion itself is what gets returned.
 */
int hashset_add(hashset_t set, void *item) {
  const int insert_status = hashset_add_member(set, item);

  maybe_rehash(set);
  return insert_status;
}

/*
 * Remove item from the set by overwriting its slot with the "deleted"
 * marker (1), so that probe chains running through the slot stay intact.
 *
 * Returns 1 if the item was found and removed, 0 otherwise. The reserved
 * values 0 and 1 can never be stored, so they are reported as not found.
 */
int hashset_remove(hashset_t set, void *item) {
  size_t value = (size_t)item;
  size_t ii;
  size_t probes;

  /*
   * Without this guard, value 1 would match a deleted marker and the
   * live/deleted counters would be corrupted.
   */
  if (value == 0 || value == 1) {
    return 0;
  }

  ii = set->mask & (prime_1 * value);

  /* an empty slot (0) ends the chain; the probe count bounds the loop */
  for (probes = 0; probes < set->capacity && set->items[ii] != 0; probes++) {
    if (set->items[ii] == value) {
      set->items[ii] = 1;
      set->nitems--;
      set->n_deleted_items++;
      return 1;
    }
    ii = set->mask & (ii + prime_2);
  }
  return 0;
}

/*
 * Check whether item is stored in the set.
 *
 * Returns 1 if the item is present and 0 otherwise. The reserved values 0
 * and 1 can never be stored, so they are always reported as absent.
 */
int hashset_is_member(hashset_t set, void *item) {
  size_t value = (size_t)item;
  size_t ii;
  size_t probes;

  /* without this guard, value 1 would match a deleted-slot marker */
  if (value == 0 || value == 1) {
    return 0;
  }

  ii = set->mask & (prime_1 * value);

  /* an empty slot (0) ends the chain; the probe count bounds the loop */
  for (probes = 0; probes < set->capacity && set->items[ii] != 0; probes++) {
    if (set->items[ii] == value) {
      return 1;
    }
    ii = set->mask & (ii + prime_2);
  }
  return 0;
}
72 changes: 72 additions & 0 deletions src/hashset/hashset.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright 2012 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef HASHSET_H
#define HASHSET_H 1

#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Open-addressing set of pointer-sized values.
 * Slot value 0 marks a never-used slot and 1 marks a deleted slot, which is
 * why those two values cannot be stored as items.
 */
struct hashset_st {
/* capacity is 1 << nbits */
size_t nbits;
/* capacity - 1; ANDed with a hash or probe index to keep it inside the table */
size_t mask;

/* number of slots in items */
size_t capacity;
/* slot array holding the stored values and the 0 / 1 markers */
size_t *items;
/* number of live items */
size_t nitems;
/* number of slots currently holding the deleted marker */
size_t n_deleted_items;
};

/* Handle type used by the whole API; note that it is a pointer. */
typedef struct hashset_st *hashset_t;

/* create a hashset instance
 *
 * returns NULL if memory allocation fails
 */
hashset_t hashset_create(void);

/* destroy a hashset instance; passing NULL is allowed */
void hashset_destroy(hashset_t set);

/* number of items currently stored in the set */
size_t hashset_num_items(hashset_t set);

/* add an item to the hashset.
 *
 * @note 0 and 1 are special values, meaning nil and deleted items. They
 * cannot be stored; the function returns -1 to indicate the error.
 *
 * returns 0 if the item is already in the set and 1 if it was added
 */
int hashset_add(hashset_t set, void *item);

/* remove an item from the hashset
 *
 * returns non-zero if the item was removed and zero if the item did not
 * exist
 */
int hashset_remove(hashset_t set, void *item);

/* check whether the item exists in the hashset
 *
 * returns non-zero if the item exists and zero otherwise
 */
int hashset_is_member(hashset_t set, void *item);

#ifdef __cplusplus
}
#endif

#endif
54 changes: 46 additions & 8 deletions src/vdex/vdex_backend_019.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <sys/mman.h>

#include "../hashset/hashset.h"
#include "../out_writer.h"
#include "../utils.h"
#include "vdex_backend_019.h"
Expand Down Expand Up @@ -414,6 +415,13 @@ int vdex_backend_019_process(const char *VdexFileName,
initCompactOffset(quickenInfoOffTable.data);
}

// Make sure to not unquicken the same code item multiple times.
hashset_t unquickened_code_items = hashset_create();
if (!unquickened_code_items) {
LOGMSG(l_ERROR, "Failed to create hashset");
return -1;
}

// For each class
log_dis("file #%zu: classDefsSize=%" PRIu32 "\n", dex_file_idx,
dex_getClassDefsSize(dexFileBuf));
Expand Down Expand Up @@ -462,16 +470,27 @@ int vdex_backend_019_process(const char *VdexFileName,

// Skip empty methods
if (curDexMethod.codeOff == 0) {
continue;
goto next_dmethod;
}

if (pRunArgs->unquicken) {
// Check if we've already unquickened the code item
u2 *pCode = NULL;
u4 codeSize = 0;
dex_getCodeItemInfo(dexFileBuf, &curDexMethod, &pCode, &codeSize);
if (hashset_is_member(unquickened_code_items, (void *)pCode)) {
LOGMSG(l_DEBUG, "Already unquickened direct method:%d",
lastIdx + curDexMethod.methodIdx);
goto next_dmethod;
}

// Since new code item, add to set
hashset_add(unquickened_code_items, (void *)pCode);

// Offset being 0 means not quickened.
const u4 qOffset = getOffset(lastIdx + curDexMethod.methodIdx);

// Update lastIdx since followings delta_idx are based on 1st elements idx
lastIdx += curDexMethod.methodIdx;

// Get quickenData for method and decompile
vdex_data_array_t quickenData;
memset(&quickenData, 0, sizeof(vdex_data_array_t));
if (quickenInfo.size != 0 && qOffset != 0u) {
Expand All @@ -482,6 +501,10 @@ int vdex_backend_019_process(const char *VdexFileName,
LOGMSG(l_ERROR, "Failed to decompile Dex file");
return -1;
}

next_dmethod:
// Update lastIdx since followings delta_idx are based on 1st elements idx
lastIdx += curDexMethod.methodIdx;
} else {
vdex_decompiler_019_walk(dexFileBuf, &curDexMethod);
}
Expand All @@ -497,16 +520,27 @@ int vdex_backend_019_process(const char *VdexFileName,

// Skip native or abstract methods
if (curDexMethod.codeOff == 0) {
continue;
goto next_vmethod;
}

if (pRunArgs->unquicken) {
// Check if we've already unquickened the code item
u2 *pCode = NULL;
u4 codeSize = 0;
dex_getCodeItemInfo(dexFileBuf, &curDexMethod, &pCode, &codeSize);
if (hashset_is_member(unquickened_code_items, (void *)pCode)) {
LOGMSG(l_DEBUG, "Already unquickened virtual method:%d",
lastIdx + curDexMethod.methodIdx);
goto next_vmethod;
}

// Since new code item, add to set
hashset_add(unquickened_code_items, (void *)pCode);

// Offset being 0 means not quickened.
const u4 qOffset = getOffset(lastIdx + curDexMethod.methodIdx);

// Update lastIdx since followings delta_idx are based on 1st elements idx
lastIdx += curDexMethod.methodIdx;

// Get quickenData for method and decompile
vdex_data_array_t quickenData;
memset(&quickenData, 0, sizeof(vdex_data_array_t));
if (quickenInfo.size != 0 && qOffset != 0u) {
Expand All @@ -517,6 +551,10 @@ int vdex_backend_019_process(const char *VdexFileName,
LOGMSG(l_ERROR, "Failed to decompile Dex file");
return -1;
}

next_vmethod:
// Update lastIdx since followings delta_idx are based on 1st elements idx
lastIdx += curDexMethod.methodIdx;
} else {
vdex_decompiler_019_walk(dexFileBuf, &curDexMethod);
}
Expand Down
12 changes: 2 additions & 10 deletions src/vdex/vdex_decompiler_019.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,10 @@ bool vdex_decompiler_019_decompile(const u1 *dexFileBuf,
return true;
}

// We have different code items in StandardDex and CompactDex
// Get method's CodeItem information
u2 *pCode = NULL;
u4 codeSize = 0;
if (dex_checkType(dexFileBuf) == kNormalDex) {
dexCode *pDexCode = (dexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff);
pCode = pDexCode->insns;
codeSize = pDexCode->insnsSize;
} else {
cdexCode *pCdexCode = (cdexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff);
pCode = pCdexCode->insns;
dex_DecodeCDexFields(pCdexCode, &codeSize, NULL, NULL, NULL, NULL, true);
}
dex_getCodeItemInfo(dexFileBuf, pDexMethod, &pCode, &codeSize);

u4 startCodeOff = dex_getFirstInstrOff(dexFileBuf, pDexMethod);

Expand Down

0 comments on commit 254fb3b

Please sign in to comment.