diff --git a/src/dex.c b/src/dex.c index abdfd09..3f828bd 100644 --- a/src/dex.c +++ b/src/dex.c @@ -1281,5 +1281,18 @@ void dex_DecodeCDexFields(cdexCode *pCdexCode, } } +void dex_getCodeItemInfo(const u1 *dexFileBuf, dexMethod *pDexMethod, u2 **pCode, u4 *codeSize) { + // We have different code items in StandardDex and CompactDex + if (dex_checkType(dexFileBuf) == kNormalDex) { + dexCode *pDexCode = (dexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); + *pCode = pDexCode->insns; + *codeSize = pDexCode->insnsSize; + } else { + cdexCode *pCdexCode = (cdexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); + *pCode = pCdexCode->insns; + dex_DecodeCDexFields(pCdexCode, codeSize, NULL, NULL, NULL, NULL, true); + } +} + void dex_setDisassemblerStatus(bool status) { enableDisassembler = status; } bool dex_getDisassemblerStatus(void) { return enableDisassembler; } diff --git a/src/dex.h b/src/dex.h index 2edf57d..86ad899 100644 --- a/src/dex.h +++ b/src/dex.h @@ -388,4 +388,7 @@ char *dex_descriptorClassToDot(const char *); // decodeOnlyInsrCnt is specified then only the instruction count is decoded. void dex_DecodeCDexFields(cdexCode *, u4 *, u2 *, u2 *, u2 *, u2 *, bool); +// Get CodeItem information from a DexMethod +void dex_getCodeItemInfo(const u1 *, dexMethod *, u2 **, u4 *); + #endif diff --git a/src/hashset/hashset.c b/src/hashset/hashset.c new file mode 100644 index 0000000..eeb3b33 --- /dev/null +++ b/src/hashset/hashset.c @@ -0,0 +1,133 @@ +/* + * Copyright 2012 Couchbase, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/*
 *     Copyright 2012 Couchbase, Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

/*
 * Set of pointer-sized values stored in an open-addressing table.
 *
 * Each slot holds the value itself. Two values are reserved as slot markers and can
 * therefore never be stored: 0 marks an empty slot and 1 marks a deleted slot
 * (tombstone). The capacity is always a power of two and the probe step (prime_2) is
 * odd, so a probe sequence visits every slot once within `capacity` steps.
 */

/* ------------------------- interface (hashset.h) ------------------------- */

#ifndef HASHSET_H
#define HASHSET_H 1

#include <limits.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

struct hashset_st {
  size_t nbits; /* capacity == 1 << nbits */
  size_t mask;  /* capacity - 1, used to wrap slot indices */

  size_t capacity;        /* number of slots in items */
  size_t *items;          /* slot array: 0 = empty, 1 = deleted, else a member */
  size_t nitems;          /* live members */
  size_t n_deleted_items; /* tombstones currently in the table */
};

typedef struct hashset_st *hashset_t;

/* create hashset instance; returns NULL when memory cannot be allocated */
hashset_t hashset_create(void);

/* destroy hashset instance (NULL is accepted) */
void hashset_destroy(hashset_t set);

/* number of items currently stored */
size_t hashset_num_items(hashset_t set);

/* add item into the hashset.
 *
 * @note 0 and 1 are special values, meaning nil and deleted items. The
 *       function returns -1 for them, indicating an error.
 *
 * returns zero if the item is already in the set and non-zero otherwise
 */
int hashset_add(hashset_t set, void *item);

/* remove item from the hashset
 *
 * returns non-zero if the item was removed and zero if the item did not
 * exist
 */
int hashset_remove(hashset_t set, void *item);

/* check for existence of the item
 *
 * returns non-zero if the item exists and zero otherwise
 */
int hashset_is_member(hashset_t set, void *item);

#ifdef __cplusplus
}
#endif

#endif /* HASHSET_H */

/* ----------------------- implementation (hashset.c) ---------------------- */

static const unsigned int prime_1 = 73;
static const unsigned int prime_2 = 5009;

/* Reserved slot markers; neither can be stored as a member. */
static const size_t slot_empty = 0;
static const size_t slot_deleted = 1;

/*
 * Find the slot that holds `value`.
 *
 * Returns the slot index, or set->capacity when the value is not stored. Reserved
 * marker values are never reported as found, so a tombstone cannot be mistaken for
 * a member. The walk is bounded by the capacity, so it terminates even when the
 * table contains no empty slot.
 */
static size_t hashset_find_slot(const struct hashset_st *set, size_t value) {
  size_t ii, probes;

  if (value == slot_empty || value == slot_deleted) {
    return set->capacity;
  }

  ii = set->mask & (prime_1 * value);
  for (probes = 0; probes < set->capacity; probes++) {
    if (set->items[ii] == value) {
      return ii;
    }
    if (set->items[ii] == slot_empty) {
      break; /* end of this probe chain */
    }
    ii = set->mask & (ii + prime_2);
  }
  return set->capacity;
}

/*
 * Insert `item` without growing the table.
 *
 * Returns 1 when the item was inserted, 0 when it was already present and -1 when
 * the item is a reserved value or no free slot exists.
 */
static int hashset_add_member(hashset_t set, void *item) {
  size_t value = (size_t)item;
  size_t ii, probes;
  size_t target = set->capacity; /* capacity == "no reusable slot seen yet" */

  if (value == slot_empty || value == slot_deleted) {
    return -1;
  }

  ii = set->mask & (prime_1 * value);
  for (probes = 0; probes < set->capacity; probes++) {
    size_t cur = set->items[ii];

    if (cur == value) {
      return 0;
    }
    if (cur == slot_empty) {
      if (target == set->capacity) {
        target = ii;
      }
      break;
    }
    /* Remember the first tombstone for reuse, but keep walking: the value may
     * still be stored further along the chain and must not be inserted twice. */
    if (cur == slot_deleted && target == set->capacity) {
      target = ii;
    }
    ii = set->mask & (ii + prime_2);
  }

  if (target == set->capacity) {
    return -1; /* table completely occupied by live members */
  }
  if (set->items[target] == slot_deleted) {
    set->n_deleted_items--;
  }
  set->items[target] = value;
  set->nitems++;
  return 1;
}

/*
 * Double the table once live members plus tombstones reach 85% of the capacity.
 *
 * The new table is allocated before the set is modified, so a failed allocation
 * leaves the set untouched and usable. Returns 0 on success or when no growth was
 * needed, -1 when the table could not be grown.
 */
static int maybe_rehash(hashset_t set) {
  size_t *old_items, *new_items;
  size_t old_capacity, new_nbits, ii;

  if ((double)(set->nitems + set->n_deleted_items) < (double)set->capacity * 0.85) {
    return 0;
  }

  new_nbits = set->nbits + 1;
  if (new_nbits >= sizeof(size_t) * CHAR_BIT) {
    return -1; /* the shift below would overflow size_t */
  }
  new_items = calloc((size_t)1 << new_nbits, sizeof(*new_items));
  if (new_items == NULL) {
    return -1;
  }

  old_items = set->items;
  old_capacity = set->capacity;

  set->nbits = new_nbits;
  set->capacity = (size_t)1 << new_nbits;
  set->mask = set->capacity - 1;
  set->items = new_items;
  set->nitems = 0;
  set->n_deleted_items = 0;

  /* Empty and deleted markers are rejected by hashset_add_member, so only live
   * members are carried over and all tombstones are dropped. */
  for (ii = 0; ii < old_capacity; ii++) {
    hashset_add_member(set, (void *)old_items[ii]);
  }
  free(old_items);
  return 0;
}

void hashset_destroy(hashset_t set) {
  if (set) {
    free(set->items);
  }
  free(set);
}

hashset_t hashset_create(void) {
  hashset_t set = calloc(1, sizeof(*set));

  if (set == NULL) {
    return NULL;
  }
  set->nbits = 3;
  set->capacity = (size_t)1 << set->nbits;
  set->mask = set->capacity - 1;
  set->items = calloc(set->capacity, sizeof(*set->items));
  if (set->items == NULL) {
    hashset_destroy(set);
    return NULL;
  }
  set->nitems = 0;
  set->n_deleted_items = 0;
  return set;
}

size_t hashset_num_items(hashset_t set) { return set->nitems; }

int hashset_add(hashset_t set, void *item) {
  int rv = hashset_add_member(set, item);

  /* Growth is best effort: when it fails the item is already stored and the set
   * keeps working at a higher load, because every probe loop is bounded. */
  (void)maybe_rehash(set);
  return rv;
}

int hashset_remove(hashset_t set, void *item) {
  size_t ii = hashset_find_slot(set, (size_t)item);

  if (ii == set->capacity) {
    return 0;
  }
  /* Leave a tombstone so probe chains running through this slot stay intact. */
  set->items[ii] = slot_deleted;
  set->nitems--;
  set->n_deleted_items++;
  return 1;
}

int hashset_is_member(hashset_t set, void *item) {
  return hashset_find_slot(set, (size_t)item) != set->capacity;
}
the + * function will return -1 indicating error. + * + * returns zero if the item already in the set and non-zero otherwise + */ +int hashset_add(hashset_t set, void *item); + +/* remove item from the hashset + * + * returns non-zero if the item was removed and zero if the item wasn't + * exist + */ +int hashset_remove(hashset_t set, void *item); + +/* check if existence of the item + * + * returns non-zero if the item exists and zero otherwise + */ +int hashset_is_member(hashset_t set, void *item); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/vdex/vdex_backend_019.c b/src/vdex/vdex_backend_019.c index 13a4671..7445b36 100644 --- a/src/vdex/vdex_backend_019.c +++ b/src/vdex/vdex_backend_019.c @@ -22,6 +22,7 @@ #include +#include "../hashset/hashset.h" #include "../out_writer.h" #include "../utils.h" #include "vdex_backend_019.h" @@ -414,6 +415,13 @@ int vdex_backend_019_process(const char *VdexFileName, initCompactOffset(quickenInfoOffTable.data); } + // Make sure to not unquicken the same code item multiple times. + hashset_t unquickened_code_items = hashset_create(); + if (!unquickened_code_items) { + LOGMSG(l_ERROR, "Failed to create hashset"); + return -1; + } + // For each class log_dis("file #%zu: classDefsSize=%" PRIu32 "\n", dex_file_idx, dex_getClassDefsSize(dexFileBuf)); @@ -462,16 +470,27 @@ int vdex_backend_019_process(const char *VdexFileName, // Skip empty methods if (curDexMethod.codeOff == 0) { - continue; + goto next_dmethod; } if (pRunArgs->unquicken) { + // Check if we've already unquickened the code item + u2 *pCode = NULL; + u4 codeSize = 0; + dex_getCodeItemInfo(dexFileBuf, &curDexMethod, &pCode, &codeSize); + if (hashset_is_member(unquickened_code_items, (void *)pCode)) { + LOGMSG(l_DEBUG, "Already unquickened direct method:%d", + lastIdx + curDexMethod.methodIdx); + goto next_dmethod; + } + + // Since new code item, add to set + hashset_add(unquickened_code_items, (void *)pCode); + // Offset being 0 means not quickened. 
const u4 qOffset = getOffset(lastIdx + curDexMethod.methodIdx); - // Update lastIdx since followings delta_idx are based on 1st elements idx - lastIdx += curDexMethod.methodIdx; - + // Get quickenData for method and decompile vdex_data_array_t quickenData; memset(&quickenData, 0, sizeof(vdex_data_array_t)); if (quickenInfo.size != 0 && qOffset != 0u) { @@ -482,6 +501,10 @@ int vdex_backend_019_process(const char *VdexFileName, LOGMSG(l_ERROR, "Failed to decompile Dex file"); return -1; } + + next_dmethod: + // Update lastIdx since followings delta_idx are based on 1st elements idx + lastIdx += curDexMethod.methodIdx; } else { vdex_decompiler_019_walk(dexFileBuf, &curDexMethod); } @@ -497,16 +520,27 @@ int vdex_backend_019_process(const char *VdexFileName, // Skip native or abstract methods if (curDexMethod.codeOff == 0) { - continue; + goto next_vmethod; } if (pRunArgs->unquicken) { + // Check if we've already unquickened the code item + u2 *pCode = NULL; + u4 codeSize = 0; + dex_getCodeItemInfo(dexFileBuf, &curDexMethod, &pCode, &codeSize); + if (hashset_is_member(unquickened_code_items, (void *)pCode)) { + LOGMSG(l_DEBUG, "Already unquickened virtual method:%d", + lastIdx + curDexMethod.methodIdx); + goto next_vmethod; + } + + // Since new code item, add to set + hashset_add(unquickened_code_items, (void *)pCode); + // Offset being 0 means not quickened. 
const u4 qOffset = getOffset(lastIdx + curDexMethod.methodIdx); - // Update lastIdx since followings delta_idx are based on 1st elements idx - lastIdx += curDexMethod.methodIdx; - + // Get quickenData for method and decompile vdex_data_array_t quickenData; memset(&quickenData, 0, sizeof(vdex_data_array_t)); if (quickenInfo.size != 0 && qOffset != 0u) { @@ -517,6 +551,10 @@ int vdex_backend_019_process(const char *VdexFileName, LOGMSG(l_ERROR, "Failed to decompile Dex file"); return -1; } + + next_vmethod: + // Update lastIdx since followings delta_idx are based on 1st elements idx + lastIdx += curDexMethod.methodIdx; } else { vdex_decompiler_019_walk(dexFileBuf, &curDexMethod); } diff --git a/src/vdex/vdex_decompiler_019.c b/src/vdex/vdex_decompiler_019.c index b5454ba..968b7f7 100644 --- a/src/vdex/vdex_decompiler_019.c +++ b/src/vdex/vdex_decompiler_019.c @@ -107,18 +107,10 @@ bool vdex_decompiler_019_decompile(const u1 *dexFileBuf, return true; } - // We have different code items in StandardDex and CompactDex + // Get method's CodeItem information u2 *pCode = NULL; u4 codeSize = 0; - if (dex_checkType(dexFileBuf) == kNormalDex) { - dexCode *pDexCode = (dexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); - pCode = pDexCode->insns; - codeSize = pDexCode->insnsSize; - } else { - cdexCode *pCdexCode = (cdexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); - pCode = pCdexCode->insns; - dex_DecodeCDexFields(pCdexCode, &codeSize, NULL, NULL, NULL, NULL, true); - } + dex_getCodeItemInfo(dexFileBuf, pDexMethod, &pCode, &codeSize); u4 startCodeOff = dex_getFirstInstrOff(dexFileBuf, pDexMethod);