From 254fb3b51fe3973baddcea595699dfc62bd124dc Mon Sep 17 00:00:00 2001 From: Anestis Bechtsoudis Date: Thu, 30 Aug 2018 12:22:35 +0300 Subject: [PATCH] Fix multi-unquicken bug for same CodeItems The newly introduced CompactDex format deduplicates not only String data, but all types of items in the data section, including CodeItems. Therefore, if the same CodeItem is linked to more than one method (e.g. simple constructors), the Cdex uses the same code offset for all of them, and that code must be decompiled only once. We therefore need to keep a record of all visited CodeItems so that we can check whether a CodeItem has already been decompiled. Otherwise, the QuickenData stream gets corrupted, since it is consumed at the wrong offsets. Currently this is achieved via a simple hashset implementation forked from https://github.com/avsej/hashset.c. Hashing the data pointers is good enough for now, since we always operate against the already mapped file (all CodeItem offsets are calculated from the loaded file's virtual address). Signed-off-by: Anestis Bechtsoudis --- src/dex.c | 13 ++++ src/dex.h | 3 + src/hashset/hashset.c | 133 +++++++++++++++++++++++++++++++++ src/hashset/hashset.h | 72 ++++++++++++++++++ src/vdex/vdex_backend_019.c | 54 +++++++++++-- src/vdex/vdex_decompiler_019.c | 12 +-- 6 files changed, 269 insertions(+), 18 deletions(-) create mode 100644 src/hashset/hashset.c create mode 100644 src/hashset/hashset.h diff --git a/src/dex.c b/src/dex.c index abdfd09..3f828bd 100644 --- a/src/dex.c +++ b/src/dex.c @@ -1281,5 +1281,18 @@ void dex_DecodeCDexFields(cdexCode *pCdexCode, } } +void dex_getCodeItemInfo(const u1 *dexFileBuf, dexMethod *pDexMethod, u2 **pCode, u4 *codeSize) { + // We have different code items in StandardDex and CompactDex + if (dex_checkType(dexFileBuf) == kNormalDex) { + dexCode *pDexCode = (dexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); + *pCode = pDexCode->insns; + *codeSize = pDexCode->insnsSize; + } else { + cdexCode *pCdexCode = (cdexCode
*)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); + *pCode = pCdexCode->insns; + dex_DecodeCDexFields(pCdexCode, codeSize, NULL, NULL, NULL, NULL, true); + } +} + void dex_setDisassemblerStatus(bool status) { enableDisassembler = status; } bool dex_getDisassemblerStatus(void) { return enableDisassembler; } diff --git a/src/dex.h b/src/dex.h index 2edf57d..86ad899 100644 --- a/src/dex.h +++ b/src/dex.h @@ -388,4 +388,7 @@ char *dex_descriptorClassToDot(const char *); // decodeOnlyInsrCnt is specified then only the instruction count is decoded. void dex_DecodeCDexFields(cdexCode *, u4 *, u2 *, u2 *, u2 *, u2 *, bool); +// Get CodeItem information from a DexMethod +void dex_getCodeItemInfo(const u1 *, dexMethod *, u2 **, u4 *); + #endif diff --git a/src/hashset/hashset.c b/src/hashset/hashset.c new file mode 100644 index 0000000..eeb3b33 --- /dev/null +++ b/src/hashset/hashset.c @@ -0,0 +1,133 @@ +/* + * Copyright 2012 Couchbase, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "hashset.h" +#include + +static const unsigned int prime_1 = 73; +static const unsigned int prime_2 = 5009; + +hashset_t hashset_create() { + hashset_t set = calloc(1, sizeof(struct hashset_st)); + + if (set == NULL) { + return NULL; + } + set->nbits = 3; + set->capacity = (size_t)(1 << set->nbits); + set->mask = set->capacity - 1; + set->items = calloc(set->capacity, sizeof(size_t)); + if (set->items == NULL) { + hashset_destroy(set); + return NULL; + } + set->nitems = 0; + set->n_deleted_items = 0; + return set; +} + +size_t hashset_num_items(hashset_t set) { return set->nitems; } + +void hashset_destroy(hashset_t set) { + if (set) { + free(set->items); + } + free(set); +} + +static int hashset_add_member(hashset_t set, void *item) { + size_t value = (size_t)item; + size_t ii; + + if (value == 0 || value == 1) { + return -1; + } + + ii = set->mask & (prime_1 * value); + + while (set->items[ii] != 0 && set->items[ii] != 1) { + if (set->items[ii] == value) { + return 0; + } else { + /* search free slot */ + ii = set->mask & (ii + prime_2); + } + } + set->nitems++; + if (set->items[ii] == 1) { + set->n_deleted_items--; + } + set->items[ii] = value; + return 1; +} + +static void maybe_rehash(hashset_t set) { + size_t *old_items; + size_t old_capacity, ii; + + if (set->nitems + set->n_deleted_items >= (double)set->capacity * 0.85) { + old_items = set->items; + old_capacity = set->capacity; + set->nbits++; + set->capacity = (size_t)(1 << set->nbits); + set->mask = set->capacity - 1; + set->items = calloc(set->capacity, sizeof(size_t)); + set->nitems = 0; + set->n_deleted_items = 0; + assert(set->items); + for (ii = 0; ii < old_capacity; ii++) { + hashset_add_member(set, (void *)old_items[ii]); + } + free(old_items); + } +} + +int hashset_add(hashset_t set, void *item) { + int rv = hashset_add_member(set, item); + maybe_rehash(set); + return rv; +} + +int hashset_remove(hashset_t set, void *item) { + size_t value = (size_t)item; + size_t ii = set->mask 
& (prime_1 * value); + + while (set->items[ii] != 0) { + if (set->items[ii] == value) { + set->items[ii] = 1; + set->nitems--; + set->n_deleted_items++; + return 1; + } else { + ii = set->mask & (ii + prime_2); + } + } + return 0; +} + +int hashset_is_member(hashset_t set, void *item) { + size_t value = (size_t)item; + size_t ii = set->mask & (prime_1 * value); + + while (set->items[ii] != 0) { + if (set->items[ii] == value) { + return 1; + } else { + ii = set->mask & (ii + prime_2); + } + } + return 0; +} diff --git a/src/hashset/hashset.h b/src/hashset/hashset.h new file mode 100644 index 0000000..e625242 --- /dev/null +++ b/src/hashset/hashset.h @@ -0,0 +1,72 @@ +/* + * Copyright 2012 Couchbase, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HASHSET_H +#define HASHSET_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct hashset_st { + size_t nbits; + size_t mask; + + size_t capacity; + size_t *items; + size_t nitems; + size_t n_deleted_items; +}; + +typedef struct hashset_st *hashset_t; + +/* create hashset instance */ +hashset_t hashset_create(void); + +/* destroy hashset instance */ +void hashset_destroy(hashset_t set); + +size_t hashset_num_items(hashset_t set); + +/* add item into the hashset. + * + * @note 0 and 1 is special values, meaning nil and deleted items. the + * function will return -1 indicating error. 
+ * + * returns zero if the item already in the set and non-zero otherwise + */ +int hashset_add(hashset_t set, void *item); + +/* remove item from the hashset + * + * returns non-zero if the item was removed and zero if the item wasn't + * exist + */ +int hashset_remove(hashset_t set, void *item); + +/* check if existence of the item + * + * returns non-zero if the item exists and zero otherwise + */ +int hashset_is_member(hashset_t set, void *item); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/vdex/vdex_backend_019.c b/src/vdex/vdex_backend_019.c index 13a4671..7445b36 100644 --- a/src/vdex/vdex_backend_019.c +++ b/src/vdex/vdex_backend_019.c @@ -22,6 +22,7 @@ #include +#include "../hashset/hashset.h" #include "../out_writer.h" #include "../utils.h" #include "vdex_backend_019.h" @@ -414,6 +415,13 @@ int vdex_backend_019_process(const char *VdexFileName, initCompactOffset(quickenInfoOffTable.data); } + // Make sure to not unquicken the same code item multiple times. + hashset_t unquickened_code_items = hashset_create(); + if (!unquickened_code_items) { + LOGMSG(l_ERROR, "Failed to create hashset"); + return -1; + } + // For each class log_dis("file #%zu: classDefsSize=%" PRIu32 "\n", dex_file_idx, dex_getClassDefsSize(dexFileBuf)); @@ -462,16 +470,27 @@ int vdex_backend_019_process(const char *VdexFileName, // Skip empty methods if (curDexMethod.codeOff == 0) { - continue; + goto next_dmethod; } if (pRunArgs->unquicken) { + // Check if we've already unquickened the code item + u2 *pCode = NULL; + u4 codeSize = 0; + dex_getCodeItemInfo(dexFileBuf, &curDexMethod, &pCode, &codeSize); + if (hashset_is_member(unquickened_code_items, (void *)pCode)) { + LOGMSG(l_DEBUG, "Already unquickened direct method:%d", + lastIdx + curDexMethod.methodIdx); + goto next_dmethod; + } + + // Since new code item, add to set + hashset_add(unquickened_code_items, (void *)pCode); + // Offset being 0 means not quickened. 
const u4 qOffset = getOffset(lastIdx + curDexMethod.methodIdx); - // Update lastIdx since followings delta_idx are based on 1st elements idx - lastIdx += curDexMethod.methodIdx; - + // Get quickenData for method and decompile vdex_data_array_t quickenData; memset(&quickenData, 0, sizeof(vdex_data_array_t)); if (quickenInfo.size != 0 && qOffset != 0u) { @@ -482,6 +501,10 @@ int vdex_backend_019_process(const char *VdexFileName, LOGMSG(l_ERROR, "Failed to decompile Dex file"); return -1; } + + next_dmethod: + // Update lastIdx since followings delta_idx are based on 1st elements idx + lastIdx += curDexMethod.methodIdx; } else { vdex_decompiler_019_walk(dexFileBuf, &curDexMethod); } @@ -497,16 +520,27 @@ int vdex_backend_019_process(const char *VdexFileName, // Skip native or abstract methods if (curDexMethod.codeOff == 0) { - continue; + goto next_vmethod; } if (pRunArgs->unquicken) { + // Check if we've already unquickened the code item + u2 *pCode = NULL; + u4 codeSize = 0; + dex_getCodeItemInfo(dexFileBuf, &curDexMethod, &pCode, &codeSize); + if (hashset_is_member(unquickened_code_items, (void *)pCode)) { + LOGMSG(l_DEBUG, "Already unquickened virtual method:%d", + lastIdx + curDexMethod.methodIdx); + goto next_vmethod; + } + + // Since new code item, add to set + hashset_add(unquickened_code_items, (void *)pCode); + // Offset being 0 means not quickened. 
const u4 qOffset = getOffset(lastIdx + curDexMethod.methodIdx); - // Update lastIdx since followings delta_idx are based on 1st elements idx - lastIdx += curDexMethod.methodIdx; - + // Get quickenData for method and decompile vdex_data_array_t quickenData; memset(&quickenData, 0, sizeof(vdex_data_array_t)); if (quickenInfo.size != 0 && qOffset != 0u) { @@ -517,6 +551,10 @@ int vdex_backend_019_process(const char *VdexFileName, LOGMSG(l_ERROR, "Failed to decompile Dex file"); return -1; } + + next_vmethod: + // Update lastIdx since followings delta_idx are based on 1st elements idx + lastIdx += curDexMethod.methodIdx; } else { vdex_decompiler_019_walk(dexFileBuf, &curDexMethod); } diff --git a/src/vdex/vdex_decompiler_019.c b/src/vdex/vdex_decompiler_019.c index b5454ba..968b7f7 100644 --- a/src/vdex/vdex_decompiler_019.c +++ b/src/vdex/vdex_decompiler_019.c @@ -107,18 +107,10 @@ bool vdex_decompiler_019_decompile(const u1 *dexFileBuf, return true; } - // We have different code items in StandardDex and CompactDex + // Get method's CodeItem information u2 *pCode = NULL; u4 codeSize = 0; - if (dex_checkType(dexFileBuf) == kNormalDex) { - dexCode *pDexCode = (dexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); - pCode = pDexCode->insns; - codeSize = pDexCode->insnsSize; - } else { - cdexCode *pCdexCode = (cdexCode *)(dex_getDataAddr(dexFileBuf) + pDexMethod->codeOff); - pCode = pCdexCode->insns; - dex_DecodeCDexFields(pCdexCode, &codeSize, NULL, NULL, NULL, NULL, true); - } + dex_getCodeItemInfo(dexFileBuf, pDexMethod, &pCode, &codeSize); u4 startCodeOff = dex_getFirstInstrOff(dexFileBuf, pDexMethod);