Skip to content

Commit

Permalink
Unhide APIs when decompiling files from Android P
Browse files Browse the repository at this point in the history
As part of the API blacklisting in Android P, the Android runtime is implementing
a custom encoding in the access flags of the class members (fields & methods).
This is to prevent apps from directly messing with the core system frameworks.

However, when decompiling a framework Vdex the access flags that are written
on disk are encoded. So we need to decode them and overwrite when extracting
the Dex resources from the Vdex container. Otherwise, the ART verifier will
complain when the decompiled resources are repackaged or side-loaded.

This commit implements the base logic to decode access flags and the ULeb128
primitives to update the member values in place.

Signed-off-by: Anestis Bechtsoudis <anestis@census-labs.com>
  • Loading branch information
anestisb committed Sep 8, 2018
1 parent d457978 commit 6f69698
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 2 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,8 @@ file #0: classDefsSize=8840

## Changelog

* __0.5.2__ - TBC
* Fix an issue when decompiling framework bytecode whose APIs have been hidden
* __0.5.1__ - 3 September 2018
* Improve handling of deduplicated shared data section when exporting CompactDex files after
decompilation
Expand Down
2 changes: 1 addition & 1 deletion src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ typedef __attribute__((__aligned__(1))) int32_t unaligned_s4;
#define CHECK_GE(a, b) CHECK_IMPL((a), >=, (b))

#define PROG_NAME "vdexExtractor"
#define PROG_VERSION "0.5.1"
#define PROG_VERSION "0.5.2"
#define PROG_AUTHORS \
" Anestis Bechtsoudis <anestis@census-labs.com>\n" \
" Copyright 2017 - 2018 by CENSUS S.A. All Rights Reserved."
Expand Down
81 changes: 81 additions & 0 deletions src/dex.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,27 @@ static bool enableDisassembler = false;

static inline u2 get2LE(unsigned char const *pSrc) { return pSrc[0] | (pSrc[1] << 8); }

// Tells whether the byte at ptr is the last byte of a LEB128 value, i.e. its
// value is below 0x80 (continuation bit clear).
static inline bool IsLeb128Terminator(const u1 *ptr) {
  const u1 byte = *ptr;
  return byte < 0x80;
}

// Computes how many bytes the unsigned LEB128 encoding of `data` occupies.
// Every encoded byte carries 7 payload bits, so the answer is 1 for values
// below 0x80 (including 0) and grows by one for each further group of 7 bits
// that is still non-zero (at most 5 for a 32-bit input).
static inline u4 ULeb128Size(u4 data) {
  u4 size = 1;
  for (u4 remaining = data >> 7; remaining != 0; remaining >>= 7) {
    size++;
  }
  return size;
}

// Returns true when at most one bit of x is set. Note that zero is accepted
// as well, so this is really an "at most one bit set" test.
static inline bool IsPowerOfTwo(u4 x) {
  const u4 withoutLowestSetBit = x & (x - 1);
  return withoutLowestSetBit == 0;
}

// Reports whether more than one of the visibility flags
// (public / private / protected) is set in `value`. Such a combination is
// presumably how the first hidden API bit is encoded in the dex access flags.
static inline bool IsFirstBitSet(u4 value) {
  const u4 visibility = value & kAccVisibilityFlags;
  if (IsPowerOfTwo(visibility)) {
    // Zero or exactly one visibility flag: nothing is encoded.
    return false;
  }
  return true;
}

// Picks the access flag bit that holds the second hidden API bit:
// kAccDexHiddenBitNative when kAccNative is set in `value`, kAccDexHiddenBit
// otherwise.
static inline u4 GetSecondFlag(u4 value) {
  if (value & kAccNative) {
    return kAccDexHiddenBitNative;
  }
  return kAccDexHiddenBit;
}

// Helper for dex_dumpInstruction(), which builds the string representation
// for the index in the given instruction.
static char *indexString(const u1 *dexFileBuf, u2 *codePtr, u4 bufSize) {
Expand Down Expand Up @@ -717,6 +738,47 @@ u4 dex_readULeb128(const u1 **pStream) {
return (u4)result;
}

// Encodes `value` as unsigned LEB128 starting at `dest`, using the shortest
// possible encoding, and returns a pointer to the byte right after the last
// one written. The caller is responsible for `dest` having enough room.
u1 *dex_writeULeb128(u1 *dest, u4 value) {
  for (;;) {
    const u1 chunk = value & 0x7f;
    value >>= 7;
    if (value == 0) {
      // Final byte: continuation bit stays clear.
      *dest++ = chunk;
      return dest;
    }
    // More payload follows: flag it with the continuation bit.
    *dest++ = chunk | 0x80;
  }
}

// Walks backwards from `end_ptr` (the first byte after a LEB128 value) and
// returns the address of that value's first byte. This only works when the
// value is preceded by another LEB128 value, because the start is recognised
// by finding the terminating byte of the predecessor.
u1 *dex_reverseSearchULeb128(u1 *end_ptr) {
  // The byte right before end_ptr has to be the value's terminating byte.
  u1 *ptr = end_ptr - 1;
  CHECK(IsLeb128Terminator(ptr));

  for (;;) {
    // Once the preceding byte terminates the previous value, ptr is the start.
    if (IsLeb128Terminator(ptr - 1)) {
      return ptr;
    }
    ptr--;
    // A value spans at most five bytes; going further back means there was no
    // other Leb128 value in front of this one.
    CHECK_LE(end_ptr - ptr, 5);
  }
}

// Replaces the unsigned LEB128 value stored at `dest` with `value` while
// keeping the number of bytes it occupies unchanged, so the data that follows
// stays where it is. The minimal encoding of `value` must not be longer than
// the minimal encoding of the value currently stored (CHECKed).
void dex_updateULeb128(u1 *dest, u4 value) {
  // Decode the current value to learn where its encoding ends.
  const u1 *old_end = dest;
  u4 old_value = dex_readULeb128(&old_end);
  CHECK_LE(ULeb128Size(value), ULeb128Size(old_value));

  u1 *end = dex_writeULeb128(dest, value);
  while (end < old_end) {
    // Stretch the encoding over the leftover space: turn the previous byte
    // into a continuation byte and append an empty payload byte.
    *(end - 1) |= 0x80;
    *end = 0;
    end++;
  }
}

s4 dex_readSLeb128(const u1 **data) {
const u1 *ptr = *data;
s4 result = *(ptr++);
Expand Down Expand Up @@ -1298,5 +1360,24 @@ void dex_getCodeItemInfo(const u1 *dexFileBuf, dexMethod *pDexMethod, u2 **pCode
}
}

// Removes the hidden API encoding from access flags read from a dex file and
// returns the resulting flags.
u4 dex_decodeAccessFlagsFromDex(u4 dex_access_flags) {
  u4 decoded = dex_access_flags;

  // First bit: when set, the visibility flags are stored inverted.
  if (IsFirstBitSet(decoded)) {
    decoded ^= kAccVisibilityFlags;
  }

  // Second bit: lives in a dedicated flag whose position depends on whether
  // the (already corrected) flags describe a native method.
  const u4 second_flag = GetSecondFlag(decoded);
  return decoded & ~second_flag;
}

// Overwrites, in place, the encoded access flags of the class data member
// that ends at `data_ptr` with `new_access_flags`. `is_method` selects how
// many ULeb128 fields must be skipped backwards to reach the access flags.
void dex_unhideAccessFlags(u1 *data_ptr, u4 new_access_flags, bool is_method) {
  // For fields the access flags are the last ULeb128 before data_ptr; for
  // methods one more ULeb128 sits between the access flags and data_ptr.
  const int ulebs_to_rewind = is_method ? 2 : 1;

  u1 *flags_ptr = data_ptr;
  for (int i = 0; i < ulebs_to_rewind; i++) {
    flags_ptr = dex_reverseSearchULeb128(flags_ptr);
  }

  dex_updateULeb128(flags_ptr, new_access_flags);
}

// Stores the requested disassembler on/off state in the file-scope flag.
void dex_setDisassemblerStatus(bool status) {
  enableDisassembler = status;
}
// Returns the disassembler on/off state currently held in the file-scope flag.
bool dex_getDisassemblerStatus(void) {
  return enableDisassembler;
}
19 changes: 19 additions & 0 deletions src/dex.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <zlib.h>
#include "common.h"
#include "dex_instruction.h"
#include "dex_modifiers.h"

// CompactDex helper constants for CodeItem decoding
#define kRegistersSizeShift ((size_t)12)
Expand Down Expand Up @@ -321,6 +322,19 @@ void dex_repairDexCRC(const u1 *, off_t);
// tolerates non-zero high-order bits in the fifth encoded byte.
u4 dex_readULeb128(const u1 **);

// Writes `value` as an unsigned LEB128 (Little-Endian Base 128) sequence
// starting at `dest`, using the shortest possible encoding. Returns a pointer
// to the byte just past the end of the written value; `dest` itself is passed
// by value and is not modified for the caller.
u1 *dex_writeULeb128(u1 *dest, u4 value);

// Returns a pointer to the first byte of a Leb128 value, found by scanning
// backwards from the given pointer, assuming that:
// (1) `end_ptr` points to the first byte after the Leb128 value, and
// (2) there is another Leb128 value before this one.
// The implementation CHECKs that the byte before `end_ptr` is a terminating
// byte and that the scan does not go back more than five bytes.
u1 *dex_reverseSearchULeb128(u1 *);

// Overwrites the encoded Leb128 at the given address with a new value without
// changing the number of bytes it occupies. The minimal encoding of the new
// value must not be longer than the minimal encoding of the old value (this is
// CHECKed), which always holds when the new value is less than or equal to the
// old one. A shorter new value is padded with continuation bytes so that it
// fills the allocated space.
void dex_updateULeb128(u1 *, u4);

// Reads a signed LEB128 value, updating the given pointer to point
// just past the end of the read value. This function tolerates
// non-zero high-order bits in the fifth encoded byte.
Expand Down Expand Up @@ -391,4 +405,9 @@ void dex_DecodeCDexFields(cdexCode *, u4 *, u2 *, u2 *, u2 *, u2 *, bool);
// Get CodeItem information from a DexMethod
void dex_getCodeItemInfo(const u1 *, dexMethod *, u2 **, u4 *);

// Takes the access flags of a class member as read from a dex file and
// returns them with the hidden API encoding removed.
u4 dex_decodeAccessFlagsFromDex(u4);

// Changes, in place, the dex class data member that ends at data_ptr so that
// it does not have any hiddenapi flags. The second argument is the (already
// decoded) access flags value to store and the third one must be true for
// methods and false for fields.
void dex_unhideAccessFlags(u1 *, u4, bool);

#endif
146 changes: 146 additions & 0 deletions src/dex_modifiers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
vdexExtractor
-----------------------------------------
Anestis Bechtsoudis <anestis@census-labs.com>
Copyright 2017 - 2018 by CENSUS S.A. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#ifndef _DEX_MODIFIERS_H_
#define _DEX_MODIFIERS_H_

#include "common.h"

// Access flag constants.
//
// These are object-like macros rather than `static const` objects on purpose:
// in C a const-qualified object is not a constant expression, so the combined
// masks further down could not be portably initialized from such objects, and
// every file including this header would get its own (mostly unused) copy of
// each object. All values are unsigned so they combine safely with 32-bit
// access flag words.

#define kAccPublic 0x0001U        // class, field, method, ic
#define kAccPrivate 0x0002U       // field, method, ic
#define kAccProtected 0x0004U     // field, method, ic
#define kAccStatic 0x0008U        // field, method, ic
#define kAccFinal 0x0010U         // class, field, method, ic
#define kAccSynchronized 0x0020U  // method (only allowed on natives)
#define kAccSuper 0x0020U         // class (not used in dex)
#define kAccVolatile 0x0040U      // field
#define kAccBridge 0x0040U        // method (1.5)
#define kAccTransient 0x0080U     // field
#define kAccVarargs 0x0080U       // method (1.5)
#define kAccNative 0x0100U        // method
#define kAccInterface 0x0200U     // class, ic
#define kAccAbstract 0x0400U      // class, method, ic
#define kAccStrict 0x0800U        // method
#define kAccSynthetic 0x1000U     // class, field, method, ic
#define kAccAnnotation 0x2000U    // class, ic (1.5)
#define kAccEnum 0x4000U          // class, field, ic (1.5)

#define kAccJavaFlagsMask 0xffffU  // bits set from Java sources (low 16)

// The following flags are used to insert hidden API access flags into boot
// class path dex files. They are decoded by DexFile::ClassDataItemIterator and
// removed from the access flags before used by the runtime.
#define kAccDexHiddenBit 0x00000020U        // field, method (not native)
#define kAccDexHiddenBitNative 0x00000200U  // method (native)

#define kAccConstructor 0x00010000U           // method (dex only) <(cl)init>
#define kAccDeclaredSynchronized 0x00020000U  // method (dex only)
#define kAccClassIsProxy 0x00040000U          // class (dex only)
// Set to indicate that the ArtMethod is obsolete and has a different DexCache + DexFile from its
// declaring class. This flag may only be applied to methods.
#define kAccObsoleteMethod 0x00040000U  // method (runtime)
// Used by a method to denote that its execution does not need to go through slow path interpreter.
#define kAccSkipAccessChecks 0x00080000U  // method (runtime, not native)
// Used by a class to denote that the verifier has attempted to check it at least once.
#define kAccVerificationAttempted 0x00080000U  // class (runtime)
#define kAccSkipHiddenApiChecks 0x00100000U    // class (runtime)
// This is set by the class linker during LinkInterfaceMethods. It is used by a method to represent
// that it was copied from its declaring class into another class. All methods marked kAccMiranda
// and kAccDefaultConflict will have this bit set. Any kAccDefault method contained in the methods_
// array of a concrete class will also have this bit set.
#define kAccCopied 0x00100000U   // method (runtime)
#define kAccMiranda 0x00200000U  // method (runtime, not native)
#define kAccDefault 0x00400000U  // method (runtime)
// Native method flags are set when linking the methods based on the presence of the
// @dalvik.annotation.optimization.{Fast,Critical}Native annotations with build visibility.
// Reuse the values of kAccSkipAccessChecks and kAccMiranda which are not used for native methods.
#define kAccFastNative 0x00080000U      // method (runtime; native only)
#define kAccCriticalNative 0x00200000U  // method (runtime; native only)

// Set by the JIT when clearing profiling infos to denote that a method was previously warm.
#define kAccPreviouslyWarm 0x00800000U  // method (runtime)

// This is set by the class linker during LinkInterfaceMethods. Prior to that point we do not know
// if any particular method needs to be a default conflict. Used to figure out at runtime if
// invoking this method will throw an exception.
#define kAccDefaultConflict 0x01000000U  // method (runtime)

// Set by the verifier for a method we do not want the compiler to compile.
#define kAccCompileDontBother 0x02000000U  // method (runtime)

// Set by the verifier for a method that could not be verified to follow structured locking.
#define kAccMustCountLocks 0x04000000U  // method (runtime)

// Set by the class linker for a method that has only one implementation for a
// virtual call.
#define kAccSingleImplementation 0x08000000U  // method (runtime)

#define kAccHiddenApiBits 0x30000000U  // field, method

// Not currently used, except for intrinsic methods where these bits
// are part of the intrinsic ordinal.
#define kAccMayBeUnusedBits 0x40000000U

// Set by the compiler driver when compiling boot classes with intrinsic methods.
#define kAccIntrinsic 0x80000000U  // method (runtime)

// Special runtime-only flags.
// Interface and all its super-interfaces with default methods have been recursively initialized.
#define kAccRecursivelyInitialized 0x20000000U
// Interface declares some default method.
#define kAccHasDefaultMethod 0x40000000U
// class/ancestor overrides finalize()
#define kAccClassIsFinalizable 0x80000000U

// Continuous sequence of bits used to hold the ordinal of an intrinsic method. Flags
// which overlap are not valid when kAccIntrinsic is set.
#define kAccIntrinsicBits                                                                   \
  (kAccMayBeUnusedBits | kAccHiddenApiBits | kAccSingleImplementation | kAccMustCountLocks | \
   kAccCompileDontBother | kAccDefaultConflict | kAccPreviouslyWarm)

// Valid (meaningful) bits for a field.
#define kAccValidFieldFlags                                                               \
  (kAccPublic | kAccPrivate | kAccProtected | kAccStatic | kAccFinal | kAccVolatile |      \
   kAccTransient | kAccSynthetic | kAccEnum)

// Valid (meaningful) bits for a method.
#define kAccValidMethodFlags                                                              \
  (kAccPublic | kAccPrivate | kAccProtected | kAccStatic | kAccFinal | kAccSynchronized |  \
   kAccBridge | kAccVarargs | kAccNative | kAccAbstract | kAccStrict | kAccSynthetic |     \
   kAccConstructor | kAccDeclaredSynchronized)

// Valid (meaningful) bits for a class (not interface).
// Note 1. These are positive bits. Other bits may have to be zero.
// Note 2. Inner classes can expose more access flags to Java programs. That is handled by libcore.
#define kAccValidClassFlags \
  (kAccPublic | kAccFinal | kAccSuper | kAccAbstract | kAccSynthetic | kAccEnum)

// Valid (meaningful) bits for an interface.
// Note 1. Annotations are interfaces.
// Note 2. These are positive bits. Other bits may have to be zero.
// Note 3. Inner classes can expose more access flags to Java programs. That is handled by libcore.
#define kAccValidInterfaceFlags \
  (kAccPublic | kAccInterface | kAccAbstract | kAccSynthetic | kAccAnnotation)

#define kAccVisibilityFlags (kAccPublic | kAccPrivate | kAccProtected)

#endif
17 changes: 16 additions & 1 deletion src/vdex/vdex_backend_019.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,6 @@ int vdex_backend_019_process(const char *VdexFileName,
for (u4 i = 0; i < dex_getClassDefsSize(dexFileBuf); ++i) {
const dexClassDef *pDexClassDef = dex_getClassDef(dexFileBuf, i);

// TODO: Unhide APIs if we're unquickening
dex_dumpClassInfo(dexFileBuf, i);

// Last read field or method index to apply delta to
Expand All @@ -451,13 +450,21 @@ int vdex_backend_019_process(const char *VdexFileName,
dexField pDexField;
memset(&pDexField, 0, sizeof(dexField));
dex_readClassDataField(&curClassDataCursor, &pDexField);

// APIs are unhidden regardless of whether we're decompiling or not
dex_unhideAccessFlags((u1 *)curClassDataCursor,
dex_decodeAccessFlagsFromDex(pDexField.accessFlags), false);
}

// Skip instance fields
for (u4 j = 0; j < pDexClassDataHeader.instanceFieldsSize; ++j) {
dexField pDexField;
memset(&pDexField, 0, sizeof(dexField));
dex_readClassDataField(&curClassDataCursor, &pDexField);

// APIs are unhidden regardless of whether we're decompiling or not
dex_unhideAccessFlags((u1 *)curClassDataCursor,
dex_decodeAccessFlagsFromDex(pDexField.accessFlags), false);
}

// For each direct method
Expand All @@ -468,6 +475,10 @@ int vdex_backend_019_process(const char *VdexFileName,
dex_readClassDataMethod(&curClassDataCursor, &curDexMethod);
dex_dumpMethodInfo(dexFileBuf, &curDexMethod, lastIdx, "direct");

// APIs are unhidden regardless of whether we're decompiling or not
dex_unhideAccessFlags((u1 *)curClassDataCursor,
dex_decodeAccessFlagsFromDex(curDexMethod.accessFlags), true);

// Skip empty methods
if (curDexMethod.codeOff == 0) {
goto next_dmethod;
Expand Down Expand Up @@ -518,6 +529,10 @@ int vdex_backend_019_process(const char *VdexFileName,
dex_readClassDataMethod(&curClassDataCursor, &curDexMethod);
dex_dumpMethodInfo(dexFileBuf, &curDexMethod, lastIdx, "virtual");

// APIs are unhidden regardless of whether we're decompiling or not
dex_unhideAccessFlags((u1 *)curClassDataCursor,
dex_decodeAccessFlagsFromDex(curDexMethod.accessFlags), true);

// Skip native or abstract methods
if (curDexMethod.codeOff == 0) {
goto next_vmethod;
Expand Down

0 comments on commit 6f69698

Please sign in to comment.