Skip to content

Commit

Permalink
Merge pull request #691 from reox/fix_packagename
Browse files Browse the repository at this point in the history
Fix packagename
  • Loading branch information
reox committed May 10, 2019
2 parents 984c0d9 + ef94f00 commit 55f282b
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 64 deletions.
134 changes: 75 additions & 59 deletions androguard/core/analysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,8 @@ def is_android_api(self):
def get_methods(self):
    """
    Collect every :class:`MethodClassAnalysis` object stored for this class.

    A new list is built from the values of ``self._methods`` on each call.

    :return: the values of ``self._methods`` as a list
    :rtype: list[MethodClassAnalysis]
    """
    registered = self._methods
    return list(registered.values())

Expand Down Expand Up @@ -1111,7 +1113,7 @@ def __repr__(self):
" EXTERNAL" if isinstance(self.orig_class, ExternalClass) else "")

def __str__(self):
# Print only instanciation from other classes here
# Print only instantiation from other classes here
# TODO also method xref and field xref should be printed?
data = "XREFto for %s\n" % self.orig_class
for ref_class in self.xrefto:
Expand Down Expand Up @@ -1172,6 +1174,7 @@ def add(self, vm):
:param vm: :class:`dvm.DalvikVMFormat` to add to this Analysis
"""
self.vms.append(vm)
log.info("Adding DEX file version {}".format(vm.version))
for current_class in vm.get_classes():
self.classes[current_class.get_name()] = ClassAnalysis(current_class)

Expand Down Expand Up @@ -1204,18 +1207,20 @@ def create_xref(self):
# or check that we do not write garbage.

# TODO multiprocessing
# One reason why multiprocessing is hard to implement is the creation of
# the external classes and methods. This must be synchronized.
for c in self._get_all_classes():
self._create_xref(c)

log.info("End of creating cross references (XREF)")
log.info("run time: {:0d}min {:02d}s".format(*divmod(int(time.time() - tic), 60)))
log.info("End of creating cross references (XREF) "
"run time: {:0d}min {:02d}s".format(*divmod(int(time.time() - tic), 60)))

def _create_xref(self, current_class):
"""
Create the xref for `current_class`
There are four steps involved in getting the xrefs:
* Xrefs for classes
* Xrefs for class instantiation and static class usage
* for method calls
* for string usage
* for field manipulation
Expand All @@ -1232,62 +1237,72 @@ def _create_xref(self, current_class):
for current_method in current_class.get_methods():
log.debug("Creating XREF for %s" % current_method)

off = 0
for instruction in current_method.get_instructions():
for off, instruction in current_method.get_instructions_idx():
op_value = instruction.get_op_value()

# 1) check for class calls: const-class (0x1c), new-instance (0x22)
if op_value in [0x1c, 0x22]:
idx_type = instruction.get_ref_kind()
# type_info is the string like 'Ljava/lang/Object;'
type_info = instruction.cm.vm.get_cm_type(idx_type)
type_info = instruction.cm.vm.get_cm_type(idx_type).lstrip('[')
if type_info[0] != 'L':
# Need to make sure, that we get class types and not other types
continue

# Internal xref related to class manipulation
# FIXME should the xref really only be set if the class is in self.classes? If an external class is added later, it will be added too!
# See https://github.com/androguard/androguard/blob/d720ebf2a9c8e2a28484f1c81fdddbc57e04c157/androguard/core/analysis/analysis.py#L806
# Before the check would go for internal classes only!
# FIXME: effectively ignoring calls to itself - do we want that?
if type_info != cur_cls_name:
if type_info not in self.classes:
# Create new external class
self.classes[type_info] = ClassAnalysis(ExternalClass(type_info))
if type_info == cur_cls_name:
continue

if type_info not in self.classes:
# Create new external class
self.classes[type_info] = ClassAnalysis(ExternalClass(type_info))

cur_cls = self.classes[cur_cls_name]
oth_cls = self.classes[type_info]
cur_cls = self.classes[cur_cls_name]
oth_cls = self.classes[type_info]

# FIXME: xref_to does not work here! current_method is wrong, as it is not the target!
cur_cls.AddXrefTo(REF_TYPE(op_value), oth_cls, current_method, off)
oth_cls.AddXrefFrom(REF_TYPE(op_value), cur_cls, current_method, off)
# FIXME: xref_to does not work here! current_method is wrong, as it is not the target!
cur_cls.AddXrefTo(REF_TYPE(op_value), oth_cls, current_method, off)
oth_cls.AddXrefFrom(REF_TYPE(op_value), cur_cls, current_method, off)

# 2) check for method calls: invoke-* (0x6e ... 0x72), invoke-xxx/range (0x74 ... 0x78)
elif (0x6e <= op_value <= 0x72) or (0x74 <= op_value <= 0x78):
idx_meth = instruction.get_ref_kind()
method_info = instruction.cm.vm.get_cm_method(idx_meth)
if method_info:
class_info = method_info[0]

method_item = None
# TODO: should create get_method_descriptor inside Analysis
for vm in self.vms:
method_item = vm.get_method_descriptor(method_info[0], method_info[1], ''.join(method_info[2]))
if method_item:
break

if not method_item:
# Seems to be an external class, create it first
# Beware: if not all DEX files are loaded at the time create_xref runs
# you will run into problems!
if method_info[0] not in self.classes:
self.classes[method_info[0]] = ClassAnalysis(ExternalClass(method_info[0]))
method_item = self.classes[method_info[0]].get_fake_method(method_info[1], method_info[2])

self.classes[cur_cls_name].AddMXrefTo(current_method, self.classes[class_info], method_item, off)
self.classes[class_info].AddMXrefFrom(method_item, self.classes[cur_cls_name], current_method, off)

# Internal xref related to class manipulation
if class_info in self.classes and class_info != cur_cls_name:
self.classes[cur_cls_name].AddXrefTo(REF_TYPE(op_value), self.classes[class_info], method_item, off)
self.classes[class_info].AddXrefFrom(REF_TYPE(op_value), self.classes[cur_cls_name], current_method, off)
if not method_info:
log.warning("Could not get method_info for instruction at {} in method {}".format(off, current_method))
continue

class_info = method_info[0].lstrip('[')
if class_info[0] != 'L':
# Need to make sure, that we get class types and not other types
continue

method_item = None
# TODO: should create get_method_descriptor inside Analysis
for vm in self.vms:
method_item = vm.get_method_descriptor(class_info, method_info[1], ''.join(method_info[2]))
if method_item:
break

if not method_item:
# Seems to be an external class, create it first
# Beware: if not all DEX files are loaded at the time create_xref runs
# you will run into problems!
if class_info not in self.classes:
self.classes[class_info] = ClassAnalysis(ExternalClass(class_info))
method_item = self.classes[class_info].get_fake_method(method_info[1], method_info[2])

self.classes[cur_cls_name].AddMXrefTo(current_method, self.classes[class_info], method_item, off)
self.classes[class_info].AddMXrefFrom(method_item, self.classes[cur_cls_name], current_method, off)

# Internal xref related to class manipulation
if class_info in self.classes and class_info != cur_cls_name:
self.classes[cur_cls_name].AddXrefTo(REF_TYPE(op_value), self.classes[class_info], method_item, off)
self.classes[class_info].AddXrefFrom(REF_TYPE(op_value), self.classes[cur_cls_name], current_method, off)

# 3) check for string usage: const-string (0x1a), const-string/jumbo (0x1b)
elif 0x1a <= op_value <= 0x1b:
Expand All @@ -1307,15 +1322,15 @@ def _create_xref(self, current_class):
field_info = instruction.cm.vm.get_cm_field(idx_field)
field_item = instruction.cm.vm.get_field_descriptor(field_info[0], field_info[2], field_info[1])
# TODO: The bytecode offset is stored for classes but not here?
if field_item:
if (0x52 <= op_value <= 0x58) or (0x60 <= op_value <= 0x66):
# read access to a field
self.classes[cur_cls_name].AddFXrefRead(current_method, self.classes[cur_cls_name], field_item)
else:
# write access to a field
self.classes[cur_cls_name].AddFXrefWrite(current_method, self.classes[cur_cls_name], field_item)
if not field_item:
continue

off += instruction.get_length()
if (0x52 <= op_value <= 0x58) or (0x60 <= op_value <= 0x66):
# read access to a field
self.classes[cur_cls_name].AddFXrefRead(current_method, self.classes[cur_cls_name], field_item)
else:
# write access to a field
self.classes[cur_cls_name].AddFXrefWrite(current_method, self.classes[cur_cls_name], field_item)

def get_method(self, method):
"""
Expand Down Expand Up @@ -1414,7 +1429,7 @@ def get_external_classes(self):
Returns all external classes, that means all classes that are not
defined in the given set of `DalvikVMObjects`.
:rtype: generator of `ClassAnalysis`
:rtype: Iterator[ClassAnalysis]
"""
for cls in self.classes.values():
if cls.is_external():
Expand All @@ -1425,7 +1440,7 @@ def get_internal_classes(self):
Returns all internal classes, that means all classes that are
defined in the given set of :class:`~DalvikVMFormat`.
:rtype: generator of :class:`~ClassAnalysis`
:rtype: Iterator[ClassAnalysis]
"""
for cls in self.classes.values():
if not cls.is_external():
Expand All @@ -1435,15 +1450,15 @@ def get_strings_analysis(self):
"""
Returns a dictionary of strings and their corresponding :class:`StringAnalysis`
:return: a dictionary
:rtype: dict
"""
return self.strings

def get_strings(self):
"""
Returns a list of :class:`StringAnalysis` objects
:rtype: list of :class:`StringAnalysis`
:rtype: Iterator[StringAnalysis]
"""
return self.strings.values()

Expand All @@ -1453,7 +1468,7 @@ def get_classes(self):
Returns both internal and external classes (if any)
:rtype: list of :class:`ClassAnalysis`
:rtype: Iterator[ClassAnalysis]
"""
return self.classes.values()

Expand All @@ -1470,6 +1485,7 @@ def get_fields(self):
"""
Returns a list of `FieldClassAnalysis` objects
:rtype: Iterator[FieldClassAnalysis]
"""
for c in self.classes.values():
for f in c.get_fields():
Expand All @@ -1483,7 +1499,7 @@ def find_classes(self, name=".*", no_external=False):
:param name: regular expression for class name (default ".*")
:param no_external: Remove external classes from the output (default False)
:rtype: generator of `ClassAnalysis`
:rtype: Iterator[ClassAnalysis]
"""
for cname, c in self.classes.items():
if no_external and isinstance(c.get_vm_class(), ExternalClass):
Expand All @@ -1503,7 +1519,7 @@ def find_methods(self, classname=".*", methodname=".*", descriptor=".*",
:param descriptor: regular expression for the descriptor
:param accessflags: regular expression for the accessflags
:param no_external: Remove external method from the output (default False)
:rtype: generator of `MethodClassAnalysis`
:rtype: Iterator[MethodClassAnalysis]
"""
for cname, c in self.classes.items():
if re.match(classname, cname):
Expand All @@ -1524,7 +1540,7 @@ def find_strings(self, string=".*"):
Find strings by regex
:param string: regular expression for the string to search for
:rtype: generator of `StringAnalysis`
:rtype: Iterator[StringAnalysis]
"""
for s, sa in self.strings.items():
if re.match(string, s):
Expand All @@ -1538,7 +1554,7 @@ def find_fields(self, classname=".*", fieldname=".*", fieldtype=".*", accessflag
:param fieldname: regular expression of the fieldname
:param fieldtype: regular expression of the fieldtype
:param accessflags: regular expression of the access flags
:rtype: generator of `FieldClassAnalysis`
:rtype: Iterator[FieldClassAnalysis]
"""
for cname, c in self.classes.items():
if re.match(classname, cname):
Expand Down Expand Up @@ -1570,7 +1586,7 @@ def get_call_graph(self, classname=".*", methodname=".*", descriptor=".*",
:param no_isolated: remove isolated nodes from the graph, e.g. methods which do not call anything (default: False)
:param entry_points: A list of classes that are marked as entry point
:rtype: DiGraph
:rtype: networkx.DiGraph
"""

def _add_node(G, method):
Expand Down
14 changes: 13 additions & 1 deletion androguard/core/bytecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,16 +897,28 @@ def get_package_class_name(name):
If no package could be found, the package is an empty string.
If the name is an array type, the array is discarded.
example::
>>> get_package_class_name('Ljava/lang/Object;')
('java.lang', 'Object')
>>> get_package_class_name('[[Ljava/lang/Object;')
('java.lang', 'Object')
>>> get_package_class_name('LSomeClass;')
('', 'SomeClass')
:param name: the name
:rtype: tuple
:return:
"""
if name[0] != 'L' and name[-1] != ';':
if name[-1] != ';':
raise ValueError("The name '{}' does not look like a typed name!".format(name))

# discard array types, there might be many...
name = name.lstrip('[')

if name[0] != 'L':
raise ValueError("The name '{}' does not look like a typed name!".format(name))

name = name[1:-1]
Expand Down
3 changes: 3 additions & 0 deletions androguard/core/bytecodes/apk.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ def __init__(self, filename, raw=False, magic_file=None, skip_analysis=False, te
self.zip = zipfile.ZipFile(io.BytesIO(self.__raw), mode="r")

if testzip:
log.info("Testing zip file integrity, this might take a while...")
# Test the zipfile for integrity before continuing.
# This process might be slow, as the whole file is read.
# Therefore it is possible to enable it as a separate feature.
Expand Down Expand Up @@ -304,6 +305,7 @@ def _apk_analysis(self):
extracted from the Manifest.
"""
i = "AndroidManifest.xml"
log.info("Starting analysis on {}".format(i))
try:
manifest_data = self.zip.read(i)
except KeyError:
Expand Down Expand Up @@ -360,6 +362,7 @@ def _apk_analysis(self):
self.declared_permissions[d_perm_name] = d_perm_details

self.valid_apk = True
log.info("APK file was successfully validated!")

self.permission_module = androconf.load_api_specific_resource_module(
"aosp_permissions", self.get_target_sdk_version())
Expand Down
18 changes: 17 additions & 1 deletion androguard/core/bytecodes/dvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3109,6 +3109,22 @@ def get_instructions(self):
return []
return self.code.get_bc().get_instructions()

def get_instructions_idx(self):
    """
    Iterate over all instructions of the method, yielding each one together
    with its running offset.

    The offset starts at 0 and is advanced by the ``get_length()`` of every
    instruction after it has been yielded. This is the same as using
    :meth:`get_instructions` and adding the instruction length to a
    variable each time.

    If the method has no code (``get_code()`` returns ``None``), nothing is
    yielded.

    :return: a generator of ``(offset, instruction)`` tuples
    :rtype: Iterator[(int, Instruction)]
    """
    code = self.get_code()
    if code is None:
        # This function is a generator: a value passed to ``return`` would
        # never reach a caller that iterates, so end the iteration with a
        # bare ``return`` instead of the misleading ``return []``.
        return

    idx = 0
    for ins in code.get_bc().get_instructions():
        yield idx, ins
        # The next instruction starts right after the current one.
        idx += ins.get_length()

def set_instructions(self, instructions):
"""
Set the instructions
Expand Down Expand Up @@ -7578,7 +7594,7 @@ class DalvikVMFormat(bytecode.BuffHandle):
:param buff: a string which represents the classes.dex file
:param decompiler: associate a decompiler object to display the java source code
:type buff: string
:type buff: bytes
:type decompiler: object
example::
Expand Down
10 changes: 7 additions & 3 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,16 @@ def testAPK(self):

self.assertEqual(len(list(dx.get_internal_classes())), 1353) # checked by reading the dex header
self.assertEqual(len(dx.get_strings()), 1564)
self.assertEqual(len(list(dx.get_methods())), 11694)
self.assertEqual(len(list(dx.get_methods())), 11691)
self.assertEqual(len(list(dx.get_fields())), 3033)
self.assertEqual(len(list(dx.get_external_classes())), 394)
self.assertEqual(len(list(dx.get_external_classes())), 388)

for cls in dx.get_external_classes():
self.assertEqual(cls.name[0], 'L')
self.assertEqual(cls.name[-1], ';')

# Filter all support libraries
self.assertEqual(len(list(dx.find_classes("^(?!Landroid/support).*;$"))), 516)
self.assertEqual(len(list(dx.find_classes("^(?!Landroid/support).*;$"))), 512)
self.assertEqual(len(list(dx.find_classes("^(?!Landroid/support).*;$", no_external=True))), 124)

# Find all constructors by method name
Expand Down

0 comments on commit 55f282b

Please sign in to comment.