In [None]:
# Select matplotlib's interactive "notebook" backend for figures in this notebook.
%matplotlib notebook

In [None]:
from androguard.misc import AnalyzeAPK
from androguard.session import Save, Session, Load
from androguard.core.bytecode import FormatClassToJava
from androguard.core.analysis.analysis import StringAnalysis

import networkx as nx
import matplotlib.pyplot as plt

import unittest

from os import path
from collections import defaultdict, Counter

In [None]:
# File the Androguard session is saved to / loaded from by load_androguard.
AG_SESSION_FILE = "./Androguard.ag"
# Strings referenced from more places than this are ignored when collecting
# characteristic strings for the target classes and methods.
MAX_USAGE_COUNT_STR = 20
# APK that is handed to AnalyzeAPK.
APK_FILE_PATH = "../../../Downloads/com.snapchat.android_10.85.5.74-2067_minAPI19(arm64-v8a)(nodpi)_apkmirror.com.apk"

In [None]:
def load_androguard(force_reload=False, write_session=True):
    """Return the ``(a, d, dx)`` analysis objects for ``APK_FILE_PATH``.

    Args:
        force_reload: When true, always analyze the APK again, even if a
            saved session exists at ``AG_SESSION_FILE``.
        write_session: When true and the APK was analyzed from scratch, save
            the new session to ``AG_SESSION_FILE``.

    Returns:
        The same ``(a, d, dx)`` tuple that ``AnalyzeAPK`` produces.

    Raises:
        ValueError: If a saved session was loaded but holds no analysis for
            ``APK_FILE_PATH``.
    """
    if not force_reload and path.exists(AG_SESSION_FILE):
        print("Loading Existing Session")
        s = Load(AG_SESSION_FILE)
        # The session was filled by AnalyzeAPK under APK_FILE_PATH, so the
        # analysis objects are looked up again under that same file name.
        a, d, dx = s.get_objects_apk(filename=APK_FILE_PATH)
        # NOTE(review): get_objects_apk presumably yields (None, None, None)
        # for a file the session does not know - confirm for the installed
        # Androguard version.
        if dx is None:
            raise ValueError(
                "Session {!r} holds no analysis for {!r}".format(AG_SESSION_FILE, APK_FILE_PATH)
            )
        return a, d, dx

    print("Loading Session from Apk")
    s = Session()
    a, d, dx = AnalyzeAPK(APK_FILE_PATH, s)
    if write_session:
        print("Saving Loaded Session to", AG_SESSION_FILE)
        Save(s, AG_SESSION_FILE)
    # Returned from both paths; previously the cached path returned None.
    return a, d, dx

# Saving the session disconnects the kernel and loading it raises an EOFError,
# so the APK is always analyzed from scratch and the session is not written.
a, d, dx = load_androguard(force_reload=True, write_session=False)

In [None]:
# https://source.android.com/devices/tech/dalvik/dex-format#typedescriptor
# Java primitive type name -> single-letter type descriptor.
type_descriptors = dict(
    void="V",
    boolean="Z",
    byte="B",
    short="S",
    char="C",
    int="I",
    long="J",
    float="F",
    double="D",
)


def get_as_type_descriptor(arg):
    """Convert a Java type name into its type descriptor.

    Each trailing ``[]`` contributes one leading ``[``. Primitive names are
    looked up in ``type_descriptors``; every other name is passed to
    ``FormatClassToJava``.
    """
    if not arg.endswith("[]"):
        return type_descriptors[arg] if arg in type_descriptors else FormatClassToJava(arg)
    # Array type: describe the element type and add one array dimension.
    element_type = arg[:-2]
    return "[" + get_as_type_descriptor(element_type)

In [None]:
def strip_return(name):
    """Return the parameter part of a method descriptor.

    Drops the first character and everything from the first ``)`` onwards,
    so ``"(I)I"`` becomes ``"I"``. Raises ``ValueError`` when ``name``
    contains no ``)``.
    """
    closing_paren = name.index(")")
    parameters = name[1:closing_paren]
    return parameters

In [None]:
def get_method_repr(class_name, method_name, param_types):
    """Format a method as ``<class_name>#<method_name>(<param_types>)``."""
    return "{}#{}({})".format(class_name, method_name, param_types)

Unit-test both helper functions to check that they behave correctly when working with Androguard.

In [None]:
# (Java type name, expected type descriptor) pairs for get_as_type_descriptor.
tests_1 = (
    ("java.lang.String", "Ljava/lang/String;"),
    ("java.lang.String[]", "[Ljava/lang/String;"),
    ("void", "V"),
    ("int[]", "[I"),
    ("char", "C"), 
    ("java.lang.Object[][]", "[[Ljava/lang/Object;")
)

# (method descriptor, expected parameter part) pairs for strip_return.
tests_2 = (
    ("(I)I", "I"), 
    ("(C)Z", "C"),
    ("(Ljava/lang/CharSequence; I)I", "Ljava/lang/CharSequence; I")
)

class TestFunction(unittest.TestCase):
    """Checks the descriptor helpers against the hand-written expectations."""

    def test_get_as_type_descriptor(self):
        """Every pair in ``tests_1`` converts to the expected descriptor."""
        for java_type, expected in tests_1:
            # subTest reports each failing pair instead of stopping at the first.
            with self.subTest(java_type=java_type):
                self.assertEqual(get_as_type_descriptor(java_type), expected)

    def test_strip_return(self):
        """Every pair in ``tests_2`` is reduced to the expected parameter part."""
        for descriptor, expected in tests_2:
            with self.subTest(descriptor=descriptor):
                self.assertEqual(strip_return(descriptor), expected)


# argv=[''] keeps unittest from interpreting the process's own command-line
# arguments; exit=False stops it from calling sys.exit() once the tests finish.
unittest.main(argv=[''], verbosity=2, exit=False)

In [None]:
class MethodDec:
    """Declaration of a method to look for: its name and its parameter types.

    Parameter types are stored as given and converted with
    ``get_as_type_descriptor`` on demand.
    """

    def __init__(self, name, *param_types):
        """Store the method name and the parameter type names, in order."""
        self.name = name
        self.param_types = param_types

    def get_method_parameter_types(self):
        """Return the parameter types as a list of type descriptors."""
        return [get_as_type_descriptor(param_type) for param_type in self.param_types]

    def param_types_repr(self):
        """Return the parameter type descriptors joined by single spaces."""
        # Must read from self: looking up a global named ``method_dec`` here
        # would describe whatever object that global currently points to, or
        # raise NameError when no such global exists.
        return " ".join(self.get_method_parameter_types())

    def get_method_repr(self, class_name):
        """Return the textual representation of this method on ``class_name``."""
        # Inside a method body this name resolves to the module-level
        # get_method_repr function, not to this method.
        return get_method_repr(class_name, self.name, self.param_types_repr())

In [None]:
# Method declarations to locate, keyed by the name of the class to search in.
decs_to_find = {
    "rD5": MethodDec("a", "rD5", "qD5")
}
# Set to True once the keys of decs_to_find have been converted to type
# descriptors, so that conversion is not applied a second time.
optimized = False

In [None]:
if not optimized:
    # Re-key the declarations by type descriptor, the form the class lookup
    # on dx is given below.
    decs_to_find = dict(
        (get_as_type_descriptor(class_name), declaration)
        for class_name, declaration in decs_to_find.items()
    )
    # One class analysis per declaration, in the same order as decs_to_find.
    resolved_classes = list(map(dx.get_class_analysis, decs_to_find))

    optimized = True

In [None]:
# Collect every method of the resolved classes whose name and parameter types
# match the declaration registered for that class.
resolved_methods = []
# Loop through resolved classes
for (class_name, method_dec), class_analysis in zip(decs_to_find.items(), resolved_classes):
    # NOTE(review): dx.get_class_analysis presumably yields None for a class
    # that is not part of the APK - confirm for the installed Androguard
    # version. Skipping avoids an AttributeError on None below.
    if class_analysis is None:
        print("Class not found in APK:", class_name)
        continue

    # The expected parameter string is the same for every method of this
    # class, so it is built once per declaration instead of once per method.
    m_dec_types = " ".join(method_dec.get_method_parameter_types())

    # Loop through all methods in the resolved class
    for method in class_analysis.get_methods():

        # Check Method Name
        if method_dec.name != method.name:
            continue

        # Check Parameter Types
        if m_dec_types != strip_return(str(method.get_descriptor())):
            continue

        # Matching Method Declaration found
        print("Found Class and Method", method_dec.get_method_repr(class_name))
        resolved_methods.append(method)

In [None]:
# For every target method / target class, collect the strings referenced from it.
m_strs = defaultdict(set)
c_strs = defaultdict(set)

for string_analysis in dx.get_strings():
    xrefs = string_analysis.get_xref_from()

    # A string referenced from too many places is not a reliable
    # characteristic, so only rarely used strings are considered.
    if len(xrefs) <= MAX_USAGE_COUNT_STR:
        for c_ref, m_ref in xrefs:
            # Only references coming from one of the target classes matter.
            if c_ref.name in decs_to_find:
                for r_m in resolved_methods:
                    if m_ref == r_m:
                        # Referenced directly from a target method
                        m_strs[r_m].add(string_analysis.value)
                    elif c_ref.name == r_m.class_name:
                        # Referenced from elsewhere in the target method's class
                        c_strs[c_ref.name].add(string_analysis.value)
                    # Otherwise the string is unrelated to this target
            # Else: String unused in Class and Method

In [None]:
def flat_map(f, li):
    """
    Lazily applies f to every leaf value found in li, yielding a flat sequence.
    Nested Iterables are descended into recursively; Strings count as leaves.
    """
    from collections.abc import Iterable
    for element in li:
        # A str has to be a leaf: iterating a str yields str again, which
        # would recurse until the recursion limit is hit.
        is_leaf = isinstance(element, str) or not isinstance(element, Iterable)
        if is_leaf:
            yield f(element)
        else:
            yield from flat_map(f, element)

# def flat_map(f, li): return (f(y) for x in li for y in x)

In [None]:
# Report every place that references one of the strings collected for a target
# method (method candidates) or for a target class (class candidates).
for s in dx.get_strings():
    for m_ref, m_set in m_strs.items():
        c_name = m_ref.class_name
        # .get() only reads; indexing the defaultdict would insert an empty
        # set for every class that has no class-level strings.
        c_set = c_strs.get(c_name, ())

        if s.value in m_set:
            for c_xref, m_xref in s.get_xref_from():
                # Describe the referencing method itself rather than the
                # target with the referencing method's descriptor mixed in.
                # NOTE(review): assumes m_xref exposes .name like the resolved
                # methods do - confirm for the installed Androguard version.
                print("Possible MCandidate:", get_method_repr(c_xref.name, m_xref.name, strip_return(str(m_xref.descriptor))))
        elif s.value in c_set:
            for c_xref, _ in s.get_xref_from():
                # Print the referencing class; the target's own name was
                # previously repeated once per reference.
                print("Possible CCandidate:", c_xref.name)