In [30]:
from androguard.misc import AnalyzeAPK

file_path = "./APKs/malign/0AF7642E7A58238E778BAA5940A36BEE84510F53CD96D5590919A0BE2A29D782.apk"#"./APKs/benign/031363170CB9EB84638CC4F5E191CA6892F753F608CB0708E6E17B17E774CDDA.apk"
apk, dex, analysis = AnalyzeAPK(file_path)

## Utility Functions

In [2]:
from math import floor, log10
from collections import Counter
import pandas as pd

benford = {1: .301, 
           2: .176, 
           3: .125, 
           4: .097, 
           5: .079, 
           6: .067, 
           7: .058, 
           8: .051, 
           9: .046}

def get_first_digit(i: int):
    if i == 0:
        return 0
    return floor(i / (10 ** floor(log10(i))))

def data_to_df(data: list[int]) -> pd.DataFrame:
    # Take list of data points and return dataframe of first digit frequencies as percentages
    range_to_fill = range(1, 10)
    fd = [get_first_digit(d) for d in data]
    total = len(data)
    if total == 0:
        return pd.DataFrame()
    counts = Counter({key: (Counter(fd)[key]/total) for key in range_to_fill})
    df = pd.DataFrame([counts], columns=counts.keys())
    return df.copy()

## Lengths of constant strings

In [3]:
strings = analysis.get_strings()  # dex[0].get_strings()
strings = [s.get_value() for s in strings]
strings = list(map(str.strip, strings))

data = list(map(len, strings))

data_to_df(data)

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,0.34347,0.157424,0.110912,0.096601,0.06619,0.057245,0.071556,0.053667,0.033989


## Sum of Decimal encoded characters of constant strings

In [4]:
strings = analysis.get_strings()  # dex[0].get_strings()
strings = [s.get_value() for s in strings]

strings = list(map(str.strip, strings))

data = list(map(lambda s: sum(ord(c) for c in s), strings))

data_to_df(data)

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,0.277281,0.150268,0.121646,0.105546,0.093023,0.064401,0.062612,0.055456,0.060823


## Lengths of methods

In [5]:
methods = analysis.get_methods()

# The above get_methods() method returns a deprecated type, so we get_method() again below to return a usable type.
# External methods don't have available lengths, so we skip past them
method_lengths = [method.get_method().get_length() for method in methods if not method.is_external()]

data_to_df(method_lengths)

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,0.232,0.137818,0.147273,0.094909,0.058909,0.112,0.058545,0.071273,0.031636


## Number of fields in a class

In [6]:
classes = analysis.get_internal_classes()

fields = [c.get_fields() for c in classes]

fields_count = list(map(len, fields))

data_to_df(fields_count)

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,0.23475,0.208872,0.125693,0.072089,0.072089,0.051756,0.007394,0.048059,0.027726


## Number of methods in a class

In [7]:
classes = analysis.get_internal_classes()
methods = [c.get_methods() for c in classes]
methods_count = list(map(len, methods))
data_to_df(methods_count)

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,0.158965,0.473198,0.09427,0.088725,0.072089,0.040665,0.014787,0.022181,0.025878


## Bytes -> Hex -> Decimal of classes.dex file

In [8]:
all_classes = apk.get_all_dex()

byte = [int(c, 16) for classes in all_classes for c in classes.hex(sep=' ').split(' ')]

data_to_df(byte)

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,0.332618,0.10757,0.077333,0.043861,0.033211,0.029377,0.024927,0.05403,0.040017


## Numbers in Strings

In [9]:
import re

strings = analysis.get_strings()
strings = [s.get_value() for s in strings]

nums = []

for string in strings:
    n = [int(match) for match in re.findall(r'\d+', string)]
    nums.extend(n)

df = data_to_df(nums)
df
# This doesn't seem to follow Benford's law

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,0.09375,0.171875,0.078125,0.09375,0.09375,0.0625,0.078125,0.15625,0.0625


## Timeouts/Durations

In [10]:
for d in dex:
    for method in d.get_methods():
        if "Thread.sleep" in method.get_source():
            print("Potential timeout: ", method.get_source())

strings = analysis.get_strings()

for string_id in strings:
    if "timeout" in string_id.get_value().lower() or "duration" in string_id.get_value().lower():
        print("Potential timeout string:", string_id.get_value())

attributes = apk.get_permissions()
declared = apk.get_declared_permissions_details()
print("Attributes:", attributes)
print("Declared:", declared)
# Unsure how this can be used with Benford's law

Potential timeout:  
    public void run()
    {
        while (this.a.a) {
            try {
                if (System.getProperty("z") != null) {
                    android.util.Log.i("WKAPP", "before calling wkApp.takeTaskFromQueue();");
                }
                String v0_4 = this.a.c.a();
                if (!this.a.c.isCoverMode()) {
                    if (System.getProperty("z") != null) {
                        android.util.Log.i("WKAPP", "got a task and start it.");
                    }
                    if (v0_4 != null) {
                        v0_4.start();
                    }
                    if (v0_4.getStatus() == 0) {
                        this.a.c.a(v0_4.getId());
                    }
                    if (System.getProperty("z") != null) {
                        android.util.Log.i("WKAPP", "task ended, loop for another task");
                    }
                }
            } catch (String v0_9) {
                if (System.getProperty("

The following procedure was taken from [this Github repo](https://github.com/asimswati553/RGB-based-Andorid-Malware-detection/blob/master/APK2File.ipynb) to find intents. 

## API Calls
Maps suspicious external API calls to the internal methods in which they are called

In [37]:
from collections import defaultdict
import json, csv
import os
import configparser

with open('./scores/api_scores.csv', 'r') as f:
    suspicious = csv.reader(f)
    suspicious = [line[0] for line in suspicious]
print(suspicious)

def evaluate_api(dex, analysis) -> dict[str, list[str]]:
    API_calls = defaultdict(list)
    for method in dex[0].get_methods():
        method_name = str(method).split('(')[0]
        if method_name.split('/')[0] == 'Landroid': #skip android libraries (not sure if these can be edited by developer)
            continue
        #print(method_name)
        for api in analysis.get_external_classes():
            for i in api.get_methods():
                api_name = str(i.get_method()).split('(')[0] # ignoring the parameters and return type.
                if api_name not in suspicious:
                    continue
                try:
                    if i.get_method().get_name() in method.get_source():
                        API_calls[api_name].append(method_name)
                    #print("\t", api_name)
                except Exception as e:
                    print(e)
    return API_calls

config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation(), allow_no_value=True)
config.read('config.ini')

BENIGN_DIR = config['PATHS']['benign_dir']

for file in os.listdir(BENIGN_DIR):
    file_path = os.path.join(BENIGN_DIR, file)
    temp_apk,temp_dex,temp_analysis = AnalyzeAPK(file_path)

    print(file)
    print(json.dumps(evaluate_api(temp_dex, temp_analysis), indent=4))
    print("\n\n\n")

['Ljavax/sql/ConnectionEvent;-><init>', 'Ljava/nio/channels/WritableByteChannel;->close', 'Landroid/service/carrier/CarrierService;->stopSelf', 'Landroid/opengl/Matrix;->getClass', 'Landroid/view/ViewStructure;->setCheckable', 'Landroid/text/method/BaseKeyListener;->getInputType', 'Landroid/provider/MediaStore$Images$Media;->wait', 'Ljava/lang/Runtime;->exec', 'Ljava/lang/System;->loadLibrary', 'Landroid/widget/AdapterView;->refreshDrawableState', 'Landroid/widget/MultiAutoCompleteTextView;->saveHierarchyState', 'Ljava/io/BufferedOutputStream;-><init>', 'Ljava/io/FileOutputStream;-><init>', 'Landroid/app/PendingIntent;->send', 'Landroid/app/AlarmManager;->Set', 'Landroid/app/NativeActivity;->getVolumeControlStream', 'Landroid/app/AcitivityManager;->killBackgroudProcess', 'Landroid/content/pm/PacakageManager;->removePackageFromPrefe', 'Landroid/content/pm/PacakageManager;->getInastallerPackageName', 'Landroid/content/pm/PacakageManager;->getInstalledPackages', 'Landroid/content/pm/Pacak

AttributeError: 'NoneType' object has no attribute 'get_end'

In [27]:
permissions = apk.get_permissions() + apk.get_declared_permissions()
collected_permissions = []
for permission in permissions:
    perm = permission.split('.')[-1]
    collected_permissions.append(perm)

#Activities
activities = apk.get_activities()
collected_activities = ['Activities:']
for activity in activities:
    act = activity.split('.')[-1]
    collected_activities.append(act)
        
#Services
services = apk.get_services()
collected_services = ['Services:']
for service in services:
    srvc = service.split('.')[-1]
    collected_services.append(srvc)
        
#Recivers
receivers = apk.get_receivers()
collected_receivers = ['Receivers:']
for receiver in receivers:
    recevr = receiver.split('.')[-1]
    collected_receivers.append(recevr)
        
#Providers
providers = apk.get_providers()
collected_providers = ['Providers:']
for provider in providers:
    collected_providers.append(provider)
    
#Intents
collected_intents = ['Intents:']
manifest_list = {'permissions':permissions,'activity' : activities, 'service': services, 'receiver':receivers, 'provider':providers}
intents_itemtype = {'activity' : activities, 'service': services, 'receiver':receivers, 'provider':providers}
for itemtype, listt in intents_itemtype.items():
    for item in listt:
        try:
            for intnts in apk.get_intent_filters(itemtype, item).values():
                for intnt in intnts:
                    collected_intents.append(intnt)
        except:
            pass
app_components = collected_activities + collected_services + collected_receivers + collected_providers + collected_intents
#print(collected_permissions)
#print(app_components)
#print(API_calls)

['Ljavax/sql/ConnectionEvent;-><init>', 'Ljava/nio/channels/WritableByteChannel;->close', 'Landroid/service/carrier/CarrierService;->stopSelf', 'Landroid/opengl/Matrix;->getClass', 'Landroid/view/ViewStructure;->setCheckable', 'Landroid/text/method/BaseKeyListener;->getInputType', 'Landroid/provider/MediaStore$Images$Media;->wait', 'Ljava/lang/Runtime;->exec', 'Ljava/lang/System;->loadLibrary', 'Landroid/widget/AdapterView;->refreshDrawableState', 'Landroid/widget/MultiAutoCompleteTextView;->saveHierarchyState', 'Ljava/io/BufferedOutputStream;-><init>', 'Ljava/io/FileOutputStream;-><init>', 'Landroid/app/PendingIntent;->send', 'Landroid/app/AlarmManager;->Set', 'Landroid/app/NativeActivity;->getVolumeControlStream', 'Landroid/app/AcitivityManager;->killBackgroudProcess', 'Landroid/content/pm/PacakageManager;->removePackageFromPrefe', 'Landroid/content/pm/PacakageManager;->getInastallerPackageName', 'Landroid/content/pm/PacakageManager;->getInstalledPackages', 'Landroid/content/pm/Pacak

KeyboardInterrupt: 

In [None]:
import csv

suspicious = {}
with open('./scores/api_scores.csv', 'r') as f:
    suspicious = csv.reader(f)
    suspicious = {line[0]:int(line[1]) for line in suspicious}

score = 0
for call in evaluate_api(dex, analysis):
    if call in suspicious:
        score += suspicious[call]


print(score)

1490
