In [None]:
import pandas as pd
import numpy as np
import os

from pathlib import Path

In [None]:
# Constants
# Dataset locations, resolved two directory levels above the current working directory.
PROJECT_ROOT = Path.cwd().parent.parent
DREBIN_BASE_PATH = PROJECT_ROOT.joinpath("dataset", "drebin", "metadata")
FEATURE_VECTOR_PATH = DREBIN_BASE_PATH.joinpath("feature_vectors")
POSITIVE_SAMPLES_LOOKUPTABLE_PATH = DREBIN_BASE_PATH.joinpath("sha256_family.csv")

# Feature categories; each one becomes a column of the feature-vector frame.
FEATURE_NAMES = [
    "hw_features",
    "req_permissions",
    "app_components",
    "intents",
    "api_calls",
    "used_permissions",
    "sus_api_calls",
    "urls",
]

# Maps the prefix found before "::" on a feature-file line to its category.
# Several prefixes share the "app_components" category.
FEATURE_LOOKUP = dict(
    feature="hw_features",
    permission="req_permissions",
    activity="app_components",
    service_receiver="app_components",
    provider="app_components",
    service="app_components",
    intent="intents",
    api_call="api_calls",
    real_permission="used_permissions",
    call="sus_api_calls",
    url="urls",
)

In [2]:
def get_features(lines: list[str]) -> dict[str, str]:
    """Group the lines of one feature file by feature category.

    Every non-blank line is expected to have the form ``<prefix>::<value>``.
    The prefix is translated through ``FEATURE_LOOKUP`` into one of the
    ``FEATURE_NAMES`` categories and the value is collected under it.

    Args:
        lines: Raw lines of a feature file; trailing newlines are allowed and
            blank or whitespace-only lines are ignored.

    Returns:
        A dict with one key per entry of ``FEATURE_NAMES``, in that order.
        Each value is the concatenation of the collected values, every one
        followed by a comma (so a non-empty string ends with ``","``).
        Categories without any value map to ``""``.

    Raises:
        ValueError: If a non-blank line contains no ``::`` separator.
        KeyError: If a line's prefix is not a key of ``FEATURE_LOOKUP``.
    """
    collected: dict[str, list[str]] = {name: [] for name in FEATURE_NAMES}

    for line in lines:
        entry = line.strip()
        if not entry:
            # Covers "\n", "\r\n", whitespace-only lines and an empty last line.
            continue

        # Split on the FIRST "::" only, so values that contain "::" themselves
        # (e.g. IPv6 URLs) are kept intact instead of being truncated.
        prefix, separator, value = entry.partition("::")
        if not separator:
            raise ValueError(f"Malformed feature line (missing '::'): {line!r}")

        collected[FEATURE_LOOKUP[prefix]].append(value.strip())

    # Keep the established output format: each value is followed by a comma.
    return {
        name: "".join(f"{value}," for value in values)
        for name, values in collected.items()
    }

In [3]:
def load_drebin(load_data: bool = False, max_files: "int | None" = 1000) -> pd.DataFrame:
    """Return the Drebin feature-vector frame, rebuilt from disk or read from cache.

    Args:
        load_data: When True, parse the raw feature files under
            ``FEATURE_VECTOR_PATH``. When False, read the previously saved
            ``"test.csv"`` from the current working directory.
        max_files: Maximum number of feature files to parse, taken from the
            alphabetically sorted directory listing. Defaults to 1000; pass
            ``None`` to parse every file. Expected to be non-negative.
            Ignored when ``load_data`` is False.

    Returns:
        With ``load_data=True``: a frame indexed by ``sha256`` with one column
        per ``FEATURE_NAMES`` entry, plus ``malware`` (1 if the hash appears in
        the positive-samples lookup table, else 0) and ``family`` (the lookup
        table's first non-index column, or ``"Benign"``).
        With ``load_data=False``: whatever ``pd.read_csv("test.csv")`` yields.
    """
    if not load_data:
        # Load the previously created dataframe.
        # NOTE(review): unlike the freshly built frame, this one has a default
        # integer index with "sha256" as an ordinary column (no index_col is
        # passed), and pandas' default NA handling presumably turns empty
        # feature strings into NaN. Left unchanged for backward compatibility.
        return pd.read_csv("test.csv")

    # Load the positive samples lookup table and turn it into a plain
    # hash -> family mapping for cheap membership tests inside the loop.
    samples_lookup = pd.read_csv(POSITIVE_SAMPLES_LOOKUPTABLE_PATH, delimiter=",")
    samples_lookup = samples_lookup.set_index("sha256")
    family_by_hash = samples_lookup.iloc[:, 0].to_dict()

    # os.listdir returns entries in arbitrary order; sort so that the
    # max_files subset is the same on every run and every machine, and skip
    # anything that is not a regular file.
    feature_files = sorted(
        name
        for name in os.listdir(FEATURE_VECTOR_PATH)
        if os.path.isfile(os.path.join(FEATURE_VECTOR_PATH, name))
    )

    # Collect rows first and build the frame once; enlarging a DataFrame one
    # row at a time re-allocates on every insert.
    rows = []
    for file in feature_files[:max_files]:
        # The file name (up to the first ".") is the sha256 hash. Use the same
        # key for the malware lookup and for the row label.
        sha256 = file.split(".")[0]

        if sha256 in family_by_hash:
            malware = 1
            family = family_by_hash[sha256]
        else:
            malware = 0
            family = "Benign"

        with open(os.path.join(FEATURE_VECTOR_PATH, file), "r") as f:
            lines = f.readlines()
        features = get_features(lines)

        # Order values by FEATURE_NAMES explicitly instead of relying on the
        # insertion order of the returned dict.
        rows.append(
            [sha256, *(features[name] for name in FEATURE_NAMES), malware, family]
        )

    columns = ["sha256", *FEATURE_NAMES, "malware", "family"]
    return pd.DataFrame(rows, columns=columns).set_index("sha256")

In [4]:
# Rebuild the frame from the raw feature files (load_data=True bypasses the cached CSV).
feature_vectors = load_drebin(load_data=True)
# Bare trailing expression so the notebook renders the frame as the cell output.
feature_vectors

Unnamed: 0_level_0,hw_features,req_permissions,app_components,intents,api_calls,used_permissions,sus_api_calls,urls,malware,family
sha256,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
00002d74a9faa53f5199c910b652ef09d3a7f6bd42b693755a233635c3ffb0f4,"android.hardware.touchscreen,android.hardware....","android.permission.READ_PHONE_STATE,android.pe...",".GameBootReceiver,.GameService,GameAlertDialog...","android.intent.action.BOOT_COMPLETED,android.i...","java/net/HttpURLConnection,android/content/Con...","android.permission.INTERNET,android.permission...","Read/Write External Storage,getPackageInfo,pri...",http://client.go360days.com/client.php?action%...,1,GinMaster
000068216bdb459df847bfdd67dd11069c3c50166db1ea8772cdc9250d948bcf,"android.hardware.touchscreen,","android.permission.WRITE_EXTERNAL_STORAGE,andr...","paklena.batterydiviner.Settings,paklena.batter...","android.intent.category.HOME,android.intent.ac...","android/content/Context;->startService,android...","android.permission.VIBRATE,","Read/Write External Storage,getSystemService,",,0,Benign
0000764713b286cfe7e8e76c7038c92312977712d9c5a86d504be54f3c1d025a,"android.hardware.touchscreen,android.hardware....","android.permission.ACCESS_FINE_LOCATION,androi...","BootReceiver,ru.alpha.AlphaReceiver,ru.alpha.A...","android.intent.category.HOME,android.intent.ac...",android/net/ConnectivityManager;->getActiveNet...,"android.permission.INTERNET,android.permission...","getDeviceId,Read/Write External Storage,Obfusc...","http://m-001.net/i/,m-001.net,http://m-001.net...",1,Opfake
0000962c2c34de1ca0c329b18be7847459da2d9d14b6b23a21cbc6427522403c,"android.hardware.touchscreen,android.hardware....","android.permission.INTERNET,",".index,index,","android.intent.action.MAIN,android.intent.cate...","android/webkit/WebView,","android.permission.INTERNET,","getSystemService,",,0,Benign
000167f1ff061ea91440c40659c11c2af160342fd2e493d609e4996b8820e78f,"android.hardware.touchscreen,android.hardware....","android.permission.INTERNET,android.permission...","NokyART,.NokyArtWidget$UpdateService,.NokyArt,...","android.intent.action.MAIN,android.intent.cate...",android/net/ConnectivityManager;->getNetworkIn...,"android.permission.INTERNET,android.permission...","getSystemService,",http://nokyart.n97.fr/Application/android/inde...,0,Benign
...,...,...,...,...,...,...,...,...,...,...
01e78d647cabf702bdbbb3af46ab70f2cbb407dfe6c6f86af1e94e75128cc213,"android.hardware.touchscreen,android.hardware....","android.permission.INTERNET,android.permission...",".EmotionSensor,","android.intent.action.MAIN,android.intent.cate...",android/net/ConnectivityManager;->getActiveNet...,"android.permission.ACCESS_NETWORK_STATE,androi...","getPackageInfo,getSystemService,Cipher(AES/CBC...",http://schemas.android.com/apk/lib/com.google....,0,Benign
01e7a30caff13ed959ec74a26ec27564dea94389200ed916de683e2ebdd25fb5,"android.hardware.touchscreen,","android.permission.WRITE_EXTERNAL_STORAGE,",".main,","android.intent.action.MAIN,android.intent.cate...",android/content/ContentResolver;->openInputStr...,"android.permission.INTERNET,android.permission...","Read/Write External Storage,getSystemService,",,0,Benign
01e7a6ea485da0acbe7ac6f7c1dbb20df27faa3a06130f3304265c3993ab787a,"android.hardware.touchscreen,android.hardware....","android.permission.INTERNET,android.permission...",".zaragoza_872,","android.intent.action.MAIN,android.intent.cate...",android/telephony/TelephonyManager;->getDevice...,"android.permission.ACCESS_FINE_LOCATION,androi...","getDeviceId,printStackTrace,Read/Write Externa...",,0,Benign
01e86ce55b5175a75ed513d452c5a7169a56d013d822f13a7b48a3395b1dda39,"android.hardware.touchscreen,android.hardware....","android.permission.INTERNET,com.android.vendin...",".MainActivity,","android.intent.action.MAIN,android.intent.cate...","android/app/Activity;->startActivity,android/m...","android.permission.READ_CONTACTS,android.permi...","printStackTrace,getPackageInfo,getSystemServic...","http://market.android.com/details?id%3D,http:/...",0,Benign


In [5]:
# Save the frame (index included) to "test.csv" in the working directory;
# this is the file load_drebin reads back when called with load_data=False.
feature_vectors.to_csv("test.csv")