In [1]:
import os
import pandas as pd
from androguard.misc import AnalyzeAPK, AnalyzeDex
from androguard.core.bytecodes.axml import ARSCParser
from androguard.core.bytecodes.axml import AXMLPrinter, ARSCResTableEntry
from androguard.core.bytecodes.dvm import DalvikVMFormat
import json
import hashlib

In [2]:
# Metadata table queried per APK by classifyApk; the columns read there are
# pkg_name, vt_detection, sha256, sha1, md5, dex_size, apk_size, dex_date,
# vt_scan_date and markets.
az = pd.read_csv('az18.csv')
# Table whose 'Apk' column lists the package names that the extraction loop iterates over.
androDi = pd.read_csv('azBalanceado.csv')

In [3]:
def getResources(a):
    """Return the entry names of the public resources of an APK.

    The public-resources listing of the APK's own package is rendered to a
    string and scanned for ``id="0x..."`` tokens; every distinct id is then
    resolved to its XML name and the part after the first ``/`` is kept.

    Parameters
    ----------
    a : object exposing ``get_android_resources()`` and ``get_package()``
        (the APK object returned by ``AnalyzeAPK``).

    Returns
    -------
    list of str
        Resource entry names, in order of first appearance of their id.
        Empty when the APK exposes no resource table.
    """
    # Fetch the resource table once instead of once per id.
    arsc = a.get_android_resources()
    if arsc is None:
        # androguard documents a None return for APKs without resources.arsc
        # (confirm for the version in use); there is nothing to list then.
        return []

    resTable = str(arsc.get_public_resources(a.get_package()))

    rids = []
    seen = set()  # O(1) membership test; `rids` keeps first-seen order
    for res in resTable.split(' '):
        if 'id=' not in res:
            continue
        parts = res.split('"')
        if len(parts) < 2:
            # Token mentions id= but carries no quoted value: skip, don't crash.
            continue
        rid = parts[1]
        if rid not in seen:
            seen.add(rid)
            rids.append(rid)

    resEntry = []
    for rid in rids:
        # Base 0 lets int() honour the "0x" prefix of the id.
        xmlName = arsc.get_resource_xml_name(int(rid, 0))
        if not xmlName or '/' not in xmlName:
            # Unresolvable id (or a name without a type/entry separator).
            continue
        resEntry.append(xmlName.split('/')[1])
    return resEntry

In [4]:
def getPackages(d):
    """Return the distinct package paths of the classes in ``d``.

    For each class name the first character is dropped, everything from the
    first ``$`` on is discarded, and the last ``/``-separated component is
    removed; what remains is the package path (``''`` for a class with no
    ``/`` in its name). Order of first appearance is preserved.
    """
    ordered = {}  # dict keys keep insertion order and drop repeats
    for cls in d.get_classes():
        outerName = str(cls.get_name()[1:].split('$')[0])
        package = '/'.join(outerName.split('/')[:-1])
        ordered.setdefault(package, None)
    return list(ordered)

In [5]:
def getActionCategory(a):
    """Collect the intent actions and categories declared in the manifest.

    The pretty-printed manifest is scanned line by line. Only lines that open
    an ``<action ...>`` or ``<category ...>`` element contribute; the first
    double-quoted value on such a line is recorded. Lines that merely contain
    the substrings "action"/"category" (e.g. an activity in a package called
    ``com.foo.transaction``) are ignored, and a matching line without any
    quoted value is skipped instead of raising ``IndexError``.

    Parameters
    ----------
    a : object exposing ``get_android_manifest_axml().get_xml(pretty=True)``.

    Returns
    -------
    (list of str, list of str)
        ``(action, category)``, each without duplicates and in order of first
        appearance.
    """
    def firstQuotedValue(line, tag):
        # Return the first quoted value when `line` opens a <tag> element, else None.
        body = line.lstrip()
        opener = '<' + tag
        if not body.startswith(opener):
            return None
        following = body[len(opener):len(opener) + 1]
        if following and following not in '/>' and not following.isspace():
            # A longer tag name that merely starts with `tag`.
            return None
        parts = body.split('"')
        return parts[1] if len(parts) > 1 else None

    xml = a.get_android_manifest_axml().get_xml(pretty=True)
    if isinstance(xml, bytes):
        # Decode rather than str(bytes): the latter yields the repr, with
        # escaped newlines and escaped non-ASCII characters.
        xml = xml.decode('utf-8', errors='replace')

    action = []
    category = []
    for line in str(xml).splitlines():
        for tag, bucket in (('action', action), ('category', category)):
            value = firstQuotedValue(line, tag)
            if value is not None and value not in bucket:
                bucket.append(value)
    return action, category

In [6]:
def getCerts(a):
    """Return ``subject.human_friendly`` for every certificate of ``a``.

    The result follows the order of ``a.get_certificates()`` and keeps
    duplicates.
    """
    return [certificate.subject.human_friendly
            for certificate in a.get_certificates()]

In [7]:
def getMethods(d):
    """Return the distinct method names found in ``d``.

    Names are taken from the ``name`` attribute of each item yielded by
    ``d.get_methods()``; repeats are dropped and first-seen order is kept.
    """
    # dict.fromkeys keeps insertion order while discarding repeated names.
    return list(dict.fromkeys(method.name for method in d.get_methods()))

In [8]:
def sha256(apk, label):
    """Return the hex SHA-256 digest of an APK file on disk.

    Parameters
    ----------
    apk : str
        File name without the ``.apk`` extension.
    label : truthy/falsy
        Truthy selects the ``apksAz18/malware/`` folder, falsy selects
        ``apksAz18/normal/``.

    Returns
    -------
    str
        Lower-case hexadecimal digest of the file contents.
    """
    folder = 'apksAz18/malware/' if label else 'apksAz18/normal/'
    digest = hashlib.sha256()
    with open(folder + apk + '.apk', 'rb') as stream:
        # Read in 64 KiB chunks until read() returns an empty bytes object.
        for chunk in iter(lambda: stream.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()

In [9]:
def _lookupLabel(jsonPath, key, missingMsg):
    """Return the value stored under ``key`` in the JSON file ``jsonPath``.

    When the key is absent (or the file does not hold a usable JSON mapping)
    ``missingMsg`` is printed and ``''`` is returned. A missing file still
    raises, as ``open`` is outside the guarded section.
    """
    with open(jsonPath, 'r') as f:
        try:
            return json.load(f)[key]
        except (KeyError, ValueError, TypeError):
            # KeyError: no entry for this hash; ValueError: malformed JSON;
            # TypeError: top-level JSON value is not indexable by a string.
            print(missingMsg)
            return ''


def classifyApk(apk):
    """Build the feature row for one APK.

    The APK is looked up in the module-level ``az`` table by package name.
    If ``<apk>.apk`` is listed in ``apksAz18/normal`` the row must have
    ``vt_detection == 0``; otherwise the file is taken to live in
    ``apksAz18/malware`` and the row must have ``vt_detection > 5``. When
    several rows match, the file's SHA-256 is used to select the row instead.

    Parameters
    ----------
    apk : str
        Package name, which is also the APK file name without extension.

    Returns
    -------
    list or None
        A 25-element list in this order: sha256, sha1, md5, package name,
        vt_detection, dex_size, apk_size, target/min/max SDK, dex_date,
        vt_scan_date, resource entries, class packages, permissions,
        activities, actions, categories, features, method names, certificate
        subjects, markets, type label, family label, and a 0/1 label
        (0 only when vt_detection is 0).
        ``None`` (after printing a message) when exactly one matching row
        could not be determined.
    """
    print(apk)
    # Only the "normal" listing is consulted; anything not in it is treated as malware.
    normal = os.listdir('apksAz18/normal')
    label = (apk + '.apk') not in normal
    if label:
        azData = az[(az['pkg_name'] == apk) & (az['vt_detection'] > 5)]
    else:
        azData = az[(az['pkg_name'] == apk) & (az['vt_detection'] == 0)]
    if azData.shape[0] > 1:
        # Ambiguous package name: disambiguate by the hash of the file on disk.
        azData = az[az['sha256'] == sha256(apk, label).upper()]
    if len(azData) != 1:
        print('apk não encontrado')
        return None

    def first(column):
        # Value of `column` in the single selected row.
        return azData[column].values[0]

    shaLower = first('sha256').lower()
    vtDetection = first('vt_detection')
    data = [
        shaLower,
        first('sha1').lower(),
        first('md5').lower(),
        apk,
        vtDetection,
        first('dex_size'),
        first('apk_size'),
    ]

    # Use the folder the file was actually found in. Re-deriving the folder
    # from vt_detection points at the wrong place when the hash fallback
    # selected a row whose score is between 1 and 5.
    apkPath = ('apksAz18/malware/' if label else 'apksAz18/normal/') + apk + '.apk'
    a, _, _ = AnalyzeAPK(apkPath)
    # NOTE(review): only the dex returned by a.get_dex() is parsed here;
    # additional dex files, if any, appear not to be covered - confirm.
    d = DalvikVMFormat(a.get_dex())

    data.append(a.get_target_sdk_version())
    data.append(a.get_min_sdk_version())
    data.append(a.get_max_sdk_version())
    data.append(first('dex_date'))
    data.append(first('vt_scan_date'))
    data.append(getResources(a))
    data.append(getPackages(d))
    data.append(a.get_permissions())
    data.append(a.get_activities())
    action, category = getActionCategory(a)
    data.append(action)
    data.append(category)
    data.append(a.get_features())
    data.append(getMethods(d))
    data.append(getCerts(a))
    data.append(first('markets'))
    data.append(_lookupLabel('type.json', shaLower, 'sem tipo'))
    data.append(_lookupLabel('family.json', shaLower, 'sem familia'))
    data.append(0 if int(vtDetection) == 0 else 1)
    return data

In [10]:
# Column order must match the order of the list returned by classifyApk.
columns = [
    'meta.sha256', 'meta.sha1', 'meta.md5', 'meta.pkg.name', 'meta.vt.score',
    'meta.dex.size', 'meta.apk.size', 'manifest.tarsdk', 'manifest.minsdk',
    'manifest.maxsdk', 'meta.dex.date', 'meta.vt.date', 'resource.entry',
    'source.class.package', 'manifest.permission', 'manifest.activity',
    'manifest.action', 'manifest.category', 'manifest.feature',
    'source.method.name', 'certificate.owner', 'meta.market', 'label.type',
    'label.family', 'label',
]

# Collect rows in a plain list and build the frame once: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every iteration.
rows = []
for apk in androDi['Apk']:
    lst = classifyApk(apk)
    # classifyApk returns None when the APK cannot be matched to a single row.
    if lst is not None:
        rows.append(lst)

# dtype=object keeps every cell as produced (several columns hold lists),
# matching the all-object frame the row-by-row append used to yield.
data = pd.DataFrame(rows, columns=columns, dtype=object)

com.ForntYardIdeas.eshall
sem tipo
sem familia
com.narasimata.najwashihab
sem tipo
sem familia
com.Ideasforaquascape.uwiapps
sem tipo
sem familia
com.greatesthitsallalbum.maroon5
sem tipo
sem familia
com.innockstudios.balloonarcher
sem tipo
sem familia
com.Sportstudio.PROSOCCERSTARLEAGUE
sem tipo
sem familia
free.stock.guru
sem tipo
sem familia
com.pastarecipes.sangdroib
sem tipo
sem familia
com.OfficeBuildingDesign.lukoni
sem tipo
sem familia
sdteamnew.fastphofc.com.ph
sem tipo
sem familia
com.resepayamsausmentegalezat.resepmasakandanminuman.toptrandresepmasakanapps.mesotheliomalawfirm
sem tipo
sem familia
com.lemi.xuanhuan
sem tipo
sem familia
com.WomenBra.trascodroid
sem tipo
sem familia
com.programsdesigningfor.podksvad
sem tipo
sem familia
com.mt354.adzanpakmayone
sem tipo
sem familia
com.saintlucia.fmradioonline.radiostationtunner
sem tipo
sem familia
com.dc.asg
sem tipo
sem familia
com.nlabs.Milestone
sem tipo
sem familia
com.megaaccount.romenticsongs
sem tipo
sem familia
jacob.

Requested API level 29 is larger than maximum we have, returning API level 28 instead.


sem tipo
sem familia
com.ffour.android.telugulovesong
sem tipo
sem familia
soft98.madahi
sem tipo
sem familia
com.bonsaidesignforbeginners.farah
sem tipo
sem familia
com.virtlab.rotational_motion_with_uniform_acceleration_demo
sem tipo
sem familia
fr.motelservices.tva
sem tipo
sem familia
com.fyxtech.yoyolive
sem tipo
sem familia
com.dishesdesigns.cidut
sem tipo
sem familia
com.omeletteapp.humananatomy
sem tipo
sem familia
bible.frenchbible.romanianbible
sem tipo
sem familia
com.gb.police.bike.shooter.gangster.chase
sem tipo
sem familia
com.stonepoleapps.comfortableheels
sem tipo
sem familia
com.FunnyAppXL.TranslatorForCatsLanguage
sem tipo
sem familia
com.marathi.lernhyds
sem tipo
sem familia
com.enmedio.app
sem tipo
sem familia
tornados.tormenta.rayos
sem tipo
sem familia
dmcy.co.dmcy
sem tipo
sem familia
com.AnkaraFashionStyles.sitd212
sem tipo
sem familia
com.brudawg.diydawg
sem tipo
sem familia
www.oehsindia.com.oehsindia
sem tipo
sem familia
com.appnyoudev14.aghani_wasouf
sem tip

XML Seems to be packed, operations on the AndroidManifest.xml might fail.


sem tipo
sem familia
chinese.ulearning.com.vn
sem tipo
sem familia


In [11]:
# Persist the assembled feature table. No index argument is passed, so pandas'
# default applies and the row index is written as the first CSV column.
data.to_csv('datasetFabricio.csv')

In [None]:
# Bare expression: the notebook renders the assembled DataFrame as this cell's output.
data