In [106]:
import csv
import sys
import os

# Absolute path of the working directory at the time this cell runs; the
# functions below build their input and output paths relative to it.
PWD = os.path.abspath(".")

In [107]:
import locale

def convert_to_int(num_str):
    """Parse an integer written with ',' thousands separators.

    Produces the same value ``locale.atoi`` gives under ``en_US.UTF-8``, but
    strips the separators directly instead of calling ``locale.setlocale``.
    That avoids changing the process-wide locale on every call, and avoids
    the ``locale.Error`` raised on machines where ``en_US.UTF-8`` is not
    installed.

    Args:
        num_str: Text such as ``"1,234,567"`` or ``"42"``.

    Returns:
        The parsed ``int``, or ``None`` when the text is not an integer
        (for example ``"1.5"`` or ``"abc"``).
    """
    try:
        return int(num_str.replace(',', ''))
    except ValueError:
        # Not an integer; callers test the result against None.
        return None
    

In [108]:
def create_dict_event_count(number, event_name, event_count):
    """Record one count for an event, creating the event's list on first use.

    Args:
        number: Count to store.
        event_name: Dictionary key the count is filed under.
        event_count: Dict mapping event name -> list of counts; mutated in place.
    """
    # setdefault hands back the existing list, or installs a fresh one for an
    # event name that has not been seen yet.
    event_count.setdefault(event_name, []).append(number)
    

In [109]:
from itertools import cycle

def extract_event_and_count_from_list(csv_list):
    """Group the integer counts in a flat token list by the token that follows them.

    The list is scanned once from start to end. A token that starts with an
    ASCII digit and parses as an integer (see ``convert_to_int``) is treated
    as a count; the token immediately after it is used as the event name and
    is consumed together with the count. Digit-leading tokens that are not
    integers (``convert_to_int`` returns ``None``) are ignored.

    The scan is bounded by the list length. The previous implementation walked
    an ``itertools.cycle`` of the list, which raised ``StopIteration`` on an
    empty list, wrapped around and re-read the first tokens (recording the
    first count twice when the list began with a count), and could loop
    forever when its position counter stepped over the stop value.

    Args:
        csv_list: Sequence of string tokens.

    Returns:
        Dict mapping event name -> list of int counts, in order of appearance.
    """
    event_count = dict()

    total = len(csv_list)
    index = 0
    while index < total:
        current_elem = csv_list[index]

        # A count needs a following token to act as its event name, so a
        # digit-leading token in the very last position is skipped.
        starts_with_digit = bool(current_elem) and '0' <= current_elem[0] <= '9'
        if starts_with_digit and index + 1 < total:
            data = convert_to_int(current_elem)
            if data is not None:
                create_dict_event_count(data, csv_list[index + 1], event_count)
                # The event name has been consumed along with its count.
                index += 2
                continue

        index += 1

    print('# of events:', len(event_count))

    return event_count

In [110]:
def list_csv_file(file_path, num_files=80):
    """Collect the non-empty tokens from the numbered perf output files.

    Reads ``output1.csv`` through ``output<num_files>.csv`` under
    ``file_path``, in numeric order, splitting every line on single spaces.

    Args:
        file_path: Directory that holds the ``output<i>.csv`` files.
        num_files: How many numbered files to read, starting at 1. Defaults
            to 80, the count that was previously hard-coded.

    Returns:
        A flat list of tokens in file and line order, with empty cells and
        bare ``"#"`` tokens removed.

    Raises:
        OSError: If one of the expected files cannot be opened.
    """
    item_list = list()

    for i in range(1, num_files + 1):
        filename = '/output' + str(i) + '.csv'
        # The csv module expects files to be opened with newline='' so that it
        # can handle line endings itself.
        with open(file_path + filename, 'r', newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=' ')
            for row in reader:
                # Runs of spaces produce empty cells; drop those, and drop the
                # bare "#" token, which is an apparently unnecessary delimiter.
                item_list.extend(item for item in row if item and item != "#")

    return item_list

In [111]:
def _first_entry_name(directory):
    """Return the alphabetically first entry name found in ``directory``."""
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # reproducible when the directory holds more than one entry.
    names = sorted(os.listdir(directory))
    if not names:
        raise FileNotFoundError('no apk file found in ' + directory)
    return names[0]


def add_type_and_apk_name_in_dictionary(m_target_dict, b_target_dict, times, app_num):
    """Append the "type", "apk" and "apk-match" columns to both dictionaries.

    Each dictionary gains ``times`` values per column: its own label
    ('malicious' or 'benign') under "type", its own apk file name under
    "apk", and the other variant's apk file name under "apk-match". Existing
    lists under those keys are extended rather than replaced.

    The apk names are looked up before either dictionary is touched, so a
    missing or empty directory leaves both dictionaries unchanged.

    Args:
        m_target_dict: Column dict for the malicious app; mutated in place.
        b_target_dict: Column dict for the benign app; mutated in place.
        times: Number of values to append to each column.
        app_num: App number as a string; selects the sub-directory under
            ``droidkungfu_w_benign``.

    Raises:
        FileNotFoundError: If the benign or malicious directory does not
            exist or contains no entries.
    """
    ### need to change based on user's directory setting (where the benign and malicious apk files are) ###
    benign_apk_name = _first_entry_name(
        os.path.join(PWD, 'droidkungfu_w_benign/' + app_num + '/benign'))
    malicious_apk_name = _first_entry_name(
        os.path.join(PWD, 'droidkungfu_w_benign/' + app_num + '/malicious'))

    # (target dict, type label, own apk name, apk name of the other variant)
    plan = (
        (m_target_dict, 'malicious', malicious_apk_name, benign_apk_name),
        (b_target_dict, 'benign', benign_apk_name, malicious_apk_name),
    )
    for target_dict, label, own_apk, matched_apk in plan:
        # Key insertion order (type, apk, apk-match) is kept because it
        # determines the column order when the dict is written out.
        target_dict.setdefault("type", []).extend([label] * times)
        target_dict.setdefault("apk", []).extend([own_apk] * times)
        target_dict.setdefault("apk-match", []).extend([matched_apk] * times)
    

In [112]:
def get_malicious_benign_dict(app_name, app_num):
    """Build the malicious and benign column dictionaries for one app.

    Reads the perf output files under ``malicious_output/<app_name>`` and
    ``benign_output/<app_name>`` (relative to ``PWD``), groups the counts by
    event, then appends the "type", "apk" and "apk-match" columns.

    Args:
        app_name: Name of the per-app directory inside the output folders.
        app_num: App number, forwarded to
            ``add_type_and_apk_name_in_dictionary``.

    Returns:
        Tuple ``(malicious_perf_dict, benign_perf_dict)``.

    Raises:
        ValueError: If no events were parsed from the malicious output, so
            the number of rows cannot be determined.
    """
    #malicious
    ### need to change based on user's directory setting (where the perf output files are) ###
    file_name = os.path.join(PWD, 'malicious_output/' + app_name)
    csv_list = list_csv_file(file_name)
    print(file_name)
    malicious_perf_dict = extract_event_and_count_from_list(csv_list)

    #benign
    ### need to change based on user's directory setting (where the perf output files are) ###
    file_name = os.path.join(PWD, 'benign_output/' + app_name)
    csv_list = list_csv_file(file_name)
    print(file_name)
    benign_perf_dict = extract_event_and_count_from_list(csv_list)

    #add columns (type, apk, apk-match)
    # The row count is taken from the first malicious column. Previously an
    # empty dictionary left the variable unassigned and failed with an
    # UnboundLocalError further down.
    first_column = next(iter(malicious_perf_dict.values()), None)
    if first_column is None:
        raise ValueError('no perf events parsed for ' + app_name)
    len_column = len(first_column)

    # NOTE(review): the same row count is applied to the benign dictionary;
    # this assumes both runs produced equally long columns - confirm.
    add_type_and_apk_name_in_dictionary(malicious_perf_dict, benign_perf_dict, len_column, app_num)

    return malicious_perf_dict, benign_perf_dict

In [113]:
def _rows_for_header(columns_by_name, header):
    """Return the rows of ``columns_by_name`` with cells ordered like ``header``."""
    present = [columns_by_name[name] for name in header if name in columns_by_name]
    # Same rule as zip(): the shortest column decides how many rows exist.
    row_count = min((len(column) for column in present), default=0)
    # A column this dict does not have is written as empty cells.
    columns = [columns_by_name.get(name, [''] * row_count) for name in header]
    return zip(*columns)


def make_one_csvfile(app_name, benign_dict, malicious_dict):
    """Write the benign rows followed by the malicious rows to ``result.csv``.

    The file is created in ``combined_csv_output/<app_name>`` under ``PWD``;
    missing directories are created. The header lists the benign keys first,
    then any keys that only the malicious dict has. Every row is laid out in
    header order, so values stay under the right column even when the two
    dicts list their events in a different order. When both dicts have the
    same keys in the same order the output matches a plain
    ``zip(*dict.values())`` dump.

    Args:
        app_name: Name of the output sub-directory.
        benign_dict: Dict mapping column name -> list of values.
        malicious_dict: Dict mapping column name -> list of values.
    """
    ### need to change based on user's directory setting (location for saving combined output) ###
    output_dir = os.path.join(PWD, 'combined_csv_output/' + app_name)
    print(output_dir)
    if not os.path.exists(output_dir):
        print(app_name, "created")
        # makedirs also creates 'combined_csv_output' itself when it is missing.
        os.makedirs(output_dir, exist_ok=True)
    else:
        print(app_name, "already exists")

    csv_file = "result.csv"
    out_csv_file_dir = os.path.join(output_dir, csv_file)
    print(out_csv_file_dir)

    header = list(benign_dict.keys())
    header.extend(name for name in malicious_dict if name not in benign_dict)

    # newline='' lets the csv writer control line endings; without it an
    # extra blank line appears after every row on Windows.
    with open(out_csv_file_dir, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(_rows_for_header(benign_dict, header))
        writer.writerows(_rows_for_header(malicious_dict, header))
                

In [1]:
'''
    NOTE:
        app_name can be removed. (Then replace app_name with app_num everywhere above.)
        app_name is composed of app_num plus the package name, which can be obtained from the Androguard get_package() API.
        app_name can be the type of malware, such as DroidKungFu,
        since we use the same numbers on different types of malware apps when downloading.
        This might need modification later.
'''
# Identify the app to process; app_name is app_num followed by the package name.
app_num = '0'
app_name = app_num + 'com.happymaau.MathRef'

# Parse the perf outputs for both variants, then write them into one combined CSV.
# NOTE(review): the NameError recorded for this cell means it was run on a kernel
# where the definition cells above had not been executed - run those first.
malicious_dict, benign_dict = get_malicious_benign_dict(app_name, app_num)
make_one_csvfile(app_name, benign_dict, malicious_dict)

NameError: name 'get_malicious_benign_dict' is not defined