In [1]:
import pandas as pd
import numpy as np
import glob, os
import json

from pandas.io.parsers import read_csv

def evaluate_accuracy(test):
    """Label every transferred GUI event of one mapping result.

    The ground-truth tables for ``test['source']`` and ``test['target']`` are
    read from ``input/ground_truth_mapping/shopping/``. The reference test is
    the row of the module-level ``ground_truth_tests`` frame whose ``method``
    equals ``test['method']``. Events of ``test['event_array']`` are paired
    with the reference test's events by position (``zip`` stops at the shorter
    list), and each transferred event dict receives a ``'case'`` entry:

    * ``"correct"``   - the transferred element is in the target ground truth
      and has the same ``canonical`` value as the source element;
    * ``"incorrect"`` - the transferred element is not in the target ground
      truth, or its ``canonical`` value differs;
    * ``"missed"``    - the event is ``"NONE"`` although the target ground
      truth contains the source element's ``canonical`` value;
    * ``"nonExist"``  - the event is ``"NONE"`` and the target ground truth
      has no such ``canonical`` value.

    The event dicts are modified in place and ``test`` itself is returned.

    Note: missing ground truth is only reported with ``print``; the following
    ``.iloc[0]`` access still raises ``IndexError`` when no row was found.
    """
    gt_source = read_csv("input/ground_truth_mapping/shopping/GT_" + test['source'] + ".csv")
    gt_target = read_csv("input/ground_truth_mapping/shopping/GT_" + test['target'] + ".csv")

    def _rows_for(table, reference):
        # References starting with "id@" are matched on the 'id' column; any
        # other reference has its first six characters dropped (presumably an
        # "xpath@" prefix) and is matched on the 'xpath' column.
        if reference[:3] == "id@":
            return table.loc[table['id'] == reference[3:]]
        return table.loc[table['xpath'] == reference[6:]]

    reference_tests = ground_truth_tests.loc[ground_truth_tests['method'] == test['method']]
    if reference_tests.shape[0] != 1:
        print(reference_tests, 'is not 1')

    for gui_event, source_gui_event in zip(test['event_array'], reference_tests.iloc[0]['event_array']):
        source_rows = _rows_for(gt_source, source_gui_event['id_or_xpath'])
        if source_rows.shape[0] == 0:
            print('L84 gt missing for', source_gui_event, 'check if it should be added')

        transferred = gui_event['id_or_xpath']
        if pd.isnull(transferred) or transferred == '':
            # Sanity check: an event that was not transferred to anything is
            # expected to be marked as NONE rather than left blank.
            print('missed is not marked as NONE', gui_event)

        if transferred == "NONE":
            # Nothing was transferred: decide between "missed" and "nonExist".
            counterparts = gt_target.loc[gt_target['canonical'] == source_rows.iloc[0]['canonical']]
            gui_event['case'] = "missed" if counterparts.shape[0] != 0 else "nonExist"
            continue

        # Something was transferred: decide between "correct" and "incorrect".
        target_rows = _rows_for(gt_target, transferred)
        same_canonical = (
            target_rows.shape[0] != 0
            and target_rows.iloc[0]['canonical'] == source_rows.iloc[0]['canonical']
        )
        gui_event['case'] = "correct" if same_canonical else "incorrect"

    return test

def list_cases(test):
    """Group the ``id_or_xpath`` values of labelled events by their case.

    ``test`` is an iterable of event mappings, each providing a ``'case'``
    and an ``'id_or_xpath'`` entry. The result maps each of the four case
    labels to the list of references carrying that label, in input order.
    A ``KeyError`` is raised for a label outside the four known ones.
    """
    buckets = {label: [] for label in ('correct', 'incorrect', 'missed', 'nonExist')}
    for event in test:
        bucket = buckets[event['case']]
        bucket.append(event['id_or_xpath'])
    return buckets

def count_cases(test):
    """Return the number of entries stored under each case label of ``test``.

    ``test`` must provide sized values under ``'correct'``, ``'incorrect'``,
    ``'missed'`` and ``'nonExist'``; the result holds their lengths under the
    corresponding ``'num_*'`` keys.
    """
    return {
        'num_correct': len(test['correct']),
        'num_incorrect': len(test['incorrect']),
        'num_missed': len(test['missed']),
        'num_nonExist': len(test['nonExist']),
    }

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is 0."""
    # An explicit check is used instead of catching ZeroDivisionError because
    # NumPy integers do not raise on division by zero (they emit a warning).
    if denominator == 0:
        return np.nan
    return numerator / denominator


def calc_precision_recall_accuracy(test):
    """Compute precision, recall and accuracy from the per-case counts.

    Args:
        test: mapping providing the numeric entries ``'num_correct'``,
            ``'num_incorrect'``, ``'num_missed'`` and ``'num_nonExist'``.

    Returns:
        A dict with three entries, each NaN when its denominator is zero:

        * ``'accuracy_precision'``: correct / (correct + incorrect)
        * ``'accuracy_recall'``:    correct / (correct + missed)
        * ``'accuracy'``:           (correct + nonExist) / all four counts
    """
    correct = test['num_correct']
    incorrect = test['num_incorrect']
    missed = test['num_missed']
    non_exist = test['num_nonExist']

    return {
        'accuracy_precision': _safe_ratio(correct, correct + incorrect),
        'accuracy_recall': _safe_ratio(correct, correct + missed),
        'accuracy': _safe_ratio(correct + non_exist,
                                correct + incorrect + missed + non_exist),
    }


# using CraftDroid as an example
# Load every CraftDroid mapping result. The file name (without extension) is
# split on "_"; its first two parts are stored as the source and target apps.
craftdroid_csv = []
for path in glob.glob("input/craftdroid/mapping_results/*.csv"):
    csv = read_csv(path)
    apps = os.path.splitext(os.path.basename(path))[0].split("_")
    # Scalar assignment fills the whole column with the constant value; no
    # row-wise apply is needed for that.
    csv['source'] = apps[0]
    csv['target'] = apps[1]
    csv['gui_mapper'] = "craftdroid"
    craftdroid_csv.append(csv)
combined_csv = pd.concat(craftdroid_csv)
# 'event_array' is stored as JSON text in the CSV files; decode it.
combined_csv['event_array'] = combined_csv['event_array'].apply(json.loads)

# Reference tests; evaluate_accuracy reads this module-level frame, so it has
# to exist before the evaluation step below runs.
ground_truth_tests = [read_csv(path, header=0) for path in glob.glob("input/extracted_tests/craftdroid_tests/*.csv")]
ground_truth_tests = pd.concat(ground_truth_tests)
ground_truth_tests['event_array'] = ground_truth_tests['event_array'].apply(json.loads)


# Label each event, then append the per-case lists, their counts and the
# derived metrics as additional columns.
combined_csv = combined_csv.apply(evaluate_accuracy, axis=1)
combined_csv = pd.concat([combined_csv, combined_csv['event_array'].apply(list_cases).apply(pd.Series)], axis=1)
# combined_csv = pd.concat([combined_csv, combined_csv.apply(evaluate_effectiveness, axis=1).apply(pd.Series)], axis=1)
combined_csv = pd.concat([combined_csv, combined_csv.apply(count_cases, axis=1).apply(pd.Series)], axis=1)
combined_csv = pd.concat([combined_csv, combined_csv.apply(calc_precision_recall_accuracy, axis=1).apply(pd.Series)], axis=1)
# 
# combined_csv = pd.concat([combined_csv, combined_csv.apply(append_src_gt_events, axis=1).apply(pd.Series)], axis=1)
# Re-encode the (now labelled) events as JSON text before writing the CSV.
combined_csv['event_array'] = combined_csv['event_array'].apply(json.dumps)
# combined_csv = pd.concat([combined_csv, combined_csv.apply(levenshtein, axis=1).apply(pd.Series)], axis=1)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("output", exist_ok=True)
combined_csv.to_csv("output/craftdroid_fidelity.csv", index=False)
print('Done! Check the output file in /output/craftdroid_fidelity.csv')

Done! Check the output file in /output/craftdroid_fidelity.csv
