In [3]:
%%writefile evaluate.py
import sys, os, time, random
from glob import glob
import utils
sys.path.insert(0, "webpage_fingerprinting_methods/")
import webpage_fingerprinting_methods
methods = webpage_fingerprinting_methods.methods

# Lookup table from the name each entry of `methods` reports via get_name()
# to the entry itself, so a scenario can select its method by name.
method_name_to_method = dict((candidate.get_name(), candidate) for candidate in methods)

def classify(method, output_dir, training_visit_files, test_visit_files, visit_file_label, n_cpu):
    """Run one method on a train/test split and return whatever score it reports.

    ``method`` is called with no arguments to build the object whose
    ``classify`` does the actual work; the remaining arguments are handed to
    that call (note that ``output_dir`` is passed after ``visit_file_label``).

    Raises:
        AssertionError: if any visit file appears in both the training and
            the test collection.
    """
    shared_visits = set(training_visit_files).intersection(test_visit_files)
    assert not shared_visits
    classifier = method()
    return classifier.classify(
        training_visit_files,
        test_visit_files,
        visit_file_label,
        output_dir,
        n_cpu,
    )

def classify_wrapper(scenario_file, n_cpu=6):
    """Evaluate the scenario stored in ``scenario_file`` and write the result back.

    The scenario JSON must provide ``output_dir`` (directory holding
    ``split.pickle``) and ``method`` (a key of ``method_name_to_method``).
    On success the keys ``accuracy`` and ``classification_time`` (wall-clock
    seconds measured with ``time.time()``) are added and the JSON is rewritten
    in place. Scenarios that already contain ``accuracy`` are skipped, so the
    function can be re-run safely over a partially processed directory.

    Args:
        scenario_file: path of the scenario JSON file.
        n_cpu: value forwarded to ``classify`` as its ``n_cpu`` argument.
            Defaults to 6, the value that used to be hard-coded here.

    Returns:
        True if the scenario was already evaluated or was evaluated now,
        False if anything went wrong. Failures are reported (scenario path
        plus traceback) instead of propagating, so one broken scenario does
        not abort a long batch run.
    """
    import traceback  # only needed on the failure path

    try:
        scenario = utils.load_json(scenario_file)
        if "accuracy" in scenario:
            # Result already stored by an earlier run.
            return True

        split_path = os.path.join(scenario['output_dir'], "split.pickle")
        training_visit_files, test_visit_files, visit_file_label = utils.load_pickle(split_path)
        print(len(training_visit_files), len(test_visit_files), len(visit_file_label))
        assert not set(training_visit_files) & set(test_visit_files)

        start = time.time()
        accuracy = classify(method_name_to_method[scenario['method']],
                            scenario['output_dir'],
                            training_visit_files,
                            test_visit_files,
                            visit_file_label,
                            n_cpu)
        end = time.time()
        scenario['accuracy'] = accuracy
        scenario['classification_time'] = end - start
        print(scenario)
        utils.dump_json(scenario, scenario_file)
        return True
    except Exception as e:
        # Keep the batch best-effort, but say which scenario failed and why;
        # a bare print(e) often shows nothing more than a missing key name.
        print("Scenario {} failed: {!r}".format(scenario_file, e))
        traceback.print_exc()
        return False
    
# Root of the scenario tree; scenario JSON files are expected two directory
# levels below it.
scenarios_dir = "../data/scenarios/"
# os.path.join avoids the doubled separator ("scenarios//*/...") that plain
# string concatenation onto a path with a trailing slash produced, so the
# paths handed to classify_wrapper (and written back to) are clean.
scenario_files = glob(os.path.join(scenarios_dir, "*", "*", "*.json"))
# Process in random order. Presumably this lets several evaluate.py processes
# run side by side without all starting on the same scenarios (finished ones
# are skipped by classify_wrapper) -- TODO confirm.
random.shuffle(scenario_files)
for scenario_file in scenario_files:
    classify_wrapper(scenario_file)
# Disabled process-pool variant, kept for reference:
# from concurrent.futures import ProcessPoolExecutor as Pool
# with Pool(20) as pool:
#     pool.map(classify_wrapper, scenario_files)

Overwriting evaluate.py


In [4]:
from glob import glob
import pandas as pd
import utils

In [5]:
# Scenario JSON files two directory levels below ../data/scenarios; evaluate.py
# stores `accuracy` and `classification_time` in these files.
result_files = glob("../data/scenarios/*/*/*.json")

In [6]:
# One row per scenario JSON file.
df_results = pd.DataFrame([utils.load_json(path) for path in result_files])

# Scale accuracy by 100 for every method except BoG.
# NOTE(review): presumably BoG already reports a percentage while the other
# methods report a fraction -- confirm against the method implementations.
not_bog = df_results["method"] != "BoG"
df_results.loc[not_bog, "accuracy"] = df_results.loc[not_bog, "accuracy"] * 100

# Display label for each host, looked up in utils.host_label.
df_results["host_label"] = df_results["host"].apply(lambda host: utils.host_label[host])

In [7]:
# Rows that have an accuracy value vs. all rows; equal counts mean every
# loaded scenario has a stored accuracy.
len(df_results.dropna(subset=["accuracy"])), len(df_results)

(22848, 22848)

In [8]:
# Keep only scenarios that have an accuracy value. Rebinding the name instead
# of using inplace=True leaves any frame displayed by an earlier cell intact
# and makes the step safe to re-run.
df_results = df_results.dropna(subset=['accuracy'])

In [9]:
# Preview the first rows of the results table.
df_results.head()

Unnamed: 0,accuracy,classification_time,host,method,name,output_dir,test_client_id,training_client_ids,host_label
0,97.714286,9.718208,www.mayoclinic.org,LL,train_one_test_one,../data/scenarios2/69/train_one_test_one_LL_ww...,6,[6],Mayo\nClinic
1,41.714286,99.014383,www.bankofamerica.com,Wfin,train_one_test_one,../data/scenarios2/69/train_one_test_one_Wfin_...,17,[9],Bank of\nAmerica
2,100.0,10.797168,www.bankofamerica.com,PS,train_one_test_one,../data/scenarios2/69/train_one_test_one_PS_ww...,4,[4],Bank of\nAmerica
3,50.285714,76.695425,www.wellsfargo.com,CUMUL,train_one_test_one,../data/scenarios2/69/train_one_test_one_CUMUL...,13,[13],Wells\nFargo
4,8.285714,15.58051,www.plannedparenthood.org,IPS,train_one_test_one,../data/scenarios2/69/train_one_test_one_IPS_w...,2,[6],Planned\nParenthood


In [10]:
# Number of result rows per method.
df_results.groupby("method").size()

method
BoG      2856
CUMUL    2856
IPS      2856
KFP      2856
LL       2856
OPS      2856
PS       2856
Wfin     2856
dtype: int64

In [11]:
# Save the results table as CSV, without writing the DataFrame index.
df_results.to_csv("../data/results.csv",index=False)

In [9]:
# Mean / min / max classification time per method. classification_time is the
# time.time() difference written by evaluate.py, i.e. wall-clock seconds.
df_results.groupby("method").agg({"classification_time": ['mean','min','max']})

Unnamed: 0_level_0,classification_time,classification_time,classification_time
Unnamed: 0_level_1,mean,min,max
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
BoG,337.905256,29.160764,7998.894416
CUMUL,362.895109,47.25836,13316.693496
IPS,26.963404,5.726716,373.412456
KFP,67.952571,19.829375,895.727634
LL,9.489132,1.911719,87.891624
OPS,18.73561,4.770276,216.72451
PS,25.169958,4.616395,335.483111
Wfin,208.428835,35.29138,3935.069997
