In [1]:
import os
import sys
#sys.path.insert(0, os.path.dirname(os.getcwd()))
import pickle

import numpy as np
import torch
#import torch.cuda
import timeit
import pandas as pd
#import argparse
import itertools
from load_data import load_data # segment_SHAP
from evaluation.metrics.entropy import Entropy_metric
from models.predictor_utils import load_predictor
from pickle import dump



In [2]:
# PATHS
# Folder locations used by the run cell. Leave an entry as None to have the
# run cell substitute its relative default (shown next to each name).
datasets_folder_path = None  # default: "datasets"
attributions_folder_path = None  # default: "attributions"
trained_models_folder_path = None  # default: "trained_models"
save_results_folder_path = None  # default: os.path.join("evaluation", "entropy_results")

In [3]:
# settings
# Each list selects which stored explanation variants are evaluated by the run
# cell. Trailing comments carry over the alternative values noted originally.
dataset_names = [
    "UWAVE",
    "KeplerLightCurves",
    "MP8",
    "gunpoint",
    "EOG",
]  # CLI alternative: [sys.argv[1]]
predictor_names = ["resNet"]  # options: "randomForest", "miniRocket", "resNet", "QUANT"; CLI alternative: [sys.argv[2]]
segmentation_names = [
    "clasp",
    "greedygaussian",
    "equal",
    "infogain",
    "nnsegment",
]
background_names = ["average"]  # options: "average", "zero", "sampling"
normalization_names = ["default"]  # options: "default", "normalized"

metric_names = ["entropy"]

# Prefix of the result file names written by the run cell.
results_prefix = "entropy"  # alternative: "evaluation"

# demo
# When demo_mode is on, the run cell keeps only the first `demo_mode_samples`
# test samples and does not write result files.
demo_mode_samples = 25
demo_mode = False
if demo_mode:
    # Shrink the sweep to one dataset / predictor / segmentation.
    normalization_names = ["default", "normalized"]
    background_names = ["average"]  # also: "zero", "sampling"
    segmentation_names = ["nnsegment"]
    predictor_names = ["resNet"]  # also: "randomForest", "miniRocket"
    dataset_names = ["UWAVE"]

# optional
# Get infos about which explanations are evaluated, once an `explanations`
# object has been loaded:
# datasets = list( explanations['attributions'].keys() )
# segmentations = list( explanations['attributions'][datasets[0]].keys() )
# predictors = list( explanations['attributions'][datasets[0]][segmentations[0]].keys() )
# backgrounds = list( explanations['attributions'][datasets[0]][segmentations[0]][predictors[0]].keys() )
# result_types = ['default','normalized']

In [4]:
# column_names = ['Dataset', 'Segmentation', 'ML model', 'Background', 'Normalization', 'Metric', 'Perturb', "Result"]
# df = pd.DataFrame(data=data_list, columns = column_names)

In [5]:
# df

In [6]:
# Entropy evaluation sweep.
# For every (dataset, predictor) pair: load the data, the trained predictor and
# the stored explanations, evaluate each requested
# (segmentation, background, normalization, metric) combination, and write one
# result table per pair (skipped in demo mode).

# Resolve folder defaults: any path left as None in the PATHS cell falls back
# to a relative folder name.
cwd = os.getcwd()  # not used below; kept in case other cells rely on it
if datasets_folder_path is None:
    datasets_folder_path = "datasets"  # os.path.join(cwd, "datasets")
if attributions_folder_path is None:
    attributions_folder_path = "attributions"
if trained_models_folder_path is None:
    trained_models_folder_path = "trained_models"
if save_results_folder_path is None:
    save_results_folder_path = os.path.join("evaluation", "entropy_results")

# Results are only written outside demo mode. Create the output folder up
# front so a missing directory cannot make `to_csv` fail after the first pair
# has already been evaluated. An empty path means "current directory" for
# os.path.join and needs no creation.
if not demo_mode and save_results_folder_path:
    os.makedirs(save_results_folder_path, exist_ok=True)

# device
device = "cuda" if torch.cuda.is_available() else "cpu"

starttime = timeit.default_timer()

# One metric object per requested name; unknown names raise KeyError.
eval_metrics = dict.fromkeys(metric_names)
for key in eval_metrics:
    if key == "entropy":
        eval_metrics[key] = Entropy_metric()
    else:
        raise KeyError(f"key {key} has no corresponding eval metric defined")

for dataset_name in dataset_names:
    print("Dataset: ", dataset_name)
    # loading dataset
    X_train, X_test, y_train, y_test, enc = load_data(subset='all', dataset_name=dataset_name, path=datasets_folder_path)
    if demo_mode:
        # keep only the first few test samples for a quick run
        X_test, y_test = X_test[:demo_mode_samples], y_test[:demo_mode_samples]

    for eval_metric in eval_metrics.values():
        eval_metric.fit_data(X_train, X_test, y_train, y_test)

    for predictor_name in predictor_names:
        print("Predictor: ", predictor_name)

        # load classifier
        predictor = load_predictor(path=trained_models_folder_path, predictor_name=predictor_name, dataset_name=dataset_name, device=device)  # torch.device(device)

        # load explanations (raises FileNotFoundError if the attribution file is missing)
        # SECURITY: allow_pickle=True unpickles arbitrary objects, which can
        # execute code; only load attribution files from a trusted source.
        attribution_filename = "_".join(("all_results", dataset_name, predictor_name)) + ".npy"
        explanations = np.load(os.path.join(attributions_folder_path, attribution_filename), allow_pickle=True).item()
        label_mapping = explanations['label_mapping'][dataset_name]  # not used below

        for eval_metric in eval_metrics.values():
            eval_metric.fit_ml_model(predictor)

        # rows collected for this (dataset, predictor) pair only
        data_list = []

        for key in itertools.product(segmentation_names, background_names, normalization_names, metric_names):
            segmentation_name, background_name, normalization_name, metric_name = key
            print("assessing", key)

            # load model and explanations to access
            # NOTE: `attributions` and `y_test_pred` are looked up but not
            # passed to the metric; only `segments` is handed to `evaluate`.
            attributions = explanations['attributions'][dataset_name][segmentation_name][predictor_name][background_name][normalization_name]
            y_test_pred = explanations['y_test_pred'][dataset_name][predictor_name]
            segments = explanations['segments'][dataset_name][segmentation_name]

            eval_metric = eval_metrics[metric_name]
            results = eval_metric.evaluate(segments)

            # Each result tuple is appended to the six identifying fields; the
            # column list below expects it to supply 'Perturb' and 'Result'.
            for result_tuple in results:
                data_list.append((dataset_name, segmentation_name, predictor_name, background_name, normalization_name, metric_name) + result_tuple)

            # cumulative time since the start of the whole sweep
            print("elapsed time", (timeit.default_timer() - starttime))

        # save
        column_names = ['Dataset', 'Segmentation', 'ML model', 'Background', 'Normalization', 'Metric', 'Perturb', "Result"]
        df = pd.DataFrame(data=data_list, columns=column_names)
        # NOTE(review): the file name carries no ".csv" extension; left
        # unchanged so existing readers of these files keep working.
        file_name = "_".join((results_prefix, predictor_name, dataset_name))
        result_path = os.path.join(save_results_folder_path, file_name)
        if not demo_mode:
            df.to_csv(result_path)

Dataset:  UWAVE
Predictor:  resNet
assessing ('clasp', 'average', 'default', 'entropy')
elapsed time 0.7922153000254184
assessing ('greedygaussian', 'average', 'default', 'entropy')
elapsed time 1.4598321998491883
assessing ('equal', 'average', 'default', 'entropy')
elapsed time 2.023792899912223
assessing ('infogain', 'average', 'default', 'entropy')
elapsed time 2.5859144998248667
assessing ('nnsegment', 'average', 'default', 'entropy')
elapsed time 3.160228200023994
Dataset:  KeplerLightCurves
Predictor:  resNet
assessing ('clasp', 'average', 'default', 'entropy')
elapsed time 6.795402699848637
assessing ('greedygaussian', 'average', 'default', 'entropy')
elapsed time 6.993549600010738
assessing ('equal', 'average', 'default', 'entropy')
elapsed time 7.18594739981927
assessing ('infogain', 'average', 'default', 'entropy')
elapsed time 7.390084099955857
assessing ('nnsegment', 'average', 'default', 'entropy')
elapsed time 7.559507600031793
Dataset:  MP8
Predictor:  resNet
assessing (

In [7]:
# `df` is rebuilt for every (dataset, predictor) pair in the run cell, so this
# shows only the results of the last pair processed.
df

Unnamed: 0,Dataset,Segmentation,ML model,Background,Normalization,Metric,Perturb,Result
0,EOG,clasp,resNet,average,default,entropy,mean,0.704625
1,EOG,clasp,resNet,average,default,entropy,std,0.23551
2,EOG,greedygaussian,resNet,average,default,entropy,mean,0.908797
3,EOG,greedygaussian,resNet,average,default,entropy,std,0.056299
4,EOG,equal,resNet,average,default,entropy,mean,1.0
5,EOG,equal,resNet,average,default,entropy,std,0.0
6,EOG,infogain,resNet,average,default,entropy,mean,0.801382
7,EOG,infogain,resNet,average,default,entropy,std,0.119031
8,EOG,nnsegment,resNet,average,default,entropy,mean,0.96365
9,EOG,nnsegment,resNet,average,default,entropy,std,0.016639
