# imports 

In [44]:
# Clear the interactive namespace before anything else runs; -f skips the confirmation prompt.
%reset -f

# imports

In [61]:
from captum.attr import ShapleyValueSampling
from load_data import load_data
from models.model_wrappers import *
from models.train_models import *
from segmentation import *
from utils import *
import torch
import os
from tqdm import tqdm
import timeit
import sys
from torch.cuda import is_available as is_GPU_available
# Device string handed to torch: "cuda" when a GPU is usable, otherwise "cpu".
if is_GPU_available():
    device = "cuda"
else:
    device = "cpu"

# device for torch

# hyper-parameters

In [62]:

# settings
# Each collection lists the configurations swept by the pipeline; the trailing
# comments keep the alternative values that can be switched in.
demo_mode = False

if demo_mode:
    # demo: reduced configuration
    dataset_names = {'gunpoint'}
    predictor_names = {"randomForest"}
    segmentation_names = {"equal"}  # ,'clasp'}
    background_names = {"average", "sampling"}
    normalization_names = {"default", "normalized"}
else:
    dataset_names = {'UWAVE'}  # {sys.argv[1]}
    predictor_names = {'resNet'}  # {sys.argv[2]} ; options: "randomForest", 'miniRocket', 'resNet'
    segmentation_names = ["equal"]  # options: "clasp", "greedygaussian", "equal", "infogain", "nnsegment"
    background_names = ["sampling", "average", "zero"]  # options: "average", "zero", "sampling"
    normalization_names = {"default", "normalized"}


# instantiate the dictionaries that are going to be used in the pipeline

In [63]:
# predictor family -> predictor names; Captum needs torch models handled
# differently from the other (scikit-style) ones
predictors = dict(
    torch=['resNet'],
    scikit=['miniRocket', 'randomForest', 'QUANT'],
)

# segmentation name -> function that computes that segmentation
segmentation_dict = {
    "clasp": get_claSP_segmentation,
    "infogain": get_InformationGain_segmentation,
    "greedygaussian": get_GreedyGaussian_segmentation,
    "equal": get_equal_segmentation,
    "nnsegment": get_NNSegment_segmentation,
}

# one sub-dictionary per result type, each keyed by dataset name (values start as None)
results = {
    result_type: dict.fromkeys(dataset_names)
    for result_type in ('y_test_true', 'label_mapping', "segments", 'y_test_pred', "attributions")
}

# "default" is always part of the requested normalizations
normalization_names = normalization_names | {"default"}


# train model

In [64]:
from models.predictor_utils import load_predictor, predict_proba

for dataset_name in dataset_names:
    # init dataset: load the train/test split together with the label mapping
    X_train, X_test, y_train, y_test, enc = load_data(subset='all', dataset_name=dataset_name)
    # for debugging only: keep just the first two test samples
    if demo_mode:
        X_test = X_test[:2]
        y_test = y_test[:2]

    n_samples, n_chs, ts_length = X_test.shape

    # create the per-dataset slots of the results structure
    results['y_test_true'][dataset_name] = y_test
    results['label_mapping'][dataset_name] = enc
    results["attributions"][dataset_name] = dict.fromkeys(segmentation_names)
    results["segments"][dataset_name] = dict.fromkeys(segmentation_names)
    results["y_test_pred"][dataset_name] = dict.fromkeys(predictor_names)

    # Name handed to the load/train helpers. Demo runs pass None instead of the
    # dataset name (presumably so that nothing is saved, see the TODO below --
    # confirm against the helpers). A separate variable is used so that the
    # loop variable `dataset_name`, which keys `results`, is never overwritten.
    # TODO not to save if in demo mode!
    model_dataset_name = None if demo_mode else dataset_name

    # NOTE(review): predictor_dict, X_train, X_test, y_test and n_samples are
    # rebound on every iteration, so later cells only see the last dataset.
    predictor_dict = dict()
    for predictor_name in predictor_names:

        if predictor_name=='resNet':
            # TODO rollback to normal (train the network instead of loading a stored one)
            # use the device selected at the top of the notebook rather than a
            # hard-coded "cuda", so CPU-only machines can run this cell too
            clf = load_predictor(path="trained_models",predictor_name="resNet",dataset_name=model_dataset_name,device=device)
            preds = predict_proba(clf,samples=X_test,device=device)
            #clf,preds = train_ResNet(X_train, y_train, X_test, y_test, dataset_name,device=device)
        elif predictor_name=='miniRocket':
            clf,preds = train_miniRocket(X_train, y_train, X_test, y_test, model_dataset_name)
        elif predictor_name=="randomForest":
            clf, preds = train_randomForest(X_train, y_train, X_test, y_test, model_dataset_name)
        elif predictor_name=="QUANT":
            clf, preds = train_QUANT(X_train, y_train, X_test, y_test, model_dataset_name)
        else:
            raise ValueError("predictor not found")

        # keep both the fitted model and its test-set predictions
        predictor_dict[predictor_name] = {"clf": clf, "preds": preds}


RuntimeError: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
def initialize_result_dict(X_test,predictor_names,dataset_name,segmentation_name,results):
    """Create the empty result slots for one (dataset, segmentation) pair.

    Writes into ``results`` in place and returns nothing:

    * ``results["segments"][dataset_name][segmentation_name]`` becomes an empty
      object array with one cell per (sample, channel), or one cell per sample
      when ``X_test`` has a single channel.
    * ``results["attributions"][dataset_name][segmentation_name]`` becomes a
      dict keyed by predictor name whose values are dicts keyed by the
      notebook-level ``background_names`` (all values ``None``).
    """
    n_samples, n_channels = X_test.shape[0], X_test.shape[1]
    if n_channels > 1:
        segments_shape = (n_samples, n_channels)
    else:
        segments_shape = n_samples
    results["segments"][dataset_name][segmentation_name] = np.empty(segments_shape, dtype=object)

    # one independent {background: None} dict per predictor
    results["attributions"][dataset_name][segmentation_name] = {
        name: dict.fromkeys(background_names) for name in predictor_names
    }


def get_sample_info(segmentation_method, X_test,y_test,results, id, mask_list, ts_list, y_list):
    """Segment one test sample and append its tensors to the running lists.

    Reads ``dataset_name``, ``segmentation_name`` and ``get_feature_mask`` from
    the notebook namespace.

    Parameters:
        segmentation_method: callable applied to the sample; only the first
            ``X_test.shape[1]`` entries of its result are kept.
        X_test, y_test: test samples and labels, indexed by ``id``.
        results: results structure; the segments are stored at
            ``results['segments'][dataset_name][segmentation_name][id]``.
        id: index of the sample to process.
        mask_list, ts_list, y_list: lists that receive the feature mask, the
            sample tensor and the label tensor (appended in place).

    Returns:
        ``(ts, y, mask)`` - the same objects that were appended to the lists.
    """
    # get current sample and label (the slice keeps a leading dimension on the label)
    ts, y = X_test[id], torch.tensor(y_test[id:id + 1])
    # get segment and its tensor representation
    current_segments = segmentation_method(ts)[:X_test.shape[1]]
    # store under the index that was passed in; the previous code used the
    # notebook-level loop counter `i`, which is not a parameter of this function
    results['segments'][dataset_name][segmentation_name][id] = current_segments
    mask = get_feature_mask(current_segments, ts.shape[-1])
    mask_list.append(mask)
    ts = torch.tensor(ts).repeat(1, 1, 1)  # TODO use something similar to np.expand_dim?
    ts_list.append(ts)
    y_list.append(y)
    return ts,y,mask


from utils import sample_background
def get_background( background_name, results, normalization_names, X_train, n_background=50):
    """Build the baseline ("background") tensor for one background strategy.

    Also resets
    ``results["attributions"][dataset_name][segmentation_name][predictor_name][background_name]``
    to a dict keyed by ``normalization_names`` (values ``None``);
    ``dataset_name``, ``segmentation_name`` and ``predictor_name`` are read from
    the notebook namespace.

    Parameters:
        background_name: "zero", "sampling" or "average".
        results: results structure updated in place.
        normalization_names: keys of the freshly created attribution slot.
        X_train: training samples; only the shape is used for "zero", the data
            is passed to ``sample_background`` otherwise.
        n_background: number of samples requested from ``sample_background``.

    Returns:
        "zero": a single all-zero sample shaped like one training sample;
        "sampling": the result of ``sample_background(X_train, n_background)``;
        "average": the mean of such a sample over its first axis (axis kept).

    Raises:
        ValueError: if ``background_name`` is not one of the three strategies.
    """
    results["attributions"][dataset_name][segmentation_name][predictor_name][background_name] = dict.fromkeys(
        normalization_names)
    # background data
    if background_name == "zero":
        background_dataset = torch.zeros((1,) + X_train.shape[1:])
    elif background_name == "sampling":
        background_dataset = sample_background(X_train, n_background)
    elif background_name == "average":
        background_dataset = sample_background(X_train, n_background).mean(axis=0, keepdim=True)
    else:
        # fail with a clear message instead of an UnboundLocalError on return
        raise ValueError(f"unknown background_name: {background_name!r}")

    return background_dataset


def get_attribution(ts, mask, background_dataset,y, results ): #    global ts, mask, background_dataset, tmp, y
    """Explain one sample with the notebook-level ``SHAP`` explainer and store the result.

    The body is now indented with spaces only; the previous mix of tabs and
    spaces is rejected by Python 3 with a TabError.

    Reads from the notebook namespace: ``SHAP``, ``clf``, ``predictors``,
    ``predictor_name``, ``device``, ``X_test``, ``dataset_name``,
    ``segmentation_name``, ``background_name`` and the sample index ``i``.

    Parameters:
        ts: input tensor handed to ``SHAP.attribute``.
        mask: feature mask tensor (``feature_mask`` argument).
        background_dataset: baseline tensor (``baselines`` argument).
        y: target tensor (``target`` argument).
        results: results structure; the explanation is written to
            ``results['attributions'][...]["default"][i]``.

    Raises:
        ValueError: if ``predictor_name`` belongs to neither predictor family.
    """
    # NOTE(review): len() is the size of the FIRST dimension, not the number of
    # dimensions; if this was meant to detect a 4-D background it should test
    # the number of dimensions instead. Left unchanged - confirm the intent.
    if len(background_dataset)==4:
        # in this case background is 'SAMPLING'
        # get rid of first dimension as it's always 1
        ts = ts[0] ;  mask= mask[0] ; background_dataset= background_dataset[0] ; y=y[0]

    if predictor_name in predictors['scikit']:
        # if using random forest flat everything
        #if predictor_name == "randomForest":
        #    ts = ts.reshape(-1, n_chs * ts_length)
        #    mask = mask.reshape(-1, n_chs * ts_length)
        #    background_dataset = background_dataset.reshape(-1, n_chs * ts_length)

        tmp = SHAP.attribute(ts, target=y, feature_mask=mask, baselines=background_dataset, additional_forward_args=clf)

    elif predictor_name in predictors['torch']:
        # if use torch make sure everything is on selected device
        ts = ts.to(device); y = y.to(device) ; mask = mask.to(device); background_dataset = background_dataset.to(device)
        tmp = SHAP.attribute(ts, target=y, feature_mask=mask, baselines=background_dataset)

    else:
        # without this branch `tmp` would be unbound further down
        raise ValueError(f"predictor {predictor_name!r} is in neither predictors['scikit'] nor predictors['torch']")

    # in case of random forest 'un-flatten' result
    if predictor_name=="randomForest":
        tmp = tmp.reshape(-1,X_test.shape[1],X_test.shape[2])

    # lastly store current explanation in the data structure; if sampling store the mean
    results['attributions'][dataset_name][segmentation_name][predictor_name][background_name]["default"][i] = torch.mean(tmp, dim=0).cpu().numpy() if \
        background_name=="sampling" else tmp[0].cpu().numpy()


def get_normalized_results(normalization_names,results):
    """Derive the "normalized" attributions from the "default" ones, in place.

    Works on
    ``results['attributions'][dataset_name][segmentation_name][predictor_name][background_name]``
    (all four keys come from the notebook namespace):

    * if "normalized" is requested, every channel's stored segments are turned
      into weights via ``change_points_to_lengths`` / ``lengths_to_weights``
      and multiplied with the "default" attributions;
    * if "default" is not requested, the "default" entry is deleted afterwards.
    """
    def _channel_weights(channel_segments):
        # weights of one channel, from its segments and the series length
        segment_lengths = change_points_to_lengths(channel_segments, X_train.shape[-1])
        return lengths_to_weights(segment_lengths)

    if "normalized" in normalization_names:
        weights = np.array([
            [_channel_weights(channel_segments) for channel_segments in sample_segments]
            for sample_segments in results["segments"][dataset_name][segmentation_name]
        ])
        slot = results['attributions'][dataset_name][segmentation_name][predictor_name][background_name]
        slot["normalized"] = slot["default"] * weights

    if "default" not in normalization_names:
        del results['attributions'][dataset_name][segmentation_name][predictor_name][background_name]["default"]


In [51]:
def initialize_result_dict(X_test,predictor_names,dataset_name,segmentation_name,results):
    """Prepare the empty ``results`` entries of one (dataset, segmentation) pair.

    Nothing is returned; ``results`` is modified in place:

    * ``results["segments"][dataset_name][segmentation_name]``: empty object
      array, shaped (samples, channels) for multi-channel ``X_test`` and
      (samples,) when there is a single channel;
    * ``results["attributions"][dataset_name][segmentation_name]``: one dict
      per predictor, each keyed by the notebook-level ``background_names``
      with ``None`` values.
    """
    sample_count = X_test.shape[0]
    channel_count = X_test.shape[1]
    multichannel = channel_count > 1
    segments_store = np.empty((sample_count, channel_count) if multichannel else sample_count, dtype=object)
    results["segments"][dataset_name][segmentation_name] = segments_store

    per_predictor = {}
    for name in predictor_names:
        # each predictor gets its own {background: None} dictionary
        per_predictor[name] = dict.fromkeys(background_names)
    results["attributions"][dataset_name][segmentation_name] = per_predictor

In [52]:
def get_sample_info(segmentation_method,segmentation_name, idx, X_test,y_test,results, id, mask_list, ts_list, y_list):
    """Segment test sample ``id`` and collect its tensors for later batching.

    The segments are stored at
    ``results['segments'][dataset_name][segmentation_name][idx]``
    (``dataset_name`` comes from the notebook namespace). The feature mask, the
    sample tensor and the label tensor are appended to ``mask_list``,
    ``ts_list`` and ``y_list`` and also returned as ``(ts, y, mask)``.
    """
    series = X_test[id]
    # slicing keeps a leading dimension on the label
    label = torch.tensor(y_test[id:id + 1])

    # keep at most one segmentation per channel
    segments = segmentation_method(series)[:X_test.shape[1]]
    results['segments'][dataset_name][segmentation_name][idx] = segments

    feature_mask = get_feature_mask(segments, series.shape[-1])
    mask_list.append(feature_mask)

    # TODO use something similar to np.expand_dim?
    series_tensor = torch.tensor(series).repeat(1, 1, 1)
    ts_list.append(series_tensor)
    y_list.append(label)
    return series_tensor, label, feature_mask


In [53]:
from utils import sample_background

def get_background( background_name, segmentation_name, results, normalization_names, X_train, n_background=50):
    """Build the baseline ("background") tensor for one background strategy.

    Side effect:
    ``results["attributions"][dataset_name][segmentation_name][predictor_name][background_name]``
    is reset to a dict keyed by ``normalization_names`` (values ``None``).
    ``dataset_name`` and ``predictor_name`` are read from the notebook
    namespace, so only the predictor that is current at call time is touched.

    Parameters:
        background_name: "zero", "sampling" or "average".
        segmentation_name: segmentation key of the slot to reset.
        results: results structure updated in place.
        normalization_names: keys of the freshly created attribution slot.
        X_train: training samples; only the shape is used for "zero", the data
            is passed to ``sample_background`` otherwise.
        n_background: number of samples requested from ``sample_background``.

    Returns:
        "zero": a single all-zero sample shaped like one training sample;
        "sampling": the result of ``sample_background(X_train, n_background)``;
        "average": the mean of such a sample over its first axis (axis kept).

    Raises:
        ValueError: if ``background_name`` is not one of the three strategies.
    """
    results["attributions"][dataset_name][segmentation_name][predictor_name][background_name] = dict.fromkeys(
        normalization_names)
    # background data
    if background_name == "zero":
        background_dataset = torch.zeros((1,) + X_train.shape[1:])
    elif background_name == "sampling":
        background_dataset = sample_background(X_train, n_background)
    elif background_name == "average":
        background_dataset = sample_background(X_train, n_background).mean(axis=0, keepdim=True)
    else:
        # fail with a clear message instead of an UnboundLocalError on return
        raise ValueError(f"unknown background_name: {background_name!r}")

    return background_dataset


In [54]:
def get_attribution(explainer, ts, mask, background_dataset,y, sampling ): #    global ts, mask, background_dataset, tmp, y
    """Run ``explainer.attribute`` on one batch and return the saliency map as a numpy array.

    Reads ``predictor_name``, ``predictors``, ``clf``, ``device`` and ``X_test``
    from the notebook namespace.

    Parameters:
        explainer: object exposing ``attribute(inputs, target=..., feature_mask=...,
            baselines=..., [additional_forward_args=...])``.
        ts, mask, y: batch tensors for inputs, feature mask and targets. When
            ``sampling`` is true only their first element along the leading
            axis is used.
        background_dataset: baseline tensor, passed through unchanged apart
            from the device move for torch predictors.
        sampling: whether the background is the "sampling" strategy.

    Returns:
        numpy array: the attributions as returned by the explainer, or their
        mean over the first axis when ``sampling`` is true. In that case the
        result has one dimension fewer than in the non-sampling case.

    Raises:
        ValueError: if ``predictor_name`` belongs to neither predictor family.
    """
    if sampling:
        # get rid of first dimension as it's always 1
        # TODO try to flatten multiple singles "50 samples" into a 3D dataset and get the performances of that
        ts = ts[0] ;  mask= mask[0] ; y=y[0]

    if predictor_name in predictors['scikit']:
        # if using random forest flat everything
        #if predictor_name == "randomForest":
        #    ts = ts.reshape(-1, n_chs * ts_length)
        #    mask = mask.reshape(-1, n_chs * ts_length)
        #    background_dataset = background_dataset.reshape(-1, n_chs * ts_length)

        # non-torch models are reached through the forward function, which receives clf as extra argument
        tmp = explainer.attribute(ts, target=y, feature_mask=mask, baselines=background_dataset, additional_forward_args=clf)

    elif predictor_name in predictors['torch']:
        # if use torch make sure everything is on selected device
        ts = ts.to(device); y = y.to(device) ; mask = mask.to(device); background_dataset = background_dataset.to(device)
        tmp = explainer.attribute(ts, target=y, feature_mask=mask, baselines=background_dataset)

    else:
        # without this branch `tmp` would be unbound further down
        raise ValueError(f"predictor {predictor_name!r} is in neither predictors['scikit'] nor predictors['torch']")

    # in case of random forest 'un-flatten' result
    if predictor_name=="randomForest":
        tmp = tmp.reshape(-1,X_test.shape[1],X_test.shape[2])

    # if sampling return the mean over the first axis, otherwise the attributions as they are
    saliency_map = torch.mean(tmp, dim=0).cpu().numpy() if sampling else tmp.cpu().numpy()
    return saliency_map


In [55]:
def store_results(table, segmentation_name, normalization_names, current_results, start):
    """Write one batch of attributions into ``table`` at rows ``start:start+len(batch)``.

    ``table['default']`` receives ``current_results`` unchanged when "default"
    is requested. When "normalized" is requested, ``table['normalized']``
    receives ``current_results`` multiplied by per-channel weights, which are
    computed from the stored segments of the same rows via
    ``change_points_to_lengths`` / ``lengths_to_weights``.

    Reads ``results``, ``dataset_name`` and ``X_train`` from the notebook
    namespace. The number of rows written is ``current_results.shape[0]``.
    """
    stop = start + current_results.shape[0]

    if 'default' in normalization_names:
        table['default'][start:stop] = current_results

    # TODO to be improved!
    if "normalized" not in normalization_names:
        return

    def _channel_weights(channel_segments):
        # weights of one channel, from its segments and the series length
        segment_lengths = change_points_to_lengths(channel_segments, X_train.shape[-1])
        return lengths_to_weights(segment_lengths)

    batch_segments = results["segments"][dataset_name][segmentation_name][start:stop]
    weights = np.array([
        [_channel_weights(channel_segments) for channel_segments in sample_segments]
        for sample_segments in batch_segments
    ])
    table['normalized'][start:stop] = current_results * weights
    

# TODO: decide whether we want a static background

In [56]:

# imports hoisted out of the innermost loop, where they were re-executed on every iteration
from torch.utils.data import DataLoader
from models.SHAP_dataloader import SHAP_dataloader

# number of samples explained per call for the non-sampling backgrounds
# (previously a magic number inside the loop); "sampling" always uses 1
ATTRIBUTION_BATCH_SIZE = 50

starttime = timeit.default_timer()

with torch.no_grad():
    for dataset_name in dataset_names:
        # NOTE(review): X_train, X_test, y_test, n_samples and predictor_dict are
        # whatever the training cell left behind, i.e. they belong to the last
        # dataset it processed - only correct while there is a single dataset.
        for predictor_name in predictor_names:
            results['y_test_pred'][dataset_name][predictor_name] = predictor_dict[predictor_name]["preds"]

        for segmentation_name in segmentation_names:
            initialize_result_dict(X_test,predictor_names,dataset_name,segmentation_name,results)
            segmentation_method = segmentation_dict[segmentation_name]

            # segment every test sample once; the lists are reused for all backgrounds/predictors
            ts_list = []
            mask_list = []
            y_list = []
            for i in range(n_samples):
                get_sample_info(segmentation_method, segmentation_name, i, X_test, y_test, results, i,
                                mask_list, ts_list, y_list)

            for background_name in background_names:
                background_dataset = get_background(background_name, segmentation_name, results, normalization_names, X_train)
                is_sampling = background_name == 'sampling'

                for predictor_name in predictor_names:
                    clf = predictor_dict[predictor_name]["clf"]

                    # get_background only resets the slot of the predictor_name that
                    # happened to be current when it was called, so build the
                    # per-normalization table explicitly for every predictor
                    attribution_table = {
                        normalization_name: np.zeros(X_test.shape, dtype=np.float32)
                        for normalization_name in normalization_names
                    }
                    results['attributions'][dataset_name][segmentation_name][predictor_name][background_name] = attribution_table

                    # torch models are explained directly, the others through the forward function
                    SHAP = ShapleyValueSampling(clf) if predictor_name in predictors['torch'] else ShapleyValueSampling(forward_classification)

                    # prepare for batch computation
                    batch_size = 1 if is_sampling else ATTRIBUTION_BATCH_SIZE
                    data_loader = DataLoader(
                        SHAP_dataloader(ts_list, y_list, mask_list, background_dim=background_dataset.shape[0]),
                        batch_size=batch_size)

                    # actually computing
                    current_idx = 0
                    with tqdm(total=len(ts_list)) as pbar:
                        for (ts, y, mask) in data_loader:
                            # the last batch may be smaller than batch_size
                            n_in_batch = ts.shape[0]
                            current_results = get_attribution(SHAP, ts, mask, background_dataset, y,
                                                              sampling=is_sampling)

                            # the sampling branch returns a single sample without a batch
                            # axis; add it back so store_results writes exactly one row
                            if current_results.ndim == len(X_test.shape) - 1:
                                current_results = current_results[np.newaxis]

                            store_results(table=attribution_table, segmentation_name=segmentation_name,
                                          normalization_names=normalization_names,
                                          current_results=current_results, start=current_idx)

                            # advance by the real batch length so the bar ends exactly at its total
                            pbar.update(n_in_batch)
                            current_idx += n_in_batch

                    # TODO understand how the normalised results were computed
                    #get_normalized_results(normalization_names,results)

print("elapsed time", ( timeit.default_timer() -starttime ) )


100%|██████████| 428/428 [19:43<00:00,  2.77s/it]
450it [00:24, 18.32it/s]                         
450it [00:24, 18.73it/s]                         

elapsed time 1232.4164527829998





In [59]:
# Sanity check: for every background, print the summed absolute "default"
# attribution over the last axis, followed by the shape of that summary.
# dataset_name / segmentation_name / predictor_name are the values left over
# from the last iteration of the attribution loop.
attributions_by_background = results['attributions'][dataset_name][segmentation_name][predictor_name]
for background in attributions_by_background:
    print( "\n\n", background)
    absolute_mass = np.abs(attributions_by_background[background]['default']).sum(axis=-1)
    print( absolute_mass , absolute_mass.shape )
    



 sampling
[[11.582408  24.254623  35.691555 ]
 [ 7.3367586  9.266972   6.6268415]
 [25.880638  27.253008  18.168533 ]
 ...
 [28.691757   9.965157  17.066982 ]
 [12.745036  30.071623  19.087353 ]
 [31.189804   9.798238  13.231202 ]] (428, 3)


 average
[[29.683067  21.247822  29.267376 ]
 [ 3.9951653  6.4947567  2.620192 ]
 [18.54258   32.54339   42.619137 ]
 ...
 [19.117561  35.358078  30.521587 ]
 [33.42269   21.398525  11.332826 ]
 [25.153896  28.398335  36.558273 ]] (428, 3)


 zero
[[43.885323  21.24476   28.068007 ]
 [ 0.8967617  2.3241808  2.368327 ]
 [23.453777   6.1439085 38.005104 ]
 ...
 [13.480526  16.435982  46.407093 ]
 [29.20761   19.735928  14.694554 ]
 [11.833488  11.819376  45.91996  ]] (428, 3)


In [39]:
# dump result to disk
# NOTE: dataset_name and predictor_name are the values left over from the last
# loop iteration, so they only label the file; the whole `results` structure is saved.
file_prefix = "all_results_DEMO_" if demo_mode else "all_results"
file_name = "_".join( (file_prefix, dataset_name, predictor_name) )

# make sure the target directory exists; np.save does not create it
output_dir = "attributions"
os.makedirs(output_dir, exist_ok=True)
file_path = os.path.join(output_dir, file_name)

# `results` is a nested dict, so np.save stores it as a pickled object array
# (".npy" is appended to the name); read it back with
# np.load(path, allow_pickle=True).item()
np.save( file_path, results )

In [26]:
# Display the stored test-set predictions (nested by dataset name, then predictor name).
results["y_test_pred"]

{'gunpoint': {'QUANT': array([[0.985, 0.015],
         [0.015, 0.985]])}}