In [2]:
import numpy as np
from captum.attr import ShapleyValueSampling
from tqdm import trange

from load_data import load_data
from train_models import train_randomForest
from segmentation import *
from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [96]:
# Helper functions intended to live in utils.py.

def change_points_to_lengths(change_points, max_length):
    """Convert segment boundary indices into per-segment lengths.

    Parameters
    ----------
    change_points : 1D iterable of int
        Boundary indices between consecutive segments.
    max_length : int
        Index closing the last segment (the total length of the series).

    Returns
    -------
    numpy.ndarray
        Differences between consecutive entries of
        ``0, *change_points, max_length``; it holds
        ``len(change_points) + 1`` values.
    """
    # One array with every boundary: series start, each change point, series end.
    boundaries = np.array([0, *change_points, max_length])
    # Length of a segment = its closing boundary minus its opening boundary.
    return boundaries[1:] - boundaries[:-1]

def lengths_to_weights(lengths):
    """Expand segment lengths into one weight per position.

    Every position inside a segment of length ``n`` receives the weight
    ``1 / n``, so the weights of each segment add up to 1.

    Parameters
    ----------
    lengths : 1D iterable of positive int
        Segment lengths; lists, tuples and numpy arrays are all accepted.

    Returns
    -------
    numpy.ndarray
        Float array with ``sum(lengths)`` entries. Empty input yields an
        empty array.

    Raises
    ------
    TypeError
        If the lengths are not integer-like (e.g. floats).
    ValueError
        If any length is negative (raised by ``np.repeat``).
    """
    # Coerce plain iterables: `1 / lengths` is only defined for arrays, so a
    # list or tuple would otherwise fail with a TypeError.
    if not isinstance(lengths, np.ndarray):
        lengths = np.array(list(lengths))
    if lengths.size == 0:
        return np.ones(0)

    segment_weights = 1 / lengths
    # "same_kind" accepts any integer dtype as a repeat count but rejects floats.
    repeats = lengths.astype(np.intp, casting="same_kind", copy=False)
    # Repeat each segment weight once for every position of its segment.
    return np.repeat(segment_weights, repeats)


In [116]:

# --- data -----------------------------------------------------------------
dataset_name = 'UWAVE'  # alternative: "gunpoint"
X_train, X_test, y_train, y_test = load_data(subset='all', dataset_name=dataset_name)

# Uncomment to restrict the run to the first two test samples (quick check).
# X_test = X_test[:2]
# y_test = y_test[:2]

# --- model ----------------------------------------------------------------
# `preds` is stored below under 'y_test_pred'.
clf, preds = train_randomForest(X_train, y_train, X_test, y_test, dataset_name)

# --- result containers ----------------------------------------------------
# Object array receiving the segmentation of every test sample: 2D
# (first axis x second axis of X_test) when the second axis has more than one
# entry, otherwise 1D with one slot per sample.
if X_test.shape[1] > 1:
    segments = np.empty((X_test.shape[0], X_test.shape[1]), dtype=object)
else:
    segments = np.empty(X_test.shape[0], dtype=object)

# Dictionary meant to be dumped: attributions plus the metadata needed to
# interpret them.
all_attributions = {
    'attributions': np.empty(X_test.shape, dtype=np.float32),  # filled sample by sample
    'segments': segments,
    'y_test_true': y_test,
    'y_test_pred': preds,
}

# explain
n_background = 50
background_type = "average"  # supported below: "zero", "sampling", "average"
batch_size = 32
# NOTE(review): `torch` is not imported explicitly in the visible import cell;
# presumably it arrives through one of the star imports — confirm, or add
# `import torch` next to the other imports.
with torch.no_grad():
    SHAP = ShapleyValueSampling(forward_classification)
    for i in range(X_test.shape[0]):
        # get current sample and label
        ts, y = X_test[i], torch.tensor(y_test[i:i+1])

        # get segment and its tensor representation
        current_segments = get_claSP_segmentation(ts)[:X_test.shape[1]]
        all_attributions['segments'][i] = current_segments
        # Flattened to a single row once per sample (random forest only, as
        # every instance should be a 1D tensor); the value does not depend on
        # the batch, so it is computed outside the batch loop.
        mask = get_feature_mask(current_segments, ts.shape[-1]).reshape(1, -1)

        # background data
        if background_type == "zero":
            background_dataset = torch.zeros((1,) + X_train.shape[1:])
        elif background_type == "sampling":
            background_dataset = sample_background(X_train, n_background)
        elif background_type == "average":
            background_dataset = sample_background(X_train, n_background).mean(axis=0, keepdim=True)
        else:
            # Fail loudly: without this branch an unknown value would leave
            # `background_dataset` undefined (NameError) or silently reuse the
            # one left over from a previous iteration / run.
            raise ValueError(
                f"unknown background_type {background_type!r}; "
                'expected "zero", "sampling" or "average"'
            )

        print("\n explaining sample n.",i,"\n")
        # data structure with room for each sample in the background dataset
        current_attr = torch.zeros(background_dataset.shape[0], ts.shape[0], ts.shape[1])
        for j in trange(0, background_dataset.shape[0], batch_size):

            sample = background_dataset[j:j+batch_size]
            # the last batch may be smaller than batch_size
            actual_size = sample.shape[0]
            # one copy of the explained sample per baseline in the batch
            batched_ts = torch.tensor(np.array([ts]*actual_size))

            ##### only for random forest as every instance should be a 1D tensor #######
            batched_ts, sample = batched_ts.reshape(actual_size, -1), sample.reshape(actual_size, -1)
            ###############################################################################

            tmp = SHAP.attribute(batched_ts, target=y, feature_mask=mask, baselines=sample, additional_forward_args=clf)

            ########  only for random forest as every instance should be a 1D tensor    ########
            # restore the per-sample shape of X_test before storing
            current_attr[j:j+actual_size] = tmp.reshape(actual_size, X_test.shape[1], X_test.shape[2])
            ###############################################################################

        # compute as final explanation mean of each explanation using a different baseline
        all_attributions['attributions'][i] = torch.mean(current_attr, dim=0)

# Weight every position by the inverse length of its segment. The series length
# is taken from X_test because the attributions are allocated with X_test's
# shape; the weights must match that shape for the element-wise product below.
weights = np.array([
    [
        lengths_to_weights(change_points_to_lengths(change_points, X_test.shape[-1]))
        for change_points in sample_segments
    ]
    for sample_segments in all_attributions["segments"]
])
# In-place scaling: the stored attributions are overwritten by the weighted ones.
all_attributions["attributions"] *= weights

random forest accuracy is 0.5

 explaining sample n. 0 



100%|██████████| 1/1 [00:04<00:00,  4.15s/it]



 explaining sample n. 1 



100%|██████████| 1/1 [00:03<00:00,  3.57s/it]


In [119]:
# Total attribution per test sample: sum over axes 1 and 2 of the attribution array.
np.sum(all_attributions['attributions'], axis=(1, 2))

array([0.7299999 , 0.16000003], dtype=float32)