In [1]:
import os
import sys
import pyts
# Make the parent directory importable so project-local packages resolve
# when the notebook is run from its own folder.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from matplotlib import pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Patch

from tsx.perturbation import TimeSeriesPerturbation
from tsx.xai.lime import LIMETimeSeries, XAIModels
from tsx.xai import evaluation as eva

from data_util import *
from viz import *

import itertools
import pandas as pd
from biokit.viz import corrplot

### Objectives
1. Compare different XAI models within the same family (Ridge, for example)
1. Compare different DL models under the same XAI model
1. Run the same evaluation on multivariate time series
1. (TODO) Run the same evaluation on univariate time series

### Evaluation Metrics
1. Coefficient correlations
1. $R^2$ scores:
    the scores obtained when fitting the XAI model in the approximation step $f(z) \approx g(z') = w \cdot z'$


## Prepare Data Set

In [3]:
# Input (independent) columns and the target (dependent) column.
independents = ["dew", "temp", "press", "wind_direction", "wind_speed", "snow", "rain"]
dependent = "pollution"

# Global parameters shared by the cells below.
n_steps = 128
window_size = 8
n_variables = len(independents)
samples_size = 100

# Load the raw data and get one scaler for the inputs and one for the target.
df = load_data_set_bejin()
x_scaler, y_scaler = get_xy_scalers(df, independents, dependent)

# Normalised copy of the data; the raw frame `df` is left untouched.
df_norm = df.copy()
# The target is passed to its scaler as a single-column 2-D array.
df_norm[dependent] = y_scaler.transform(df[dependent].to_numpy().reshape(-1, 1))
df_norm[independents] = x_scaler.transform(df[independents].to_numpy())

In [4]:
# Load the two saved Keras models that the predict functions below wrap.
lstm = tf.keras.models.load_model(f"{DATA_DIR}/lstm_mts_128_1.h5")
wavenet = tf.keras.models.load_model(f"{DATA_DIR}/wavenet_mts_128_1.h5")

def predict_fn(z, model=lstm):
    """Predict the target for a single instance, in the target's original units.

    Parameters
    ----------
    z : numpy.ndarray
        One instance. It is transposed and reshaped to
        ``(1, n_steps, n_variables)`` before prediction, so it is expected
        to hold ``n_variables * n_steps`` values
        (presumably laid out as ``(n_variables, n_steps)`` - TODO confirm).
    model : object exposing ``predict``
        Model used for the prediction; defaults to ``lstm``.

    Returns
    -------
    The first element of the inverse-scaled, flattened prediction.
    """
    # Batch of one sample; dimensions come from the notebook-level parameters
    # instead of the literals 128 and 7 so they cannot drift apart.
    z_reshaped = z.T.reshape(1, n_steps, n_variables)
    z_hat = model.predict(z_reshaped)
    # Undo the target scaling: to avoid zero coef_ for z_hat in [0, 1]
    z_hat = y_scaler.inverse_transform(z_hat.reshape(-1, 1))
    z_hat = z_hat.ravel()   # per the original note, z_hat will be around 50 - 150
    return z_hat[0]

def lstm_fn(z):
    """Single-argument wrapper: run ``predict_fn`` on ``z`` with the ``lstm`` model."""
    prediction = predict_fn(z, lstm)
    return prediction

def wavenet_fn(z):
    """Single-argument wrapper: run ``predict_fn`` on ``z`` with the ``wavenet`` model."""
    prediction = predict_fn(z, wavenet)
    return prediction

In [5]:
# Test set (random): a list of (x, y) pairs, one per drawn instance.
n_instances = 1000
test_set = []
for i in range(n_instances):
    # Draw n_steps + 1 rows holding the input columns plus the target column.
    i_df = get_instance_x(df_norm, n_steps + 1, [*independents, dependent])
    # Inputs: rows labelled up to n_steps - 1 (label-based slice, inclusive).
    _x = i_df.loc[:n_steps - 1, independents]
    # Target: the value of the dependent column in the last row.
    _y = i_df[dependent]
    test_set.append((_x.to_numpy().T, _y.values[-1]))

In [6]:
# Prepare Params for different models
scales = ["async", "sync"]
repl_fn = ["zeros", "local_mean", "global_mean"]
model_fn = ["lstm_fn", "wavenet_fn"]

params = list(itertools.product(scales, repl_fn, model_fn))
params_df = pd.DataFrame([{"scale": s, "method": m, "model":model} for s, m, model in params])
# print(params_df)
params_df.style\
    .apply(lambda s: ['background-color: %s' % ('grey' if v else '') for v in s == "async"]) \
    .apply(lambda s: ['background-color: %s' % ('green' if v else '') for v in s == "sync"]) \
    .applymap(lambda s: 'color: %s' % ('cyan' if s == "lstm_fn" else '' )) \
    .applymap(lambda s: 'color: %s' % ('orange' if s == "wavenet_fn" else '' ))

Unnamed: 0,scale,method,model
0,async,zeros,lstm_fn
1,async,zeros,wavenet_fn
2,async,local_mean,lstm_fn
3,async,local_mean,wavenet_fn
4,async,global_mean,lstm_fn
5,async,global_mean,wavenet_fn
6,sync,zeros,lstm_fn
7,sync,zeros,wavenet_fn
8,sync,local_mean,lstm_fn
9,sync,local_mean,wavenet_fn


In [7]:
# Multi-run average explanations for different XAI models (# DL model)

# Todo: add this function to lime.py
#   coef_to_original = m.perturb_obj._x_masked
# x_df = get_instance_x(df_norm, n_steps, independents)
# x_arr = X[0]

# Generate explanations over the given instances
def get_xcoef(model,
              instances,
              scale="sync",
              r_fn='zeros',
              window_size=window_size,
              sample_size=samples_size):
    """Explain ``instances`` with a Ridge-based LIMETimeSeries and return the x-coef.

    Parameters
    ----------
    model : callable or str
        The predict function, or the name of one (e.g. ``"lstm_fn"``).
        Strings are resolved with ``eval`` for backward compatibility.
    instances : sequence
        Instances to explain; must be non-empty because the first one is
        used to convert the coefficients.
    scale : str
        Passed to ``LIMETimeSeries`` as ``scale``.
    r_fn : str
        Passed to ``LIMETimeSeries`` as ``perturb_method``.
    window_size : int
        Passed to ``LIMETimeSeries``; defaults to the notebook-level value.
    sample_size : int
        Passed to ``LIMETimeSeries``; defaults to the notebook-level
        ``samples_size``.

    Returns
    -------
    The result of ``lime_ts.perturb_obj._x_masked(instances[0], lime_ts.coef)``
    (per the original note: the coefficients in the original x format).
    """
    # Prefer passing the function itself; a string is still accepted.
    # NOTE(review): eval executes arbitrary expressions - only pass trusted,
    # notebook-defined names here.
    predict = model if callable(model) else eval(model)

    lime_ts = LIMETimeSeries(
            scale=scale,
            window_size=window_size,
            sample_size=sample_size,
            perturb_method=r_fn
        )
    lime_ts.xai_estimator = XAIModels.Ridge
    lime_ts.explain_instances(instances, predict_fn=predict)

    coef = lime_ts.coef
    x_arr = instances[0]    # used to get the x-coef in the original format
    # TODO: add this conversion to lime.py instead of calling a private method.
    x_coef = lime_ts.perturb_obj._x_masked(x_arr, coef)

    return x_coef

In [8]:
# Generate explanations for each option
def generate_explanations(max_instances=10):
    """Run ``get_xcoef`` for every combination in ``params``.

    Parameters
    ----------
    max_instances : int, optional
        Number of leading ``test_set`` instances to explain per combination
        (default 10, the value previously hard-coded).

    Returns
    -------
    list of dict
        One dict per combination with keys ``"model"``, ``"scale"``,
        ``"method"`` and ``"xcoef"``.

    Raises
    ------
    ValueError
        If ``test_set`` is empty.
    """
    if not test_set:
        raise ValueError("test_set is empty; build it before generating explanations")

    # Only the inputs are needed here; the targets in test_set are not used.
    X = tuple(x for x, _ in test_set[:max_instances])

    explanations = []
    for scale, method, model in params:
        # 'sync' runs use 200 samples, every other scale uses 500.
        sample_size = 200 if scale == 'sync' else 500
        x_coef = get_xcoef(model, X, scale, method,
                           window_size=window_size,
                           sample_size=sample_size)
        explanations.append(
            {"model": model, "scale": scale, "method": method, "xcoef": x_coef}
        )
    return explanations
# Skip if explanations already generated:
# explanations = generate_explanations()
# np.save('data/explanations.npy',explanations)