# January 21-25, 2022

In [1]:
import os
import sys
from os.path import join as pjoin


import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import pickle 

import shap

# main dirs
# Project root lives under the user's home directory. expanduser('~') honours
# $HOME when it is set and falls back to the platform's own lookup otherwise,
# so this no longer raises KeyError where HOME is undefined.
proj_dir = pjoin(os.path.expanduser('~'), 'explainable-ai')

# folders
# Put the project root first on the import path so the local `helpers`
# package resolves. Skip the insert when it is already first, so re-running
# this cell does not pile up duplicate entries.
if sys.path[:1] != [proj_dir]:
    sys.path.insert(0, proj_dir)
from helpers.dataset_utils import *
from helpers.base_model import *
from helpers.model_definitions import *

TensorFlow Addons offers no support for the nightly versions of TensorFlow. Some things might work, some other might not. 
If you encounter a bug, do not file an issue on GitHub.


## data

In [2]:
# get data
# Load the pickled train/test arrays stored under data/emoprox2.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this at files you trust.
arrays_path = pjoin(proj_dir, 'data/emoprox2', 'train_test_arrays.pkl')
with open(arrays_path, 'rb') as f:
    data_dict = pickle.load(f)

In [3]:
# converting to tf tensors
# Each split is replaced in place by the output of the project helper
# `to_tensor`, so later cells read the converted data from `data_dict`.
for split in ('train', 'test'):
    data_dict[split] = to_tensor(data_dict[split])

2022-01-26 20:49:05.405064: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-26 20:49:06.876323: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14796 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:17:00.0, compute capability: 7.5
2022-01-26 20:49:06.878511: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 14796 MB memory:  -> device: 1, name: Quadro RTX 5000, pci bus id: 0000:65:00.0, compute capability: 7.5


In [4]:
# Pull entries 0, 1 and 2 out of each split and bind them to the
# inputs / targets / mask names used by the rest of the notebook.
train_X, train_y, train_mask = (data_dict['train'][idx] for idx in range(3))

test_X, test_y, test_mask = (data_dict['test'][idx] for idx in range(3))

In [5]:
# Sanity-check the dimensions of the training inputs before building the model.
train_X.shape

TensorShape([647, 360, 85])

## model

In [6]:
# Seed the random number generators before the model is built so that a
# Restart & Run All is repeatable as far as these seeds control it.
# NumPy is seeded as well in case the helpers draw from it — TODO confirm
# which RNGs the helpers actually use.
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = Linear_Model()


def default_slice(x, start, end):
    """Return entries `start` (inclusive) to `end` (exclusive) of `x` along its first axis.

    All remaining axes are kept whole.
    """
    return x[start:end, ...]


# Wrap the linear model in the project's training helper: MSE loss, no
# L1/L2 penalty (both scales are 0.0), Adam with learning rate 0.01,
# R-squared as the evaluation metric, and batches of 32.
linear_regression = base_model(
    task_type="regression",
    model=model,
    loss_object=tf.keras.losses.MeanSquaredError(),
    L1_scale=0.0,
    L2_scale=0.0,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    eval_metric=tfa.metrics.RSquare(),
    eval_metric_name="% var explained",
    batch_size=32,
    slice_input=default_slice,
)

## train the model

In [7]:
# Train for 10 epochs.
# NOTE(review): the validation arguments are the training arrays themselves,
# so the reported "Val" numbers are not a held-out estimate. test_X / test_y
# are available — confirm whether they were meant to be used here.
results = linear_regression.fit(
    train_X=train_X,
    train_Y=train_y,
    val_X=train_X,
    val_Y=train_y,
    num_epochs=10,
)

Epoch 000: Train Loss: 0.155, Train % var explained: -106.072%  Val Loss: 0.096, Val % var explained: -35.013%  
Epoch 001: Train Loss: 0.079, Train % var explained: -9.911%  Val Loss: 0.069, Val % var explained: 2.910%  
Epoch 002: Train Loss: 0.067, Train % var explained: 5.588%  Val Loss: 0.066, Val % var explained: 7.320%  
Epoch 003: Train Loss: 0.066, Train % var explained: 7.418%  Val Loss: 0.066, Val % var explained: 7.759%  
Epoch 004: Train Loss: 0.066, Train % var explained: 7.492%  Val Loss: 0.066, Val % var explained: 7.755%  
Epoch 005: Train Loss: 0.066, Train % var explained: 7.178%  Val Loss: 0.066, Val % var explained: 7.647%  
Epoch 006: Train Loss: 0.067, Train % var explained: 6.475%  Val Loss: 0.066, Val % var explained: 7.497%  
Epoch 007: Train Loss: 0.068, Train % var explained: 5.540%  Val Loss: 0.066, Val % var explained: 7.436%  
Epoch 008: Train Loss: 0.068, Train % var explained: 4.887%  Val Loss: 0.066, Val % var explained: 7.221%  
Epoch 009: Train Loss:

## Shapley values

In [8]:
# select a set of background examples to take an expectation over
# Collapse the first two axes of train_X so that every row of X is a single
# feature vector, then draw 100 of those rows as the SHAP background set.
s = train_X.shape
X = tf.reshape(train_X, shape=(-1, s[2])).numpy()
X_background = shap.utils.sample(X, 100)

In [9]:
# Build a KernelExplainer around the model, using the sampled background
# rows as the reference data.
explainer = shap.KernelExplainer(model=linear_regression.model, data=X_background)

# Explain one row: index 0 on the first axis of test_X and index 0 on the
# second. The 0:1 slice keeps the input two-dimensional.
x_explain = test_X[0, 0:1, :].numpy()
shap_values = explainer.shap_values(x_explain)

  0%|          | 0/1 [00:00<?, ?it/s]

The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 


In [10]:
# Check the dimensions of the SHAP values returned for the explained row.
shap_values.shape

(1, 85)