# January 21-25, 2022: basic pipeline

In [1]:
import os
import sys
from os.path import join as pjoin


import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import pickle 

import shap

# main dirs
# Project root is built from the HOME environment variable; os.environ[...]
# raises KeyError when HOME is not set.
proj_dir = pjoin(os.environ['HOME'], 'explainable-ai')

# folders
# Put the project root at the front of sys.path so the `helpers` package
# below is resolved from proj_dir (presumably where it lives — verify).
sys.path.insert(0, proj_dir)
# NOTE(review): star imports hide where names used later in this notebook
# (`to_tensor`, `base_model`, `Linear_Model`) come from — presumably these
# three modules; consider explicit imports once the origin of each is confirmed.
from helpers.dataset_utils import *
from helpers.base_model import *
from helpers.model_definitions import *

# select the GPU to be used
# Best-effort configuration: enable on-demand memory growth on the first two
# GPUs and expose only the second one to TensorFlow. On machines with fewer
# than two GPUs, or when the runtime is already initialized, the notebook
# keeps running and the reason is reported instead of silently discarded.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus[:2]:
        tf.config.experimental.set_memory_growth(gpu, True)
    # gpus[1] raises IndexError when fewer than two GPUs are present
    tf.config.experimental.set_visible_devices(gpus[1], 'GPU')
except (IndexError, ValueError, RuntimeError) as err:
    # Invalid device or cannot modify virtual devices once initialized.
    print(f"GPU selection not applied ({type(err).__name__}: {err})")

TensorFlow Addons offers no support for the nightly versions of TensorFlow. Some things might work, some other might not. 
If you encounter a bug, do not file an issue on GitHub.


## data

In [2]:
# get data
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# this file if it comes from a trusted source.
with open(pjoin(proj_dir, 'data/emoprox2', 'train_test_arrays.pkl'), 'rb') as f:
    data_dict = pickle.load(f)


def _mask_split(split):
    """Apply the mask of one data split to its inputs and targets.

    Parameters
    ----------
    split : indexable
        Read as ``split[0]`` (inputs), ``split[1]`` (targets) and
        ``split[2]`` (mask).

    Returns
    -------
    tuple
        ``(masked_inputs, masked_targets, mask)``. The mask is cast to
        float32 once; targets are multiplied by it directly and inputs are
        multiplied by it after a trailing axis is added with
        ``tf.expand_dims(..., -1)``. The mask itself is returned unchanged.
    """
    inputs, targets, mask = split[0], split[1], split[2]
    mask_f32 = tf.cast(mask, 'float32')
    return inputs * tf.expand_dims(mask_f32, -1), targets * mask_f32, mask


# converting to tf tensors (data_dict is updated in place, train first, then test)
for split_name in ('train', 'test'):
    data_dict[split_name] = to_tensor(data_dict[split_name])

# get inputs, targets and masks, with the mask already applied to X and y
train_X, train_y, train_mask = _mask_split(data_dict['train'])
test_X, test_y, test_mask = _mask_split(data_dict['test'])

2022-02-01 14:24:32.001067: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-01 14:24:32.398153: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14796 MB memory:  -> device: 1, name: Quadro RTX 5000, pci bus id: 0000:65:00.0, compute capability: 7.5


In [3]:
# sanity check: display the dimensions of the training mask
train_mask.shape

TensorShape([645, 360])

## model

In [4]:
# Instantiate the model with its default arguments.
# NOTE(review): Linear_Model is not defined in this notebook — presumably it
# comes from one of the `helpers` star imports; verify.
model = Linear_Model()

def default_slice(x, start, end):
    """Return the part of `x` whose first-axis index lies in [start, end).

    All remaining axes are kept whole (the `...` in the index).
    """
    return x[start:end, ...]

# Wrap the model in the project's training harness.
# NOTE(review): `base_model` is an opaque helper here; the meaning of each
# argument below is inferred from its name — confirm against its definition.
linear_regression = base_model(
    task_type="regression",
    model=model,
    loss_object=tf.keras.losses.MeanSquaredError(),
    L1_scale=0.0,  # presumably disables the L1 penalty — verify
    L2_scale=0.0,  # presumably disables the L2 penalty — verify
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    eval_metric=tfa.metrics.RSquare(),
    eval_metric_name="% var explained",  # label shown in the training log
    batch_size=32,
    slice_input=default_slice,
)

## train the model

In [5]:
# Fit for 10 epochs.
# NOTE(review): the validation arguments are the same tensors as the training
# arguments, so the "Val" figures in the log are not held-out estimates —
# confirm this is intentional (test_X / test_y are loaded above).
results = linear_regression.fit(
    train_X=train_X,
    train_Y=train_y,
    val_X=train_X,
    val_Y=train_y,
    num_epochs=10,
)

Epoch 000: Train Loss: 0.449, Train % var explained: -505.795%  Val Loss: 0.260, Val % var explained: -277.047%  
Epoch 001: Train Loss: 0.170, Train % var explained: -138.390%  Val Loss: 0.118, Val % var explained: -71.300%  
Epoch 002: Train Loss: 0.099, Train % var explained: -42.024%  Val Loss: 0.083, Val % var explained: -20.447%  
Epoch 003: Train Loss: 0.076, Train % var explained: -10.699%  Val Loss: 0.071, Val % var explained: -4.157%  
Epoch 004: Train Loss: 0.069, Train % var explained: -0.440%  Val Loss: 0.067, Val % var explained: 2.382%  
Epoch 005: Train Loss: 0.066, Train % var explained: 3.664%  Val Loss: 0.065, Val % var explained: 4.894%  
Epoch 006: Train Loss: 0.065, Train % var explained: 5.445%  Val Loss: 0.064, Val % var explained: 6.087%  
Epoch 007: Train Loss: 0.064, Train % var explained: 6.311%  Val Loss: 0.064, Val % var explained: 6.708%  
Epoch 008: Train Loss: 0.064, Train % var explained: 6.753%  Val Loss: 0.064, Val % var explained: 7.034%  
Epoch 009

In [12]:
# Forward pass of the fitted model over the full (masked) training inputs.
y_pred = linear_regression.model(train_X)
# NOTE(review): the recorded shape has a trailing axis of size 1, while
# train_y is multiplied elementwise by the 2-D mask earlier — presumably
# train_y is 2-D; confirm the loss/metric handle this shape difference as intended.
y_pred.shape

TensorShape([645, 360, 1])

## Shapley values

In [14]:
# Build the background set that SHAP takes its expectation over:
# collapse the first two axes of train_X into one, so every row is a single
# feature vector, then draw 100 rows from that pool.
# NOTE(review): train_X was multiplied by the mask earlier, so rows where the
# mask is zero (presumably padding) are part of this pool — confirm intended.
s = train_X.shape
n_rows, n_features = s[0] * s[1], s[2]
X = tf.reshape(train_X, shape=(n_rows, n_features)).numpy()

X_background = shap.utils.sample(X, 100)
print(X_background.shape)

(100, 85)


In [20]:
# SHAP explainer
# The model object itself is passed as the prediction function, with the
# 100 sampled rows as background data.
# NOTE(review): X_background is 2-D (rows, features) while the model was
# fitted on 3-D train_X — confirm the model accepts both and that its output
# type/shape is what KernelExplainer expects.
explainer = shap.KernelExplainer(linear_regression.model, X_background)