# Initialize Fiddler Client

In [1]:
import fiddler as fdl
# Create the Fiddler API client with its default arguments; later cells reuse
# this `client` object for every dataset and model call.
client = fdl.FiddlerApi()

# Load dataset

In [2]:
import pandas as pd

# Path of the IMDB sample CSV that is read below.
IMDB_CSV_PATH = '/app/fiddler_samples/samples/datasets/imdb_rnn/imdb_rnn.csv'

# Read the reviews into a frame and infer a dataset schema from that frame.
df = pd.read_csv(IMDB_CSV_PATH)
df_schema = fdl.DatasetInfo.from_dataframe(df, max_inferred_cardinality=1000)

In [3]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,sentence,polarity
0,A real blow-up of the film literally. This Bri...,False
1,"I only wish that Return of the Jedi, have been...",True
2,"""I like cheap perfume better; it doesn't last ...",True
3,On the eighth day God created Georges. But the...,True
4,"No, this is not no Alice fairy tale my friends...",True


# Upload dataset

In [4]:
# Upload the frame as the 'train' split, but only when no dataset with this id
# is listed by the client yet.
registered_datasets = client.list_datasets()
if 'imdb_rnn' not in registered_datasets:
    upload_result = client.upload_dataset(dataset={'train': df}, dataset_id='imdb_rnn')

# Create model schema

In [5]:
# Column holding the label and the columns fed to the model.
target = 'polarity'
feature_columns = ['sentence']
train_input, train_target = df[feature_columns], df[target]

# Build the model schema from the dataset info held by Fiddler for 'imdb_rnn'.
dataset_info = client.get_dataset_info('imdb_rnn')
model_info = fdl.ModelInfo.from_dataset_info(
    dataset_info=dataset_info,
    target=target,
    features=feature_columns,
    display_name='Text IG',
    description='this is a tensorflow model using text data and IG enabled from tutorial',
    input_type=fdl.ModelInputType.TEXT,
)

# Install Tensorflow if necessary

Uncomment the following line to install TensorFlow 1.14 if it is not already available in your environment.

In [6]:
#!pip install tensorflow==1.14

# Train model

In [7]:
from sklearn.preprocessing import LabelEncoder

# Encode the labels as integers and reshape them into a single column,
# i.e. an array of shape (n_samples, 1).
le = LabelEncoder()
train_target = le.fit_transform(train_target).reshape(-1, 1)

In [187]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence

# Tokenizer vocabulary cap and the fixed length every sequence is padded to.
vocab_size, max_seq_length = 1000, 150

# Fit the tokenizer on the review texts, then turn each review into a
# post-padded row of token ids.
review_texts = train_input['sentence']
tok = Tokenizer(num_words=vocab_size)
tok.fit_on_texts(review_texts)
sequences = tok.texts_to_sequences(review_texts)
sequences_matrix = sequence.pad_sequences(
    sequences,
    maxlen=max_seq_length,
    padding='post',
)

In [11]:
from tensorflow.keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding
from tensorflow.keras.models import Model

def RNN():
    """
    Build the (uncompiled) Keras model used in this tutorial.

    The network reads `max_seq_length` token ids, embeds them into 64
    dimensions, runs a 64-unit LSTM, then a 256-unit ReLU dense layer with
    dropout 0.2, and ends in a single sigmoid unit. `vocab_size` and
    `max_seq_length` are read from the notebook's global scope.
    """
    inputs = Input(name='inputs', shape=[max_seq_length])
    hidden = Embedding(vocab_size, 64, input_length=max_seq_length)(inputs)
    hidden = LSTM(64)(hidden)
    hidden = Dense(256, name='FC1')(hidden)
    hidden = Activation('relu')(hidden)
    hidden = Dropout(0.2)(hidden)
    hidden = Dense(1, name='out_layer')(hidden)
    outputs = Activation('sigmoid')(hidden)
    return Model(inputs=inputs, outputs=outputs)

In [12]:
from tensorflow.keras.optimizers import RMSprop

# Build the network, print its layer summary, then compile it with
# binary cross-entropy loss and the RMSprop optimizer.
model = RNN()
model.summary()
rmsprop = RMSprop()
model.compile(
    loss='binary_crossentropy',
    optimizer=rmsprop,
    metrics=['accuracy'],
)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, 150)]             0         
_________________________________________________________________
embedding (Embedding)        (None, 150, 64)           64000     
_________________________________________________________________
lstm (LSTM)                  (None, 64)                33024     
_________________________________________________________________
FC1 (Dense)                  (None, 256)               16640     
_________________________________________________________________
activation (Activation)      (None, 256)               0         
_______________________

In [13]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop training once the validation loss stops improving by at least 0.001.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001)
model.fit(
    sequences_matrix,
    train_target,
    batch_size=128,
    epochs=5,
    validation_split=0.1,
    callbacks=[early_stopping],
)

Train on 22500 samples, validate on 2500 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5


<tensorflow.python.keras.callbacks.History at 0x7f9c3ffc9810>

# Save model and schema

In [188]:
import pathlib
import shutil
import pickle
import yaml
import tensorflow as tf

project_id, model_id = 'tutorial', 'tf_ig_imdb'

# Start from an empty package directory named after the model id.
model_dir = pathlib.Path(model_id)
shutil.rmtree(model_dir, ignore_errors=True)
model_dir.mkdir()

# Export the trained Keras model into the package directory.
saved_model_dir = model_dir / 'saved_model'
tf.keras.experimental.export_saved_model(model, str(saved_model_dir))

# Write the model schema next to the exported model.
schema = {'model': model_info.to_dict()}
with open(model_dir / 'model.yaml', 'w') as yaml_file:
    yaml.dump(schema, yaml_file)

# Pickle the fitted tokenizer; package.py reloads it from 'tokenizer.pkl'.
with open(model_dir / 'tokenizer.pkl', 'wb') as tok_file:
    pickle.dump(tok, tok_file)

INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: None
INFO:tensorflow:Signatures INCLUDED in export for Train: ['train']
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: None
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: ['eval']
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['serving_default']
INFO:tens

# Write package.py and related wrappers

### Import related wrappers

We need to copy two TensorFlow wrapper modules into the model package directory. These files are stored in the `utils` directory.
- The tf_saved_model_wrapper.py file contains a wrapper to load and run a TF model from a saved_model path.
- The tf_saved_model_wrapper_ig.py file contains a wrapper to support Integrated Gradients (IG) computation for a TF model loaded from a saved_model path.

In [15]:
# Copy both wrapper modules into the model package directory so that
# package.py can import them relative to itself.
files = [
    'utils/tf_saved_model_wrapper.py',
    'utils/tf_saved_model_wrapper_ig.py',
]
for wrapper_file in files:
    shutil.copy(wrapper_file, model_dir)

### Write package.py file

Next, we need to write the package.py file. This file contains functions to transform the input, generate the baseline and get the attributions.

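The project_attributions() function maps the attributions computed on the tokenized input back onto segments of the original text. In the package.py written below this logic is implemented inline, so no separate cover_tokens.py file is required.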

In [189]:
%%writefile tf_ig_imdb/package.py

import numpy as np
import re
import pathlib
import pickle
import logging
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from .tf_saved_model_wrapper_ig import TFSavedModelWrapperIg


# Directory that contains this package.py; the artifacts below are resolved
# relative to it.
PACKAGE_PATH = pathlib.Path(__file__).parent
# Location of the exported saved model inside the package.
SAVED_MODEL_PATH = PACKAGE_PATH / 'saved_model'
# Location of the pickled tokenizer inside the package.
TOKENIZER_PATH = PACKAGE_PATH / 'tokenizer.pkl'

# Module-level logger (not used by the code visible in this file).
LOG = logging.getLogger(__name__)


class MyModel(TFSavedModelWrapperIg):
    """
    Wrapper that loads and runs the IMDB RNN saved model on raw text.

    On top of TFSavedModelWrapperIg it loads a pickled tokenizer and overrides
    the hooks that (a) turn text into padded token-id sequences, (b) build the
    baseline input and (c) map per-token attributions back onto segments of
    the original text.
    """

    def __init__(self, saved_model_path, sig_def_key, tokenizer_path,
                 target,
                 is_binary_classification=False,
                 output_key=None,
                 batch_size=8,
                 output_columns=None,
                 input_tensor_to_differentiable_layer_mapping=None,
                 max_allowed_error=None,
                 max_seq_length=150):
        """
        Class to load and run the IMDB RNN model.
        See: TFSavedModelWrapper

        :param saved_model_path: forwarded unchanged to the parent wrapper.
        :param sig_def_key: forwarded unchanged to the parent wrapper.
        :param tokenizer_path: path of the pickled tokenizer to load.
        :param target: name of the `input_df` column that holds the raw text.
            Despite its name this is the text feature column, not the label
            (get_model passes 'sentence').
        :param is_binary_classification: forwarded to the parent wrapper.
        :param output_key: forwarded to the parent wrapper.
        :param batch_size: forwarded to the parent wrapper.
        :param output_columns: forwarded to the parent wrapper; None (the
            default) is passed on as a fresh empty list.
        :param input_tensor_to_differentiable_layer_mapping: forwarded to the
            parent wrapper; None (the default) is passed on as a fresh empty
            dict.
        :param max_allowed_error: forwarded to the parent wrapper.
        :param max_seq_length: length every token sequence is padded or
            truncated to. Defaults to 150; presumably must equal the length
            the model was trained with — confirm when changing it.
        """
        # Use None sentinels instead of mutable default arguments so that no
        # list/dict instance is shared between separate MyModel objects.
        if output_columns is None:
            output_columns = []
        if input_tensor_to_differentiable_layer_mapping is None:
            input_tensor_to_differentiable_layer_mapping = {}

        super().__init__(saved_model_path, sig_def_key,
                         is_binary_classification=is_binary_classification,
                         output_key=output_key,
                         batch_size=batch_size,
                         output_columns=output_columns,
                         input_tensor_to_differentiable_layer_mapping=
                         input_tensor_to_differentiable_layer_mapping,
                         max_allowed_error=max_allowed_error)
        # NOTE: pickle.load can execute arbitrary code. Only load tokenizer
        # files that come from a trusted source (here: the model package).
        with open(tokenizer_path, 'rb') as handle:
            self.tokenizer = pickle.load(handle)
        self.max_seq_length = max_seq_length
        self.target = target

    def _to_padded_frame(self, texts):
        """
        Tokenize `texts` and return a one-column frame ('inputs') whose cells
        are token-id lists post-padded to `self.max_seq_length`.
        """
        sequences = self.tokenizer.texts_to_sequences(texts)
        sequences_matrix = sequence.pad_sequences(sequences,
                                                  maxlen=self.max_seq_length,
                                                  padding='post')
        return pd.DataFrame({'inputs': sequences_matrix.tolist()})

    def transform_input(self, input_df):
        """
        Transform the provided dataframe into one that complies with the input
        interface of the model.

        Overrides the transform_input method of TFSavedModelWrapper.
        """
        return self._to_padded_frame(input_df[self.target])

    def generate_baseline(self, input_df):
        """
        Build the baseline input: one empty text per row of `input_df`, run
        through the same tokenization and padding as transform_input.
        """
        return self._to_padded_frame([''] * len(input_df))

    def project_attributions(self, input_df, transformed_input_df,
                             attributions):
        """
        Maps the transformed input to original input space so that the
        attributions correspond to the features of the original input.
        Overrides the project_attributions method of TFSavedModelWrapper.

        Only the first row of `input_df` and the first entry of
        `attributions['inputs']` are used.

        :return: {"embedding_input": [segments, attributions]} where
            `segments` are the non-empty pieces of the original text (words
            and separators) and `attributions` holds one value per segment;
            segments that are not tokens consumed by the model get 0.
        """
        text = input_df[self.target].iloc[0]

        # Split on a space or any tokenizer filter character, keeping the
        # separators. re.escape keeps characters such as '\\', ']' or '^'
        # literal inside the character class.
        separator_pattern = '([' + re.escape(' ' + self.tokenizer.filters) + '])'
        segments = re.split(separator_pattern, text)

        # Tokens of the explained row only, in the order the tokenizer emits
        # them (before padding and truncation).
        token_ids = self.tokenizer.texts_to_sequences([text])[0]
        word_tokens = self.tokenizer.sequences_to_texts(
            [[token_id] for token_id in token_ids])

        # assumes attributions['inputs'][0] holds one value per position of
        # the padded model input — TODO confirm against TFSavedModelWrapperIg
        token_attributions = attributions['inputs'][0].astype('float').tolist()

        # pad_sequences is called without `truncating`, so texts with more
        # than max_seq_length tokens keep only their trailing tokens (Keras
        # default). Position j of the model input is then token `offset + j`.
        offset = max(0, len(word_tokens) - self.max_seq_length)

        # Let's walk segments and assign attributions to the components where
        # they match word_tokens, the token sequence consumed by the model; otherwise assign 0.
        i = 0
        final_attributions = []
        final_segments = []
        for segment in segments:
            if segment == '':
                continue
            final_segments.append(segment)
            if i < len(word_tokens) and segment.lower() == word_tokens[i]:
                attribution_index = i - offset
                if 0 <= attribution_index < len(token_attributions):
                    final_attributions.append(
                        token_attributions[attribution_index])
                else:
                    # Token was truncated away and never reached the model.
                    final_attributions.append(0)
                i += 1
            else:
                final_attributions.append(0)
        return {"embedding_input": [final_segments, final_attributions]}


def get_model():
    """
    Build the MyModel wrapper for the packaged saved model, load it and
    return it.

    The wrapper reads the text from the 'sentence' column and uses the
    default serving signature of the saved model.
    """
    serving_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    layer_mapping = {'inputs': 'embedding/embedding_lookup:0'}

    wrapper = MyModel(
        SAVED_MODEL_PATH,
        serving_key,
        TOKENIZER_PATH,
        target='sentence',
        is_binary_classification=True,
        batch_size=128,
        output_columns=['inputs'],
        input_tensor_to_differentiable_layer_mapping=layer_mapping,
        max_allowed_error=5)
    wrapper.load_model()
    return wrapper


Writing tf_ig_imdb/package.py


# Upload model

In [179]:
# Remove any previously uploaded copy of this model, then upload the package
# directory that the earlier cells populated.
# NOTE(review): delete_model is called unconditionally — confirm it does not
# raise on a first run, when the model does not exist in the project yet.
client.delete_model(project_id, model_id)
client.upload_model_package(model_dir, project_id, model_id)

# Run model

In [181]:
# Score the first ten training rows with the uploaded model and display the
# returned predictions.
prediction_input = train_input.head(10)
result = client.run_model(project_id, model_id, prediction_input)
result

Unnamed: 0,inputs
0,0.213811
1,0.643106
2,0.219616
3,0.38828
4,0.302248
5,0.382934
6,0.144117
7,0.093723
8,0.0776
9,0.111019


# Get explanation

In [182]:
# Row to explain: the first row of the full frame `df`, so it carries every
# column of `df`, not only the feature column.
selected_point = df.head(1)

In [184]:
# Same ids as assigned in the "Save model and schema" cell; they are repeated
# here, so this cell does not depend on that assignment.
project_id = 'tutorial'
model_id = 'tf_ig_imdb'

# Request an explanation for the selected row with explanations='ig'
# (Integrated Gradients, per the section text of this notebook).
ex_ig = client.run_explanation(
    project_id=project_id,
    model_id=model_id, 
    df=selected_point, 
    dataset_id='imdb_rnn',
    explanations='ig')

In [185]:
# Display the explanation object returned by run_explanation.
ex_ig

AttributionExplanation(algorithm='ig', inputs=['A', ' ', 'real', ' ', 'blow', '-', 'up', ' ', 'of', ' ', 'the', ' ', 'film', ' ', 'literally', '.', ' ', 'This', ' ', 'British', ' ', 'film', ' ', 'is', ' ', 'boringly', ' ', 'made', '.', '<', 'br', ' ', '/', '>', '<', 'br', ' ', '/', '>', 'What', ' ', 'an', ' ', 'exciting', ' ', 'plot', '!', ' ', 'A', ' ', 'terrorist', ' ', 'places', ' ', 'bombs', ' ', 'on', ' ', 'a', ' ', 'train', '.', ' ', 'How', ' ', 'could', ' ', 'the', ' ', 'writers', ' ', 'and', ' ', 'producers', ' ', 'of', ' ', 'this', ' ', 'stinker', ' ', 'turn', ' ', 'this', ' ', 'into', ' ', 'such', ' ', 'a', ' ', 'dull', ' ', 'story', '?', '<', 'br', ' ', '/', '>', '<', 'br', ' ', '/', '>', 'Glenn', ' ', 'Ford', ',', ' ', 'as', ' ', 'the', ' ', 'expert', ' ', 'called', ' ', 'upon', ' ', 'to', ' ', 'defuse', ' ', 'the', ' ', 'bomb', ',', ' ', 'is', ' ', 'given', ' ', 'awful', ' ', 'writing', ' ', 'material', ' ', 'to', ' ', 'work', ' ', 'with', '.', ' ', 'Naturally', ',', ' ', 