*Copyright (c) Microsoft Corporation. All rights reserved.*

*Licensed under the MIT License.*

# Text Classification of SST-2 Sentences using a 3-Player Introspective Model

In [1]:
import sys
import os
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from interpret_text.common.dataset.utils_sst2 import load_sst2_pandas_df
from interpret_text.three_player_introspective.three_player_introspective_explainer import ThreePlayerIntrospectiveExplainer
from interpret_text.common.utils_three_player import GlovePreprocessor, ModelArguments, load_glove_embeddings
from interpret_text.widget import ExplanationDashboard

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Introduction
In this notebook, we train and evaluate a [three-player explainer](http://people.csail.mit.edu/tommi/papers/YCZJ_EMNLP2019.pdf) model on a subset of the [SST-2](https://nlp.stanford.edu/sentiment/index.html) dataset. To run this notebook, we used the SST-2 data files provided [here](https://github.com/AcademiaSinicaNLPLab/sentiment_dataset).

### Set parameters
Here we set some parameters that we use for our modeling task.

In [23]:
# When True, skip loading pretrained embeddings, most of model training, and model
# evaluation; used to smoke-test the notebook flow end to end.
QUICK_RUN = True

# Seed the global NumPy and PyTorch random number generators so that repeated runs
# of this notebook start from the same random state.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# data processing parameters
DATA_FOLDER = "../../../data/sst2"
LABEL_COL = "labels"
TEXT_COL = "sentences"
token_count_thresh = 1
max_sentence_token_count = 70

# training procedure parameters
model_save_dir = os.path.join("..", "models")
model_prefix = "sst2rnpmodel"
cuda = True
batch_size = 200
save_best_model = True
# A quick run disables classifier pre-training and trains for a single epoch;
# a full run pre-trains the classifier and trains for 200 epochs.
pre_train_cls = not QUICK_RUN
num_epochs = 1 if QUICK_RUN else 200

# ModelArguments contains default parameters used internally in the model that can be changed
args = ModelArguments(
    cuda,
    pre_train_cls,
    batch_size,
    num_epochs,
    save_best_model,
    model_save_dir=model_save_dir,
    model_prefix=model_prefix,
)
# example of changing an argument after construction (here the cuda flag is switched off)
args.cuda = False

# If using GloVe embeddings (i.e. not using BERT), load pretrained embeddings
# TODO: load glove embedding file in load_glove_embeddings to blob storage
if QUICK_RUN:
    # Quick runs point at a placeholder path instead of fetching the embedding file.
    args.embedding_path = os.path.join(DATA_FOLDER, "noEmbeddingFile.txt")
else:
    args.embedding_path = load_glove_embeddings(DATA_FOLDER)

## Read Dataset
We start by loading a subset of the data for training and testing.

In [24]:
# TODO: load dataset to blob storage
# Load the train split first, then the test split.
train_data, test_data = (load_sst2_pandas_df(split) for split in ("train", "test"))

if QUICK_RUN:
    # A quick run keeps only the first `batch_size` rows of each split.
    train_data, test_data = train_data.head(batch_size), test_data.head(batch_size)

# Raw sentences of each split, used later as tokenizer input.
X_train, X_test = train_data[TEXT_COL], test_data[TEXT_COL]

# Both splits stacked together (train rows followed by test rows).
all_data = pd.concat([train_data, test_data])

In [25]:
# Distinct label values appearing anywhere in the combined train + test data.
labels = all_data[LABEL_COL].unique()

# Record the label count and the sorted label values on the model arguments.
args.num_labels = len(labels)
args.labels = np.array(sorted(labels))

## Tokenization and embedding
The data is then tokenized and embedded using GloVe embeddings.

In [26]:
# The preprocessor is constructed from the sentences of both splits combined.
preprocessor = GlovePreprocessor(
    all_data[TEXT_COL],
    token_count_thresh,
    max_sentence_token_count,
)

# Attach each split's label column to its preprocessed sentences, column-wise.
train_tokens = preprocessor.preprocess(X_train)
df_train = pd.concat([train_data[LABEL_COL], train_tokens], axis=1)

test_tokens = preprocessor.preprocess(X_test)
df_test = pd.concat([test_data[LABEL_COL], test_tokens], axis=1)

## Explainer
Then, we create and train the explainer.

In [27]:
# Build the three-player introspective explainer with an RNN classifier from the
# model arguments and preprocessor.
explainer = ThreePlayerIntrospectiveExplainer(args, preprocessor, classifier_type="RNN")

# Quick and full runs are fitted with the same call; the run mode only changes the
# values already stored in `args` (e.g. pre_train_cls and num_epochs).
# TODO: add back pretraining classifier
classifier = explainer.fit(df_train, df_test)

embedding is initialized fully randomly.
embedding is initialized fully randomly.
embedding is initialized fully randomly.


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:10<00:00, 10.58s/it]


We can test the explainer and measure its performance:

In [28]:
# Evaluation is skipped entirely on quick runs.
if not QUICK_RUN:
    # Score on the test frame without displaying any example sentences.
    explainer.score(df_test, n_examples_displayed=0)

    # Read the averaged metrics off the explainer's model after scoring.
    scored_model = explainer.model
    print("Test sparsity: ", scored_model.avg_sparsity)
    print(
        "Test accuracy: ",
        scored_model.avg_accuracy,
        "% Anti-accuracy: ",
        scored_model.avg_anti_accuracy,
    )

## Local importances
We can display the local importances found by the explainer (the most and least important words for a given sentence):

In [29]:
# Sentence to interpret, together with the label passed to the explainer for it.
sentence = "This great movie was really good"
label = 1

# Request a local explanation with hard importances disabled.
local_explanation = explainer.explain_local(
    sentence,
    label,
    preprocessor,
    hard_importances=False,
)

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return bound(*args, **kwds)


## Visualize explanations
We can visualize local feature importances as a heatmap over words in the document and view importance values of individual words.

In [30]:
# Pull the per-token importance values and the tokens off the explanation object.
# NOTE(review): these are underscore-prefixed (private) attributes; a public accessor
# may exist on the explanation object - confirm before relying on them.
importance_values = local_explanation._local_importance_values
explained_tokens = local_explanation._features

explainer.visualize(importance_values, explained_tokens)

In [31]:
# Build the explanation dashboard for the local explanation; as the last expression
# of the cell, the resulting object is displayed as the cell output.
ExplanationDashboard(local_explanation)

ExplanationWidget(value={'text': ['this', 'great', 'movie', 'was', 'really', 'good'], 'prediction': [0, 1], 'c…

<interpret_text.widget.ExplanationDashboard.ExplanationDashboard at 0x275b0010e80>