In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
#import libraries
import sys
import os
import json
import pandas as pd
import numpy as np
import scipy
import string
from scrapbook.api import glue
# Capture the kernel's current working directory via the IPython %pwd magic.
# NOTE(review): working_dir is not referenced by any of the cells visible here.
working_dir = %pwd

from interpret_text.bow.BOWExplainer import BOWExplainer
#from interpret_text.common.utils_bow import BOWTokenizer, BOWEncoder

# sklearn
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

# spacy and nlp recipes
import spacy
from spacy.lang.en import English
from utils_nlp.dataset.multinli import load_pandas_df

In [None]:
#SPACY_LANG_MODEL = 'en_core_web_md'

# Folder that is scanned for the labelled feedback files (trailing slash included).
DATA_FOLDER = "./prod_training_data/"

# Train / test proportions.
# NOTE(review): neither value is referenced by the cells visible here.
TRAIN_SIZE, TEST_SIZE = 0.7, 0.3

In [None]:
# Build one registry entry per labelled-feedback file found in DATA_FOLDER.
# Key   : category name, i.e. the filename with `suffix` stripped.
# Value : {"filename": <source file>, "explainer": <filled in by the training cell>}.
suffix = "_labeled_feedback.txt"
explainer_dict = {
    filename[:-len(suffix)]: {"explainer": None, "filename": filename}
    # os.listdir returns entries in arbitrary order; sort so runs are reproducible.
    for filename in sorted(os.listdir(DATA_FOLDER))
    # Ignore anything that is not a labelled-feedback file (e.g. .DS_Store,
    # .ipynb_checkpoints). Slicing the suffix length off such names would create
    # bogus categories that the training loop then fails to read.
    if filename.endswith(suffix)
}

In [None]:
# Train one bag-of-words explainer per category and store it back in explainer_dict.
for category in explainer_dict:
    filename = explainer_dict[category]["filename"]
    # Training files are tab-separated.
    df_reach = pd.read_csv(os.path.join(DATA_FOLDER, filename), sep="\t")

    # Fetch documents and labels from the frame that was just loaded.
    # (Previously this read from an undefined `df`, which raises NameError on a
    # fresh kernel or silently reuses a stale frame left over in the kernel.)
    X_str = df_reach['verbatim'] # the documents we want to analyze
    # NOTE(review): the label column is hard-coded to 'cannot_reach_page' for every
    # category; confirm whether each file's label column should be `category` instead.
    ylabels = df_reach['cannot_reach_page'].apply(str) # labels, converted to strings
    # Create explainer object that contains default glassbox classifier and explanation methods
    bow_explainer = BOWExplainer()
    # encode document and labels into vector form using 1-gram Bag of Words.
    X_train, X_test, y_train, y_test = bow_explainer.encode(X_str,ylabels)
    # obtain best classifier and hyper params
    classifier, best_params = bow_explainer.train(X_train,y_train)
    print("best classifier for {}: ".format(category) + str(best_params))
    mean_accuracy = classifier.score(X_test, y_test, sample_weight=None)
    print("accuracy for {}= ".format(category) + str(mean_accuracy*100) + "%")
    y_pred = classifier.predict(X_test)
    # Macro-averaged metrics; these names are overwritten on every iteration, so
    # after the loop they hold the values for the last category only.
    [precision, recall, fscore, support] = precision_recall_fscore_support(y_test, y_pred,average='macro')
    explainer_dict[category]["explainer"] = bow_explainer
    

In [None]:
# Choose which category's explainer to inspect; swap the key to look at another one.
# bow_explainer = explainer_dict["vpn_issue"]["explainer"]
selected_entry = explainer_dict["cannot_reach_page"]
bow_explainer = selected_entry["explainer"]

# Show the class labels held by the explainer's label encoder.
print("The class names are as follows")
known_classes = bow_explainer.preprocessor.labelEncoder.classes_
print(known_classes)

# Class label to explain; presumably it must be one of the names printed above.
label_name = "0.0"

# Global explanation for that label: the top features mapped back to words,
# together with their importances.
top_words, top_importances = bow_explainer.explain_global(label_name)

In [None]:
# Enter any document that needs to be interpreted.
document = "Can't reach this page https://webmail.optusnet.com.au/index.php/mail | Can not connect to Optus Web Mail."
# Explain this single document with the explainer selected earlier in the notebook.
# The call's two results are kept as word_importances and parsed_sentence and are
# handed to visualize() in the next cell.
# NOTE(review): model=None presumably makes the explainer use its own trained
# classifier — confirm against BOWExplainer.explain_local.
word_importances, parsed_sentence = bow_explainer.explain_local(document, model=None)

In [None]:
# Render the local explanation computed in the previous cell.
bow_explainer.visualize(word_importances, parsed_sentence)