# Testing Feminist Chatbot: NLU pipelines (with Jupyter)
###    Model created using Rasa Open Source v. 2.7.0 and Python v. 3.7.6.

In [1]:
# Checking python version
import sys
print(sys.version)


3.7.6 (default, Jan  8 2020, 20:23:39) [MSC v.1916 64 bit (AMD64)]


In [2]:
# Checking installed Rasa version
import rasa
rasa.__version__

'2.7.1'

In [3]:
import pathlib
from rasa.cli.utils import get_validated_path
from rasa.model import get_model, get_model_subdirectories
from rasa.nlu.model import Interpreter
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.constants import TEXT

"""
This function loads the Rasa NLU interpreter. It is able to apply all NLU
pipeline steps to an arbitrary text provided as input argument. 
"""
def load_interpreter(model_path):
    model = get_validated_path(model_path, "model")
    model_path = get_model(model)
    _, nlu_model = get_model_subdirectories(model_path)
    return Interpreter.load(nlu_model)

In [4]:
# Here we specify which trained model (chatbot) we want to load to the interpreter.

nlu_interpreter = load_interpreter("models/20210711-150656.tar.gz")
#nlu_interpreter = load_interpreter("models/20210408-163011.tar.gz")

In [5]:
# This command displays all the components of the NLU pipeline inside of our trained model:
nlu_interpreter.pipeline

[<rasa.nlu.tokenizers.whitespace_tokenizer.WhitespaceTokenizer at 0x1f6f19e8d08>,
 <rasa.nlu.featurizers.sparse_featurizer.regex_featurizer.RegexFeaturizer at 0x1f6f25c1a48>,
 <rasa.nlu.featurizers.sparse_featurizer.lexical_syntactic_featurizer.LexicalSyntacticFeaturizer at 0x1f6f25c1bc8>,
 <rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer at 0x1f6f25c1b08>,
 <rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer at 0x1f6f25e7dc8>,
 <rasa.nlu.classifiers.diet_classifier.DIETClassifier at 0x1f6f264dc48>,
 <rasa.nlu.extractors.entity_synonyms.EntitySynonymMapper at 0x1f6f3cf0908>,
 <rasa.nlu.selectors.response_selector.ResponseSelector at 0x1f6f265f548>,
 <rasa.nlu.classifiers.fallback_classifier.FallbackClassifier at 0x1f68000a408>]

In [6]:
# The nlu_interpreter object has the ability to parse new texts for intent classification.
# The output is a dictionary with detected intents and entities as well as confidence scores for each.

from pprint import pprint 
pprint(nlu_interpreter.parse("hello there"))

{'entities': [],
 'intent': {'confidence': 0.5174404978752136, 'name': 'nlu_fallback'},
 'intent_ranking': [{'confidence': 0.5174404978752136, 'name': 'nlu_fallback'},
                    {'confidence': 0.4825595021247864,
                     'id': -5197062675303167482,
                     'name': 'say_hello'},
                    {'confidence': 0.10962603986263275,
                     'id': 5888282264120118059,
                     'name': 'swear'},
                    {'confidence': 0.090266153216362,
                     'id': 5739350265572138166,
                     'name': 'affirm'},
                    {'confidence': 0.08264221251010895,
                     'id': -8745570098290754332,
                     'name': 'request_names'},
                    {'confidence': 0.07857833802700043,
                     'id': 8966450632895495381,
                     'name': 'repeat_name'},
                    {'confidence': 0.046907633543014526,
                     'id': -10144817238199

In [7]:
# This section loads the training examples for the model
# saved in the nlu.yml file to do further testing and validation.

import rasa.shared.nlu.training_data.loading
train_data = rasa.shared.nlu.training_data.loading.load_data(
    "data/nlu.yml", nlu_interpreter.model_metadata.language
)

# This `train_data` object contains intent_examples. This is a 
# list of `Message` objects. These are containers that can 
# contain intents, entities but also other information that 
# is relevant to a NLU pipeline. 

[m.as_dict() for m in train_data.intent_examples][:5]

[{'text': 'Bom dia', 'intent': 'say_hello'},
 {'text': 'Bonjour', 'intent': 'say_hello'},
 {'text': 'buenos dias', 'intent': 'say_hello'},
 {'text': 'Good Morning', 'intent': 'say_hello'},
 {'text': 'Good morning', 'intent': 'say_hello'}]

In [8]:
# In this section we use a pandas and sklearn to analyse the training data and the predictions from nlu

import pandas as pd 
from sklearn.metrics import classification_report

def add_predictions(dataf):
    pred_blob = [nlu_interpreter.parse(t)['intent'] for t in dataf['text']]
    return (dataf
            [['text', 'intent']]
            .assign(pred_intent=[p['name'] for p in pred_blob])
            .assign(pred_confidence=[p['confidence'] for p in pred_blob]))

df_intents = pd.DataFrame([m.as_dict() for m in train_data.intent_examples]).pipe(add_predictions)

report = classification_report(y_true=df_intents['intent'], y_pred=df_intents['pred_intent'])
print(report)


                        precision    recall  f1-score   support

          abuse_harass       0.88      0.35      0.50        20
                affirm       0.00      0.00      0.00        48
          ask_for_help       1.00      0.66      0.80       115
    ask_gpg_definition       1.00      0.17      0.29        12
          ask_gpg_intl       0.00      0.00      0.00         4
            ask_gpg_uk       0.00      0.00      0.00         4
ask_salary_nego_advice       1.00      0.39      0.56        18
          ask_the_time       0.00      0.00      0.00        52
         bot_challenge       0.00      0.00      0.00        69
                  deny       1.00      0.02      0.04        89
          feeling_good       0.91      0.78      0.84        54
       feeling_unhappy       0.85      0.68      0.76        60
             give_name       0.00      0.00      0.00         8
          nlu_fallback       0.00      0.00      0.00         0
           repeat_name       0.00      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# Summary stats on the training data
df_summary = (df_intents
 .groupby("pred_intent")
 .agg(n=('pred_confidence', 'size'),
      mean_conf=('pred_confidence', 'mean')))

df_summary

Unnamed: 0_level_0,n,mean_conf
pred_intent,Unnamed: 1_level_1,Unnamed: 2_level_1
abuse_harass,8,0.616228
ask_for_help,76,0.736198
ask_gpg_definition,2,0.608156
ask_salary_nego_advice,7,0.636833
deny,2,0.639354
feeling_good,46,0.805621
feeling_unhappy,48,0.713416
nlu_fallback,589,0.556375
salaryQnA,102,0.970944
say_hello,3,0.629019


In [10]:
import altair as alt
bars = alt.Chart(df_intents).mark_bar().encode(
    x='pred_confidence:Q',
    y="pred_intent:O"
)

(bars).properties(height=100) 

In [11]:
df_conf_mat = (df_intents
               .groupby(["intent", "pred_intent"])
               .agg(n_pred=("pred_confidence", "size"))
               .reset_index())

df_plot = df_conf_mat.merge(df_summary.reset_index()).assign(p=lambda d: d['n_pred']/d['n'])

alt.Chart(df_plot).mark_rect().encode(
    x='intent:O',
    y='pred_intent:O',
    color='p:Q'
)