# Explore and Diagnose our Models

Adapted from: https://blog.rasa.com/evaluating-rasa-nlu-models-in-jupyter/

In [None]:
# setup

from rasa.cli.utils import get_validated_path
from rasa.model import get_model, get_model_subdirectories
from rasa.nlu.model import Interpreter
import rasa.shared.nlu.training_data.loading as nlu_loading
from pprint import pprint
import pandas as pd 
from sklearn.metrics import classification_report




def load_interpreter(model_path):
    """
    This loads the Rasa NLU interpreter. It is able to apply all NLU
    pipeline steps to a text that you provide it. 
    """
    model = get_validated_path(model_path, "model")
    model_path = get_model(model)
    _, nlu_model = get_model_subdirectories(model_path)
    return Interpreter.load(nlu_model)

# nlu_interpreter = load_interpreter("models/20210307-100229.tar.gz")

In [None]:
# what do we have

# type(nlu_interpreter)

In [None]:
# methods

# dir(nlu_interpreter)

In [None]:
# as per the blog, the pipeline is accessible
# nlu_interpreter.pipeline

In [None]:
# lets parse a message and see that under the hood is an api
# nlu_interpreter.parse("hello")

## NLU data


In [3]:
# load the nlu data

# train_data = nlu_loading.load_data("data/nlu.yml")

In [None]:
# what do we have


In [None]:
# lets look a few examples

# [m.as_dict() for m in train_data.intent_examples][:5]

In [4]:
# we talked about the pipeline and how rasa is preprocessing and featurizing our data
# lets look at this for the first message

# message = train_data.intent_examples[0]
# for component in nlu_interpreter.pipeline:
#     component.process(message)


In [None]:
# what does the message have?
# . method


In [None]:
# we can a peak inside
# its not just about the utterances, but also the responses
# the text features for a response are attempted to aligned with an predicted action from the stories

# sparse_token_feats, sparse_utterance_feats = message.get_sparse_features("text")

In [None]:
# remember dense matrices?

# sparse_utterance_feats.features.todense().shape

In [5]:
######################### lets use the NLU model
## this will allow us to iterate over intents and predict
## the intent

def add_predictions(dataf, nlu):
    """This function will add prediction columns based on `nlu`"""
    pred_blob = [nlu.parse(t)['intent'] for t in dataf['text']]
    return (dataf
            [['text', 'intent']]
            .assign(pred_intent=[p['name'] for p in pred_blob])
            .assign(pred_confidence=[p['confidence'] for p in pred_blob]))


# list of dictionaries
data_list = [m.as_dict() for m in train_data.intent_examples]

# construct the df and apply the predictions via nlu interpreter
df_intents = pd.DataFrame(data_list).pipe(add_predictions, nlu=nlu_interpreter)

In [6]:
df_intents

Unnamed: 0,text,intent,pred_intent,pred_confidence
0,hey,"{'id': -4957217158235688328, 'name': 'greet', ...",greet,0.999997
1,hello,greet,greet,0.999997
2,hi,greet,greet,0.999997
3,hello there,greet,greet,0.999992
4,good morning,greet,greet,0.999987
...,...,...,...,...
77,what to do next?,learn_more,learn_more,0.999287
78,how can I get help with rasa,learn_more,learn_more,0.999508
79,I want to learn more,learn_more,learn_more,0.999436
80,i want to learn more about this,learn_more,learn_more,0.999286


In [7]:
# lets cleanup - not sure why the first chokes

# df_intents2 = df_intents.iloc[1:, :]

In [8]:
# summarize

# (df_intents2
#  .groupby('pred_intent')
#  .agg(n=('pred_confidence', 'size'),
#       mean_conf=('pred_confidence', 'mean')))

Unnamed: 0_level_0,n,mean_conf
pred_intent,Unnamed: 1_level_1,Unnamed: 2_level_1
affirm,7,0.999554
bot_challenge,5,0.999777
deny,7,0.999667
goodbye,11,0.999972
greet,15,0.942581
learn_more,7,0.999456
mood_great,15,0.99999
mood_unhappy,14,0.999991


In [9]:
# we can use sklearn!

# cr = classification_report(y_true=df_intents2['intent'], y_pred=df_intents2['pred_intent'])
# print(cr)

               precision    recall  f1-score   support

       affirm       1.00      1.00      1.00         7
bot_challenge       1.00      1.00      1.00         5
         deny       1.00      1.00      1.00         7
      goodbye       1.00      0.92      0.96        12
        greet       0.93      1.00      0.97        14
   learn_more       1.00      1.00      1.00         7
   mood_great       1.00      1.00      1.00        15
 mood_unhappy       1.00      1.00      1.00        14

     accuracy                           0.99        81
    macro avg       0.99      0.99      0.99        81
 weighted avg       0.99      0.99      0.99        81

