# Summary

See if we can speed up lshdict instantiation.

In [282]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [283]:
from itertools import product
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import os
import pandas as pd
from pathlib import Path

from alexa.utils import get_number
from jabberwocky.config import C
from jabberwocky.openai_utils import load_prompt, load_openai_api_key
from jabberwocky.utils import ReturningThread
from htools import *
from htools.structures import _FuzzyDictBase

In [7]:
cd_root()

Current directory: /Users/hmamin/jabberwocky


In [221]:
# Note: remove "New" prefix when porting.
class NewLSHDict(_FuzzyDictBase):
    """Dictionary that returns the value corresponding to a key's nearest
    neighbor if the key isn't present in the dict. This is intended for use
    as a word2index dict when using embeddings in deep learning: e.g. if we
    have domain embeddings for the top 100k websites, some of our options for
    dealing with unknown domains are:

    1. Encode all of them as <UNK>. This loses a lot of information.
    2. Create a FuzzyKeyDict which will search for similar keys using variants
    of Levenshtein distance. Lookup is O(N) and for 100k domains, that comes
    out to 0.6 seconds per item. We might have thousands or millions of
    lookups over the course of training so this can be a significant cost.
    3. Create an LSHDict (lookups are O(1)). Indexing into the dict as usual
    (e.g. my_lsh_dict[key]) will provide the key's index if present and the
    (approximate) nearest neighbor's index otherwise. Either way, the result
    can be used to index into your embedding layer.
    4. Create an LSHDict and use the `similar_values` method to return n>1
    neighbors. Then pass their indices to an Embedding layer and
    compute the sum/average/weighted average of the results. This may be
    preferable to #3 cases such as web domain lookup, where similar URLs are
    not guaranteed to represent similar sites. (This is basically
    equivalent to an EmbeddingBag layer, but in torch that doesn't store
    intermediate representations so we wouldn't be able to use our pretrained
    embeddings.)

    LSHDict does NOT support pickling as of version 6.0.6 (note: setitem seems
    to be called before init when unpickling, meaning we try to access
    self.forest in self._update_forest before it's been defined. Even if we
    change setitem so reindexing does not occur by default, it still tries to
    hash the new word and add it to the forest so unpickling will still fail).
    """

    def __init__(self, data, n_candidates=None, n_keys=3, ngram_size=3,
                 scorer=fuzz.ratio, chunksize=100):
        """
        Parameters
        ----------
        data: dict or list[tuple]
            The base dictionary. Unlike FuzzyKeyDict, we require this since
            adding items one by one is computationally infeasible for large
            datasets. Just build up your dictionary first.
        n_candidates: int or None
            Number of reasonably similar keys to retrieve when trying to index
            in with a key that's missing (or when using the `similar` method).
            You can override this in `similar` but not when using
            __getitem__'s square bracket syntax. If not specified, this will
            be auto initialized to vocab size/1,000, clipped to lie in
            [20, 500]. See `similar` docstring for more on this.
        n_keys: int
            Default number of similar keys to retrieve in `similar`.
        scorer: function
            Default scoring function to use to narrow `n_candidates` keys down
            to `n_keys`. Should be a fuzzywuzzy function where scores lie in
            [0, 100] and higher values indicate high similarity.
        chunksize: int
            Determines how many items to send to each process when hashing
            all the keys in the input data using multiprocessing. The default
            should be fine in most cases.
        """
        if len(data) < 10_000 and len(next(iter(data))) < 100:
            warnings.warn(
                'It looks like you\'re working with a relatively small '
                'amount of data. FuzzyKeyDict may be fast enough for your '
                'use case and would provide the set of strictly most similar '
                'keys rather than an approximation of that set.'
            )

        super().__init__(data)
        self.scorer = scorer
        self.hash_word = partial(self.lsh_hash_word, n=ngram_size)
        self.forest = MinHashLSHForest(num_perm=128)
        self.chunksize = chunksize
        self._initialize_forest()

        # Datasketch's LSH implementation usually gives pretty decent results
        # even with numbers as low as 5-10, but increasing that by a factor of
        # 10 comes with minimal time cost: Fuzzywuzzy matching doesn't get
        # particularly slow until we get into the thousands. The fact that
        # we cap this at 500 makes this lookup asymptotically O(1) while
        # FuzzyKeyDict's is O(N).
        self.n_candidates = n_candidates or np.clip(len(self) // 1_000,
                                                    20, 500)
        self.n_keys = n_keys

    def __setitem__(self, key, val):
        """Try to add keys all at once in the constructor because adding new
        keys can be extremely slow.
        """
        super().__setitem__(key, val)
        self._update_forest(key)

    def _update_forest(self, key, index=True):
        """Used in __setitem__ to update our LSH Forest. Forest's index method
        seems to recompute everything so adding items to a large LSHDict will
        be incredibly slow. Luckily, our deep learning use case rarely/never
        requires us to update object2index dicts after instantiation so that's
        not as troubling as it might seem.
        
        This used to be used by _initialize_forest as well but it didn't lend
        itself to parallelization as well since it acts on a shared, existing
        data structure.

        Parameters
        ----------
        key: str
        index: bool
            If True, reindex the forest (essentially making the key
            queryable). This should be False when initializing the forest so
            we just index once after everything's been added.
        """
        self.forest.add(key, self.hash_word(key))
        if index: self.forest.index()

    def _initialize_forest(self):
        """Called once in __init__ to add all items to LSH Forest. This is
        necessary because dict specifically calls its own __setitem__, not
        its children's.
        """
        hashes = parallelize(self.hash_word, self.keys(), total=len(self),
                             chunksize=self.chunksize)
        for hash_, key in zip(hashes, self.keys()):
            self.forest.add(key, hash_)
        self.forest.index()

    @add_docstring(_FuzzyDictBase._filter_similarity_pairs)
    def similar(self, key, mode='keys_values', n_candidates=None,
                n_keys=None, scorer=None):
        """Find a list of similar keys. This is used in __getitem__ but can
        also be useful as a user-facing method if you want to get more than
        1 neighbor or you want to get similarity scores as well.

        Parameters
        ----------
        key: str
            Word/URL/etc. to find similar keys to.
        mode: str
            See section below `Returns`.
        n_candidates: int or None
            Number of similar candidates to retrieve. This uses Jaccard
            Similarity which isn't always a great metric for string
            similarity. This is also where the LSH comes in so they're not
            strictly the n best candidates, but rather a close approximation
            of that set. If None, this will fall back to self.n_candidates.
            Keep in mind this determines how many keys to
        n_keys: int or None
            Number of similar keys to return. If None, this will fall back to
            self.n_keys.
        scorer: function or None
            Fuzzywuzzy scoring function, e.g. fuzz.ratio or
            fuzz.partial_ratio, which will be used to score each candidate and
            select which to return. Higher scores indicate higher levels of
            similarity. If None, this will fall back to self.scorer.

        Returns
        -------
        list: List if `mode` is "keys" or "values". List of tuples otherwise.
        """
        candidates = self.forest.query(self.hash_word(key),
                                       n_candidates or self.n_candidates)
        if not candidates: raise KeyError('No similar keys found.')

        # List of (key, score) where higher means more similar.
        pairs = process.extract(key, candidates,
                                limit=n_keys or self.n_keys,
                                scorer=scorer or self.scorer)
        return self._filter_similarity_pairs(pairs, mode=mode)

    @staticmethod
    @add_docstring(ngrams)
    def lsh_hash_word(word, num_perm=128, **ngram_kwargs):
        """Hash an input word (str) and return a MinHash object that can be
        added to an LSHForest.

        Parameters
        ----------
        word: str
            Word to hash.
        num_perm: int
        ngram_kwargs: any
            Forwarded to `ngrams`.

        Returns
        -------
        datasketch MinHash object
        """
        mhash = MinHash(num_perm=num_perm)
        for ng in ngrams(word, **ngram_kwargs):
            mhash.update(ng.encode('utf8'))
        return mhash

In [217]:
def build_utterance_map(model_json, fuzzy=True,
                        exclude_types=('AMAZON.Person', 'AMAZON.SearchQuery'),
                        save_=False, model_path='data/alexa/dialog_model.json',
                        meta_path='data/alexa/utterance2meta.pkl',
                        min_num=0, max_num=100):
    """Given a dictionary copied from Alexa's JSON Editor, return a
    dict or FuzzyKeyDict mapping each possible sample utterance to its
    corresponding intent. This allows our delegate() function to do some
    utterance validation before blindly forwarding an utterance to _reply() or
    the next queued function.

    Warning: because each intent may have several utterances and
    each utterance may contain multiple slots and each slot may have multiple
    sample values, the dimensionality can blow up quickly here.

    Parameters
    ----------
    model_json
    exclude_types: Iterable[str]
        One or more slot types where we want to exclude intents that contain
        any of them from the output map. For example, AMAZON.SearchQuery is
        meant to capture whole utterances matching no particular format as a
        fallback intent, so it wouldn't make sense to try to fuzzy match
        these utterances to an intent. I could see AMAZON.Person being included
        in some contexts but in this skill, we only use it for the choosePerson
        utterance which consists solely of a name. There really shouldn't be a
        reason to fuzzy match that.

    Returns
    -------
    Dict: Maps sample utterance to dict containing 'intent' str and 'slots'
    dict.
    """
    exclude_types = set(exclude_types)
    model = model_json['interactionModel']['languageModel']
    type2vals = {type_['name']: [row['name']['value']
                                 for row in type_['values']]
                 for type_ in model['types']}
    type2vals['AMAZON.NUMBER'] = list(map(str, range(min_num, max_num + 1)))
    utt2meta = {}
    for intent in model['intents']:
        slot2vals = {}
        try:
            for slot_ in intent.get('slots', []):
                assert slot_['type'] not in exclude_types
                slot2vals[slot_['name']] = type2vals[slot_['type']]
        except AssertionError:
            continue

        # Replace all slot names with common slot values.
        for row in intent['samples']:
            for args in product(*slot2vals.values()):
                kwargs = dict(zip(slot2vals, args))
                utt2meta[row.format(**kwargs)] = {'intent': intent['name'],
                                                  'slots': kwargs}
    meta = FuzzyKeyDict(utt2meta) if fuzzy else utt2meta
    if save_:
        save(model_json, model_path)
        save(meta, meta_path)
    return meta

In [274]:
# More up to date than what was in the lib at the time, but we develop a
# more up to date version later in the nb.
def infer_intent(
    utt,
    fuzzy_dict,
    n_keys=5,
    n_candidates=None,
    top_1_thresh=0.9,
    weighted_thresh=0.7,
):
    """
    Try to infer the user's intent from an utterance. Alexa should detect
    this automatically but it sometimes messes up. This also helps if the user
    gets the utterance slightly wrong, e.g. "Lou, set backend to goose ai"
    rather than "Lou, switch backend to goose ai".

    Parameters
    ----------
    utt
    fuzzy_dict
    n_keys
    top_1_thresh
    weighted_thresh

    Returns
    -------
    dict: Contains keys "intent", "confidence", "reason", and "res".
    Intent is the name of the closest matching intent if one was sufficiently
    close (empty string otherwise), confidence is a float between 0 and 1
    indicating our confidence in this being correct (sort of, not anything
    rigorous though; -1 if no matching intent is found), and reason is a string
    indicating our method for determining this ('top_1' means we found 1 sample
    utterance that was very close to the input, 'weighted' means that most of
    the nearest matching utterances tended to belong to the same intent, empty
    string means no matching intent was found). Res is always just the raw
    results of our fuzzy_dict similar() method call, a list of tuples
    containing all n_keys matching utterances, their corresponding intents,
    and similarity scores.
    """
    kwargs = dict(n_keys=n_keys, mode='keys_values_similarities')
    if isinstance(fuzzy_dict, LSHDict):
        kwargs['n_candidates'] = n_candidates
    res = fuzzy_dict.similar(utt, **kwargs)
    top_1_pct = res[0][-1] / 100
    if top_1_pct >= top_1_thresh:
        return {'intent': res[0][1]['intent'],
                'slots': res[0][1]['slots'],
                'confidence': top_1_pct,
                'reason': 'top_1',
                'res': res}
    df = pd.DataFrame(res, columns=['txt', 'intent', 'score'])\
        .assign(slots=lambda df_: df_.intent.apply(lambda x: x['slots']),
                intent=lambda df_: df_.intent.apply(lambda x: x['intent']))
    weighted = df.groupby('intent').score.sum()\
        .to_frame()\
        .assign(pct=lambda x: x / (n_keys * 100))
    if weighted.pct.iloc[0] > weighted_thresh:
        intent = weighted.iloc[0].name
        slots = df.loc[df.intent == intent, 'slots'].iloc[0]
        return {'intent': intent,
                'slots': slots,
                'confidence': weighted.iloc[0].pct,
                'reason': 'weighted',
                'res': res}
    # In this case, confidence is a bit different but it's loosely intended to
    # mean "confidence that the utterance matched no pre-defined intent".
    # Value simply needs to be higher than 1 - weighted_thresh.
    return {'intent': '',
            'slots': {},
            'confidence': 1 - weighted.iloc[0].pct,
            'reason': '',
            'res': res}

In [106]:
def build_intent2utts(fd):
    df = pd.DataFrame(fd).T
    intent2utts = dict(df.reset_index().groupby('intent')['index'].apply(set).items())
    return intent2utts

In [10]:
model = load('data/alexa/dialog_model.json')

Object loaded from data/alexa/dialog_model.json.


In [71]:
fd = build_utterance_map(model, save_=False, max_num=900)

In [107]:
intent2utts = build_intent2utts(fd)

In [74]:
with block_timer():
    lsh = LSHDict(fd)

  0%|          | 0/46983 [00:00<?, ?it/s]

[TIMER]: Block executed in 68.481 seconds.


In [73]:
with block_timer():
    lsh2 = NewLSHDict(fd)

  0%|          | 0/46983 [00:00<?, ?it/s]

[TIMER]: Block 'HASHING' executed in 31.848 seconds.
[TIMER]: Block 'UPDATE' executed in 1.022 seconds.
[TIMER]: Block executed in 33.075 seconds.


In [76]:
# Chunksize 100
with block_timer():
    lsh2 = NewLSHDict(fd)

  0%|          | 0/46983 [00:00<?, ?it/s]

[TIMER]: Block 'HASHING' executed in 33.982 seconds.
[TIMER]: Block 'UPDATE' executed in 0.967 seconds.
[TIMER]: Block executed in 35.201 seconds.


In [80]:
# Chunksize 100, explicitly set processes=cpu_count()
with block_timer():
    lsh2 = NewLSHDict(fd)

  0%|          | 0/46983 [00:00<?, ?it/s]

[TIMER]: Block 'HASHING' executed in 36.665 seconds.
[TIMER]: Block 'UPDATE' executed in 1.029 seconds.
[TIMER]: Block executed in 37.956 seconds.


In [85]:
# Threaded version
with block_timer():
    lsh2 = NewLSHDict(fd)

[TIMER]: Block executed in 69.805 seconds.


In [154]:
utt = 'Lou please change my temp to 36'

In [155]:
infer_intent(utt, fd)

{'intent': 'changeTemperature',
 'slots': {'Number': '36', 'Scope': 'global'},
 'confidence': 0.794,
 'reason': 'weighted',
 'res': [('Lou change temp to 36',
   {'intent': 'changeTemperature',
    'slots': {'Number': '36', 'Scope': 'global'}},
   81),
  ('Lou change temp to 136',
   {'intent': 'changeTemperature',
    'slots': {'Number': '136', 'Scope': 'global'}},
   79),
  ('Lou change temp to 236',
   {'intent': 'changeTemperature',
    'slots': {'Number': '236', 'Scope': 'global'}},
   79),
  ('Lou change temp to 306',
   {'intent': 'changeTemperature',
    'slots': {'Number': '306', 'Scope': 'global'}},
   79),
  ('Lou change temp to 316',
   {'intent': 'changeTemperature',
    'slots': {'Number': '316', 'Scope': 'global'}},
   79)]}

Approach 1. First use lsh dict to infer intent, then use fuzzydict to brute force search over all that intent's utterances.

In [241]:
with block_timer('lsh block'):
    res = infer_intent(utt, lsh, weighted_thresh=.5)
with block_timer('fuzzy block'):
    if res['intent']:
        utts = intent2utts[res['intent']]
        utt_match, score = process.extractOne(utt, utts, scorer=fuzz.ratio)

[TIMER]: Block 'lsh block' executed in 0.007 seconds.
[TIMER]: Block 'fuzzy block' executed in 0.102 seconds.


In [242]:
print(utt_match)
print(score)
print(fd[utt_match]['slots'])

Lou change temp to 36
81
{'Number': '36', 'Scope': 'global'}


In [158]:
len(intent2utts[res['intent']])

14416

Approach 2. Use single lsh dict and have it retrieve a larger group of candidates first before using fuzzywuzzy to re-rank them. Differs from approach 1 in that we brute force search a fixed number of candidates rather than however many sample utts an intent has.

In [228]:
# Use lsh here bc lsh2 used developmental class. Infer_intent checks fuzzy
# dict type when determining whether to pass n_candidates to similar() call.
with block_timer():
    res = infer_intent(utt, lsh, n_candidates=1_000, n_keys=5)

[TIMER]: Block executed in 0.032 seconds.


In [233]:
res['intent'], res['slots']

('changeTemperature', {'Number': '36', 'Scope': 'global'})

Approach 3. Current brute force approach. Fuzzy dict search all possible utts.

In [234]:
with block_timer():
    res = infer_intent(utt, fd)

[TIMER]: Block executed in 0.339 seconds.


In [235]:
res['intent'], res['slots']

('changeTemperature', {'Number': '36', 'Scope': 'global'})

Approach 4. Use LSH dict to infer intent, then if that intent has a Number slot, use separate function to extract number.

In [247]:
res

{'intent': 'changeTemperature',
 'slots': {'Number': '358', 'Scope': 'global'},
 'confidence': 0.73,
 'reason': 'weighted',
 'res': [('Lou change temp to 358',
   {'intent': 'changeTemperature',
    'slots': {'Number': '358', 'Scope': 'global'}},
   75),
  ('Lou change temp to 308',
   {'intent': 'changeTemperature',
    'slots': {'Number': '308', 'Scope': 'global'}},
   75),
  ('Lou change temp to 338',
   {'intent': 'changeTemperature',
    'slots': {'Number': '338', 'Scope': 'global'}},
   75),
  ('Lou change global temp to 308',
   {'intent': 'changeTemperature',
    'slots': {'Number': '308', 'Scope': 'global'}},
   70),
  ('Lou change global temp to 358',
   {'intent': 'changeTemperature',
    'slots': {'Number': '358', 'Scope': 'global'}},
   70)]}

In [250]:
with block_timer('lsh block'):
    res = infer_intent(utt, lsh, weighted_thresh=.5)
with block_timer('fuzzy block'):
    if res['intent']:
        if 'Number' in res['slots']:
            res['slots']['Number'] = get_number(utt)['value']

[TIMER]: Block 'lsh block' executed in 0.007 seconds.
[TIMER]: Block 'fuzzy block' executed in 0.007 seconds.


In [251]:
res

{'intent': 'changeTemperature',
 'slots': {'Number': '36', 'Scope': 'global'},
 'confidence': 0.73,
 'reason': 'weighted',
 'res': [('Lou change temp to 358',
   {'intent': 'changeTemperature',
    'slots': {'Number': '36', 'Scope': 'global'}},
   75),
  ('Lou change temp to 308',
   {'intent': 'changeTemperature',
    'slots': {'Number': '308', 'Scope': 'global'}},
   75),
  ('Lou change temp to 338',
   {'intent': 'changeTemperature',
    'slots': {'Number': '338', 'Scope': 'global'}},
   75),
  ('Lou change global temp to 308',
   {'intent': 'changeTemperature',
    'slots': {'Number': '308', 'Scope': 'global'}},
   70),
  ('Lou change global temp to 358',
   {'intent': 'changeTemperature',
    'slots': {'Number': '358', 'Scope': 'global'}},
   70)]}

**Status**

- lshDict method is a little faster but not as much as I hoped (no big O change). Mostly because a single intent can still have a large number of utts so we still end up fuzzy dict searching a big list.
- For additional complexity to be worth it, I think I'd need to find some further optimizations. Should be at least an order of magnitude faster to be worth it.

In [252]:
len(fd)

46983

In [315]:
fd5 = build_utterance_map(model, save_=False, max_num=5)
len(fd5)

443

In [271]:
with block_timer('lsh block'):
    res = infer_intent(utt, fd5)
with block_timer('fuzzy block'):
    if res['intent'] and 'Number' in res['slots']:
        res['slots']['Number'] = get_number(utt)['value']

[TIMER]: Block 'lsh block' executed in 0.007 seconds.
[TIMER]: Block 'fuzzy block' executed in 0.009 seconds.


In [272]:
res

{'intent': 'changeTemperature',
 'slots': {'Number': '36', 'Scope': 'global'},
 'confidence': 0.756,
 'reason': 'weighted',
 'res': [('Lou change temp to 3',
   {'intent': 'changeTemperature',
    'slots': {'Number': '36', 'Scope': 'global'}},
   78),
  ('Lou change temp to 0',
   {'intent': 'changeTemperature',
    'slots': {'Number': '0', 'Scope': 'global'}},
   75),
  ('Lou change temp to 1',
   {'intent': 'changeTemperature',
    'slots': {'Number': '1', 'Scope': 'global'}},
   75),
  ('Lou change temp to 2',
   {'intent': 'changeTemperature',
    'slots': {'Number': '2', 'Scope': 'global'}},
   75),
  ('Lou change temp to 4',
   {'intent': 'changeTemperature',
    'slots': {'Number': '4', 'Scope': 'global'}},
   75)]}

In [311]:
def infer_intent(
    utt,
    fuzzy_dict,
    n_keys=5,
    n_candidates=None,
    top_1_thresh=0.9,
    weighted_thresh=0.7,
):
    """
    Try to infer the user's intent from an utterance. Alexa should detect
    this automatically but it sometimes messes up. This also helps if the user
    gets the utterance slightly wrong, e.g. "Lou, set backend to goose ai"
    rather than "Lou, switch backend to goose ai".

    Parameters
    ----------
    utt
    fuzzy_dict
    n_keys
    top_1_thresh
    weighted_thresh

    Returns
    -------
    dict: Contains keys "intent", "confidence", "reason", and "res".
    Intent is the name of the closest matching intent if one was sufficiently
    close (empty string otherwise), confidence is a float between 0 and 1
    indicating our confidence in this being correct (sort of, not anything
    rigorous though; -1 if no matching intent is found), and reason is a string
    indicating our method for determining this ('top_1' means we found 1 sample
    utterance that was very close to the input, 'weighted' means that most of
    the nearest matching utterances tended to belong to the same intent, empty
    string means no matching intent was found). Res is always just the raw
    results of our fuzzy_dict similar() method call, a list of tuples
    containing all n_keys matching utterances, their corresponding intents,
    and similarity scores.
    """
    kwargs = dict(n_keys=n_keys, mode='keys_values_similarities')
    if isinstance(fuzzy_dict, LSHDict):
        kwargs['n_candidates'] = n_candidates
    res = fuzzy_dict.similar(utt, **kwargs)
    top_1_pct = res[0][-1] / 100
    res_final = {'res': res}
    if top_1_pct >= top_1_thresh:
        res_final.update(intent=res[0][1]['intent'],
                         slots=res[0][1]['slots'],
                         confidence=top_1_pct,
                         reason='top_1')
    else:
        df = pd.DataFrame(res, columns=['txt', 'intent', 'score'])
        df['slots'] = df.intent.apply(lambda x: x['slots'])
        df['intent'] = df.intent.apply(lambda x: x['intent'])
        weighted = df.groupby('intent').score.sum()\
            .to_frame()\
            .assign(pct=lambda x: x / (n_keys * 100))

    # Only consider weighted method if top 1 check was not satisfied.
    if 'intent' not in res_final:
        if weighted.pct.iloc[0] > weighted_thresh:
            intent = weighted.iloc[0].name
            slots = df.loc[df.intent == intent, 'slots'].iloc[0]
            res_final.update(intent=intent,
                             slots=slots,
                             confidence=weighted.iloc[0].pct,
                             reason='weighted')
        else:
            # In this case, confidence is a bit different but it's loosely 
            # intended to mean "confidence that the utterance matched no 
            # pre-defined intent". Value simply needs to be higher than 
            # 1 - weighted_thresh.
            res_final.update(intent='',
                             slots={},
                             confidence=1 - weighted.iloc[0].pct,
                             reason='')
    if res_final['intent'] and 'Number' in res_final['slots']:
        res_final['slots']['Number'] = get_number(utt)['value']
        # Sometimes no number could be extracted. Unclear what confidence 
        # score should be here so I just leave it unchanged.
        if res_final['slots']['Number'] is None:
            res_final['intent'] = ''
    return res_final

In [312]:
positives = [
    'lou change temperature to 47',
    'lou change my temperature to 100',
    'lou set temperature to 95',
    'lou use temperature 33',
    'lou please change the temperature to 13',
    'lou change global temperature to 99',
    'lou please set the conversation temperature to 43',
    'lou change my person level temperature to 18',
    'lou use max length of 14',
    'lou change my max length to 52',
    'lou set the max length to 9',
    'lou please change global max length to 19',
    'lou can you change my conversation level max length to 77',
    'change max length to 9',
    'please set max length to 45',
    'could you change my person level temp to 67'
]
negatives = [
    'what is the temperature?',
    'can you change that today?',
    'change my mind about #23',
    'the maximum length that is allowed is 2',
    'the global temperature has risen by 3 degrees',
    'I am a temp but I want to change that',
    'the conversation length is 8',
    'lou change temperature to'
]

In [313]:
n_pos = len(positives)
for i, utt in enumerate(positives + negatives):
    inferred = infer_intent(utt, fd5)
    print('POSITIVE:' if i < n_pos else 'NEGATIVE:', utt)
    print(select(inferred, drop=['res']))
    hr()

POSITIVE: lou change temperature to 47
{'intent': 'changeTemperature', 'slots': {'Number': 47, 'Scope': 'global'}, 'confidence': 0.98, 'reason': 'top_1'}

-------------------------------------------------------------------------------

POSITIVE: lou change my temperature to 100
{'intent': 'changeTemperature', 'slots': {'Number': 100, 'Scope': 'global'}, 'confidence': 0.92, 'reason': 'top_1'}

-------------------------------------------------------------------------------

POSITIVE: lou set temperature to 95
{'intent': 'changeTemperature', 'slots': {'Number': 95, 'Scope': 'global'}, 'confidence': 0.98, 'reason': 'top_1'}

-------------------------------------------------------------------------------

POSITIVE: lou use temperature 33
{'intent': 'changeTemperature', 'slots': {'Number': 33, 'Scope': 'global'}, 'confidence': 0.838, 'reason': 'weighted'}

-------------------------------------------------------------------------------

POSITIVE: lou please change the temperature to 13
{'inte