# Summary

Trying to extract slots when Alexa fails to recognize the desired intent.

In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [103]:
import eng_to_ipa
from fuzzywuzzy import fuzz, process
from g2p_en import G2p
import matplotlib.pyplot as plt
import numpy as np
import os
from itertools import product
import pandas as pd
from pathlib import Path
import spacy
import spacy
from spacy import displacy
from spacy.matcher import PhraseMatcher

from alexa.utils import infer_intent, get_backend
from jabberwocky.config import C
from jabberwocky.openai_utils import load_prompt, load_openai_api_key
from htools import *

In [4]:
cd_root()

Current directory: /Users/hmamin/jabberwocky


## Data

In [143]:
def build_utterance_map(model_json, fuzzy=True,
                        exclude_types=('AMAZON.Person', 'AMAZON.SearchQuery')):
    """Given a dictionary copied from Alexa's JSON Editor, return a
    dict or FuzzyKeyDict mapping each possible sample utterance to the intent
    it belongs to and the slot values used to generate it. This allows our
    delegate() function to do some utterance validation before blindly
    forwarding an utterance to _reply() or the next queued function.

    Warning: because each intent may have several utterances and
    each utterance may contain multiple slots and each slot may have multiple
    sample values, the dimensionality can blow up quickly here.

    Parameters
    ----------
    model_json: dict
        Interaction model. Must contain
        model_json['interactionModel']['languageModel']['intents']; the
        language model's 'types' list and each intent's 'slots' and 'samples'
        lists are optional and treated as empty when missing.
    fuzzy: bool
        If True, wrap the result in a FuzzyKeyDict. If False, return a plain
        dict.
    exclude_types: Iterable[str]
        One or more slot types where we want to exclude intents that contain
        any of them from the output map. For example, AMAZON.SearchQuery is
        meant to capture whole utterances matching no particular format as a
        fallback intent, so it wouldn't make sense to try to fuzzy match
        these utterances to an intent. I could see AMAZON.Person being included
        in some contexts but in this skill, we only use it for the choosePerson
        utterance which consists solely of a name. There really shouldn't be a
        reason to fuzzy match that.

    Returns
    -------
    dict or FuzzyKeyDict: Maps each fully filled-in sample utterance (str) to
    a dict with keys 'intent' (the intent's name) and 'slots' (dict mapping
    slot name to the sample value substituted into the utterance).

    Raises
    ------
    KeyError
        If a non-excluded intent uses a slot type that is neither defined in
        the model's 'types' nor AMAZON.NUMBER.
    """
    exclude_types = set(exclude_types)
    model = model_json['interactionModel']['languageModel']
    type2vals = {type_['name']: [row['name']['value']
                                 for row in type_['values']]
                 for type_ in model.get('types', [])}
    # Built-in number slots have no listed values, so use single digits as
    # representative samples.
    type2vals['AMAZON.NUMBER'] = list(map(str, range(10)))
    utt2meta = {}
    for intent in model['intents']:
        slots = intent.get('slots', [])
        # Explicit check rather than assert/except: asserts are stripped when
        # python runs with -O, which would silently disable the exclusion.
        if any(slot_['type'] in exclude_types for slot_ in slots):
            continue
        slot2vals = {slot_['name']: type2vals[slot_['type']]
                     for slot_ in slots}

        # Replace all slot names with common slot values. With no slots,
        # product() yields one empty combination so the sample is kept as is.
        for row in intent.get('samples', []):
            for args in product(*slot2vals.values()):
                kwargs = dict(zip(slot2vals, args))
                utt2meta[row.format(**kwargs)] = {'intent': intent['name'],
                                                  'slots': kwargs}
    return FuzzyKeyDict(utt2meta) if fuzzy else utt2meta

In [144]:
j = load('data/alexa/dialog_model.json')
model = j['interactionModel']['languageModel']

Object loaded from data/alexa/dialog_model.json.


In [145]:
utt2meta = build_utterance_map(j)
save(utt2meta, 'data/alexa/utterance2meta.pkl')

Writing data to data/alexa/utterance2meta.pkl.


In [149]:
backend_utts = [k for k, v in utt2meta.items() 
                if v['intent'] == 'changeBackend']
len(backend_utts)

70

In [150]:
model['types'][-1]

{'name': 'BackendType',
 'values': [{'name': {'value': 'hobby'}},
  {'name': {'value': 'hugging face'}},
  {'name': {'value': 'huggingface'}},
  {'name': {'value': 'gooseai'}},
  {'name': {'value': 'Goose AI'}},
  {'name': {'value': 'Open AI'}},
  {'name': {'value': 'openai'}}]}

In [121]:
# Notice templates still have filler {backend} rather than sample values.
# Look the intent up by name rather than by position so this cell keeps
# working if intents are added to or reordered in the model.
backend_intent = next(intent for intent in model['intents']
                      if intent['name'] == 'changeBackend')
backend_templates = backend_intent['samples']
backend_templates

['Lou please change backend to {backend}',
 'Lou please switch backend to {backend}',
 'Lou please use {backend} backend',
 'Lou please change to {backend} backend',
 'Lou please switch to {backend} backend',
 'Lou switch to {backend} backend',
 'Lou change to {backend} backend',
 'Lou use {backend} backend',
 'Lou switch backend to {backend}',
 'Lou change backend to {backend}']

**Observations**
- Looks like pattern "to {VALUE} back" may work pretty well.
- Another option: store mapping of sample utterance to template or sample utterance to slot vals, then just use the slot vals for the closest overall match.

## Try method 2 (use slots for closest fuzzy str match)

In [233]:
def infer_intent(utt, fuzzy_dict, n_keys=5, top_1_thresh=.9,
                 weighted_thresh=.8):
    """Try to infer the user's intent from an utterance. Alexa should detect
    this automatically but it sometimes messes up. This also helps if the user
    gets the utterance slightly wrong, e.g. "Lou, set backend to goose ai"
    rather than "Lou, switch backend to goose ai".

    Parameters
    ----------
    utt: str
        The utterance to match.
    fuzzy_dict
        Object with a `similar(utt, n_keys=..., mode='keys_values_similarities')`
        method returning a list of (utterance, meta, score) tuples, where
        meta is a dict with keys 'intent' and 'slots' and score is on a 0-100
        scale. Results are assumed to be ordered from most to least similar.
    n_keys: int
        Number of nearest utterances to retrieve.
    top_1_thresh: float
        If the best match's score / 100 is >= this value, its intent is
        returned immediately.
    weighted_thresh: float
        Otherwise, the intent with the largest share of the summed scores is
        returned if that share is strictly greater than this value.

    Returns
    -------
    dict: Contains keys "intent", "slots", "confidence", "reason", and "res".
    Intent is the name of the closest matching intent if one was sufficiently
    close (empty string otherwise), slots is the slot dict of the best
    matching utterance belonging to that intent (empty dict if no intent was
    found), confidence is a float between 0 and 1 indicating our confidence in
    this being correct (sort of, not anything rigorous though; -1 if no
    matching intent is found), and reason is a string indicating our method
    for determining this ('top_1' means we found 1 sample utterance that was
    very close to the input, 'weighted' means that most of the nearest
    matching utterances tended to belong to the same intent, empty string
    means no matching intent was found). Res is always just the raw results of
    our fuzzy_dict similar() method call, a list of tuples containing all
    n_keys matching utterances, their corresponding intents, and similarity
    scores.
    """
    res = fuzzy_dict.similar(utt, n_keys=n_keys,
                             mode='keys_values_similarities')
    no_match = {'intent': '',
                'slots': {},
                'confidence': -1,
                'reason': '',
                'res': res}
    # Nothing to compare against (e.g. an empty fuzzy_dict).
    if len(res) == 0:
        return no_match

    top_1_pct = res[0][-1] / 100
    if top_1_pct >= top_1_thresh:
        return {'intent': res[0][1]['intent'],
                'slots': res[0][1]['slots'],
                'confidence': top_1_pct,
                'reason': 'top_1',
                'res': res}

    # Sum similarity scores per intent. Dicts preserve insertion order, so
    # intents appear in order of their best match.
    intent2score = {}
    for row in res:
        name = row[1]['intent']
        intent2score[name] = intent2score.get(name, 0) + row[-1]
    total = sum(intent2score.values())
    if total > 0:
        # Pick the intent with the largest share of the total score (not
        # merely the alphabetically first one). Ties go to the intent whose
        # best match ranks highest.
        intent = max(intent2score, key=intent2score.get)
        pct = intent2score[intent] / total
        if pct > weighted_thresh:
            # Use the slots from the closest utterance with this intent.
            slots = next(row[1]['slots'] for row in res
                         if row[1]['intent'] == intent)
            return {'intent': intent,
                    'slots': slots,
                    'confidence': pct,
                    'reason': 'weighted',
                    'res': res}
    return no_match

In [234]:
text = 'lou switch to open i back in'

In [235]:
inferred = infer_intent(text, utt2meta)
inferred

{'intent': 'changeBackend',
 'slots': {'backend': 'Open AI'},
 'confidence': 0.91,
 'reason': 'top_1',
 'res': [('Lou switch to Open AI backend',
   {'intent': 'changeBackend', 'slots': {'backend': 'Open AI'}},
   91),
  ('Lou switch to openai backend',
   {'intent': 'changeBackend', 'slots': {'backend': 'openai'}},
   89),
  ('Lou switch to Goose AI backend',
   {'intent': 'changeBackend', 'slots': {'backend': 'Goose AI'}},
   83),
  ('Lou please switch to Open AI backend',
   {'intent': 'changeBackend', 'slots': {'backend': 'Open AI'}},
   81),
  ('Lou switch to gooseai backend',
   {'intent': 'changeBackend', 'slots': {'backend': 'gooseai'}},
   81)]}

In [237]:
infer_intent('lou set back and to goose', utt2meta)

{'intent': 'changeBackend',
 'slots': {'backend': 'gooseai'},
 'confidence': 1.0,
 'reason': 'weighted',
 'res': [('Lou switch backend to gooseai',
   {'intent': 'changeBackend', 'slots': {'backend': 'gooseai'}},
   81),
  ('Lou switch backend to Goose AI',
   {'intent': 'changeBackend', 'slots': {'backend': 'Goose AI'}},
   80),
  ('Lou change backend to gooseai',
   {'intent': 'changeBackend', 'slots': {'backend': 'gooseai'}},
   78),
  ('Lou change backend to Goose AI',
   {'intent': 'changeBackend', 'slots': {'backend': 'Goose AI'}},
   76),
  ('Lou please switch backend to gooseai',
   {'intent': 'changeBackend', 'slots': {'backend': 'gooseai'}},
   75)]}

In [238]:
infer_intent('lou switch back and to hug and fate', utt2meta)

{'intent': 'changeBackend',
 'slots': {'backend': 'hugging face'},
 'confidence': 1.0,
 'reason': 'weighted',
 'res': [('Lou switch backend to hugging face',
   {'intent': 'changeBackend', 'slots': {'backend': 'hugging face'}},
   84),
  ('Lou switch backend to huggingface',
   {'intent': 'changeBackend', 'slots': {'backend': 'huggingface'}},
   82),
  ('Lou please switch backend to hugging face',
   {'intent': 'changeBackend', 'slots': {'backend': 'hugging face'}},
   76),
  ('Lou please switch backend to huggingface',
   {'intent': 'changeBackend', 'slots': {'backend': 'huggingface'}},
   75),
  ('Lou switch backend to Open AI',
   {'intent': 'changeBackend', 'slots': {'backend': 'Open AI'}},
   75)]}

## Phonetic str similarity

Try to encode texts phonetically, then find similar utterances. Trying to account better for certain types of poor transcriptions: e.g. "hug end faze" isn't as similar to "huggingface" as we'd like for string similarity methods.

In [151]:
g2p = G2p()

In [152]:
def phonetic_str(text):
    """Encode text as a single phoneme string using the notebook-level `g2p`
    converter.

    The tokens returned by `g2p(text)` are concatenated with no separator, so
    any spacing in the result comes from the tokens themselves. For example,
    'lou switch to open i back in' is encoded as
    'LUW1 SWIH1CH TUW1 OW1PAH0N AY1 BAE1K IH0N'.

    Parameters
    ----------
    text: str

    Returns
    -------
    str
    """
    phonemes = g2p(text)
    return ''.join(phonemes)

In [155]:
# Build utterance -> phonemes first so every utterance keeps an entry even if
# two utterances share the same phonetic encoding. Inverting pho2utt instead
# would drop all but one utterance of any such group.
utt2pho = {utt: phonetic_str(utt) for utt in utt2meta}
pho2utt = {pho: utt for utt, pho in utt2pho.items()}

In [156]:
pho2utt

{'LUW1 CHEY1NJH KAA2NVER0SEY1SHAH0N TEH1MP TUW1 ZIH1ROW0': 'Lou change conversation temp to 0',
 'LUW1 CHEY1NJH PER1SAH0N TEH1MP TUW1 ZIH1ROW0': 'Lou change person temp to 0',
 'LUW1 CHEY1NJH GLOW1BAH0L TEH1MP TUW1 ZIH1ROW0': 'Lou change global temp to 0',
 'LUW1 CHEY1NJH KAA2NVER0SEY1SHAH0N TEH1MP TUW1 WAH1N': 'Lou change conversation temp to 1',
 'LUW1 CHEY1NJH PER1SAH0N TEH1MP TUW1 WAH1N': 'Lou change person temp to 1',
 'LUW1 CHEY1NJH GLOW1BAH0L TEH1MP TUW1 WAH1N': 'Lou change global temp to 1',
 'LUW1 CHEY1NJH KAA2NVER0SEY1SHAH0N TEH1MP TUW1 TUW1': 'Lou change conversation temp to 2',
 'LUW1 CHEY1NJH PER1SAH0N TEH1MP TUW1 TUW1': 'Lou change person temp to 2',
 'LUW1 CHEY1NJH GLOW1BAH0L TEH1MP TUW1 TUW1': 'Lou change global temp to 2',
 'LUW1 CHEY1NJH KAA2NVER0SEY1SHAH0N TEH1MP TUW1 THRIY1': 'Lou change conversation temp to 3',
 'LUW1 CHEY1NJH PER1SAH0N TEH1MP TUW1 THRIY1': 'Lou change person temp to 3',
 'LUW1 CHEY1NJH GLOW1BAH0L TEH1MP TUW1 THRIY1': 'Lou change global temp to 3',

In [158]:
pho = phonetic_str(text)
pho

'LUW1 SWIH1CH TUW1 OW1PAH0N AY1 BAE1K IH0N'

In [161]:
candidate_utts = select(utt2pho, keep=[row[0] for row in inferred['res']])
candidate_utts

{'Lou switch to Open AI backend': 'LUW1 SWIH1CH TUW1 OW1PAH0N AY1 BAE2KEH1ND',
 'Lou switch to openai backend': 'LUW1 SWIH1CH TUW1 OW0PEY0NAA1 BAE2KEH1ND',
 'Lou switch to Goose AI backend': 'LUW1 SWIH1CH TUW1 GUW1S AY1 BAE2KEH1ND',
 'Lou please switch to Open AI backend': 'LUW1 PLIY1Z SWIH1CH TUW1 OW1PAH0N AY1 BAE2KEH1ND',
 'Lou switch to gooseai backend': 'LUW1 SWIH1CH TUW1 GUW2SIY1 BAE2KEH1ND'}

In [51]:
process.extract(pho, candidate_utts.values())

[('LUW1 SWIH1CH TUW1 OW1PAH0N AY1 BAE2KEH1ND', 90),
 ('LUW1 PLIY1Z SWIH1CH TUW1 OW1PAH0N AY1 BAE2KEH1ND', 83),
 ('LUW1 SWIH1CH TUW1 OW0PEY0NAA1 BAE2KEH1ND', 79),
 ('LUW1 SWIH1CH TUW1 GUW1S AY1 BAE2KEH1ND', 78),
 ('LUW1 SWIH1CH TUW1 GUW2SIY1 BAE2KEH1ND', 72)]

In [68]:
tmp = get_backend(text)
tmp

 to open i back in


[('open ai', [('open i', 92)]),
 ('openai', [('open', 80)]),
 ('goose ai', [('open i', 57)]),
 ('hugging face', [('i back', 44)]),
 ('gooseai', [('open', 36)]),
 ('huggingface', [('in', 31)]),
 ('hobby', [('to', 29)])]

## Looking for patterns for PhraseMatcher

Rather than testing all possible 1-grams and 2-grams in utterance, we should be able to use our knowledge of the expected structure to test only good candidates. Trying to figure out what logic defines this - I imagine dependency parsing may help.

In [69]:
nlp = spacy.load('en_core_web_sm')

In [162]:
# Alternative: parse every expanded sample utterance instead of the templates.
# docs = [nlp(utt) for utt in backend_utts]
# Strip the braces so each placeholder is parsed as an ordinary word (e.g.
# "{backend}" becomes "backend").
docs = [nlp(utt.replace('{', '').replace('}', ''))
        for utt in backend_templates]

In [163]:
for doc in docs:
    displacy.render(doc, style='dep', jupyter=True)