# Base Pipeline

![Classification pipeline diagram](../assets/classification_pipeline.png)

## regexp

In [7]:
import json

# Load the DREAM intent records. The context manager closes the file handle as
# soon as parsing is done (a bare `open(...)` leaves that to the garbage
# collector), and the explicit encoding avoids depending on the OS locale.
with open('../data/records/dream.json', encoding='utf-8') as dream_file:
    dream = json.load(dream_file)
dream[0]

{'intent_id': 0,
 'intent_name': 'what_are_you_talking_about',
 'sample_utterances': [],
 'regexp_for_sampling': ['(alexa ){0,1}what are ((you)|(we)) ((talking about)|(discussing))',
  '(alexa ){0,1}what ((you)|(we)) are (even ){0,1}((talking about)|(discussing))',
  '(alexa ){0,1}what does it mean',
  '(alexa ){0,1}pass that by me again',
  "(alexa ){0,1}i ((don't)|(didn't)|(do not)|(did not)) get it",
  '(alexa ){0,1}what it is about',
  '(alexa ){0,1}what is it about',
  'i lost common ground',
  '(alexa ){0,1}what (even ){0,1}is that',
  "(i ((did not get)|(don't understand)|(don't get)) ){0,1}what do you mean( alexa){0,1}",
  "(sorry, ){0,1}i ((don't)|(do not)|(didn't)|(did not)) ((understand)|(get))( ((what you mean)|(what are you talking about)))( alexa){0,1}",
  '((what you mean)|(what are you talking about))( alexa){0,1}',
  "i don't know what you just said"],
 'regexp_as_rules': ['(alexa ){0,1}are we having a communication problem',
  "(alexa ){0,1}i don't think you understan

In [8]:
import re

def regexp_classifier(utterance, intents_patterns):
    """Return the ids of all intents that have a pattern matching ``utterance``.

    Args:
        utterance: Text to classify.
        intents_patterns: Iterable of intent records (dicts). Each record needs
            an ``'intent_id'``; the patterns under ``'regexp_for_sampling'``
            and ``'regexp_as_rules'`` are both tried. Either list may be
            missing, ``None`` or empty.

    Returns:
        A set of ``intent_id`` values; empty when no pattern matches.

    Note:
        Patterns are applied with ``re.match``, which anchors only at the start
        of the string: a pattern that matches a prefix of the utterance counts
        as a hit.
    """
    detected = set()
    for intent in intents_patterns:
        # Treat an absent/null pattern list as empty instead of raising KeyError.
        patterns = (
            *(intent.get('regexp_for_sampling') or ()),
            *(intent.get('regexp_as_rules') or ()),
        )
        # `any` stops at the first hit; further matches would add the same id.
        if any(re.match(pattern, utterance) is not None for pattern in patterns):
            detected.add(intent['intent_id'])
    return detected

In [9]:
# Probe with a phrase covered by one of intent 0's sampling patterns.
regexp_classifier('what are you talking about', dream)

{0, 5}

In [10]:
# Probe with a second utterance to see which intent ids fire.
regexp_classifier('tell me something else', dream)

{1, 6}

In [11]:
# Probe with a very short utterance.
regexp_classifier('kind of', dream)

{6}

## retrieval

In [8]:
import json

# Load the banking77 intent records. The context manager closes the file handle
# as soon as parsing is done (a bare `open(...)` leaves that to the garbage
# collector), and the explicit encoding avoids depending on the OS locale.
with open('../data/records/banking77.json', encoding='utf-8') as banking77_file:
    banking77 = json.load(banking77_file)
banking77[0]

{'intent_id': 0,
 'intent_name': 'activate_my_card',
 'sample_utterances': ["Please help me with my card.  It won't activate.",
  'I tired but an unable to activate my card.',
  'I want to start using my card.',
  'How do I verify my new card?',
  "I tried activating my plug-in and it didn't piece of work"],
 'regexp_for_sampling': [],
 'regexp_as_rules': []}

In [20]:
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

# On-disk Chroma store; the collection embeds documents with a
# sentence-transformers model loaded by name.
embedder = SentenceTransformerEmbeddingFunction(
    model_name="Alibaba-NLP/gte-base-en-v1.5",
    trust_remote_code=True,
)
client = chromadb.PersistentClient(path='../data/chroma')
collection = client.get_or_create_collection(
    name="example_collection",
    embedding_function=embedder,
)

In [19]:
# Drops "example_collection" from the Chroma client so it can be rebuilt.
# NOTE(review): this cell sits after the cell that creates `collection`, yet its
# execution count (19) is lower than that cell's (20), so it appears to have been
# run before the collection was created. Run top to bottom, it would delete the
# collection right before the cell that populates it; it probably belongs above
# the creation cell. TODO confirm.
client.delete_collection("example_collection")

In [21]:
import itertools as it


# Flatten every intent's sample utterances in a single pass, keeping each
# utterance paired with the id of the intent it came from.
labelled_utterances = [
    (utterance, intent['intent_id'])
    for intent in banking77
    for utterance in intent['sample_utterances']
]
all_sample_utterances = [utterance for utterance, _ in labelled_utterances]
intent_labels = [intent_id for _, intent_id in labelled_utterances]
# Chroma ids are the utterance's position in the flattened list, as strings.
ids = [str(position) for position, _ in enumerate(all_sample_utterances)]

collection.add(
    documents=all_sample_utterances,
    ids=ids,
    metadatas=[{'intent_id': intent_id} for intent_id in intent_labels],
)

In [22]:
# Number of records currently stored in the collection.
collection.count()

385

In [24]:
# Fetch the 10 stored utterances closest to the query text.
collection.query(query_texts=['i want a new card'], n_results=10)

{'ids': [['197', '196', '218', '61', '201', '198', '204', '46', '148', '2']],
 'distances': [[238.65634155273438,
   254.8294219970703,
   264.3768615722656,
   302.11468505859375,
   304.7674255371094,
   325.4882507324219,
   330.3798522949219,
   334.2269592285156,
   337.5621337890625,
   338.61920166015625]],
 'metadatas': [[{'intent_id': 39},
   {'intent_id': 39},
   {'intent_id': 43},
   {'intent_id': 12},
   {'intent_id': 40},
   {'intent_id': 39},
   {'intent_id': 40},
   {'intent_id': 9},
   {'intent_id': 29},
   {'intent_id': 0}]],
 'embeddings': None,
 'documents': [['I want some extra physical cards.',
   "I'd like to order an additional card",
   'Can I request a card?',
   'I need to get my card quickly',
   'I need to order a new virtual card, how do I do that?',
   'I would like open a second card for my daughter, how can you assist me?',
   'Can you give me a virtual card?',
   'Do I need to do something to get a new card once it expires?',
   'Can I have more disposa