In [5]:
# Let's write a simple intent matching function using NLTK in Python for a restaurant booking system.

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import string
# Sample user utterance used throughout the notebook to exercise the pipeline
message = "Hi I would like to book for two people today at 7pm. Also cancel my booking for 6"

# Intent name -> keywords/phrases that signal that intent.
# Key order is significant: it is the tie-break order when scores are equal.
intents = {
    "greeting": [
        "hello",
        "hi",
        "greetings",
        "sup",
        "what's up",
        "howdy",
    ],
    "booking": [
        "book",
        "make a reservation",
        "reserve",
        "booking",
        "appointment",
    ],
    "cancellation": [
        "cancel",
        "cancel reservation",
        "cancel booking",
    ],
    "menu": [
        "show menu",
        "menu",
        "what do you have",
        "what's on the menu",
        "what is there on the menu",
        "what food is there",
        "what specials are there",
        "what food do you have",
    ],
}


In [6]:
# Initialize the lemmatizer (shared by the preprocessing and intent-scoring cells)
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalise a raw message into a list of lemmatised content tokens.

    Steps, in order: lower-case, tokenise with ``nltk.word_tokenize``, drop
    punctuation-only tokens, drop English stopwords, lemmatise each survivor.

    Args:
        text: The raw user message.

    Returns:
        A list of lower-cased, lemmatised tokens in their original order.
    """
    # Lower-case first so stopword and keyword comparisons are case-insensitive
    tokens = nltk.word_tokenize(text.lower())

    # Build the lookup sets once per call instead of re-reading the stopword
    # corpus for every token.
    stop_words = set(stopwords.words('english'))
    punctuation = set(string.punctuation)

    # Drop tokens made up entirely of punctuation characters. A plain
    # `tok in string.punctuation` is a substring test, so it lets
    # multi-character tokens such as '...' or the tokenizer's `` and ''
    # quote marks slip through.
    tokens = [tok for tok in tokens if not all(ch in punctuation for ch in tok)]
    # Remove stopwords
    tokens = [tok for tok in tokens if tok not in stop_words]
    # Lemmatize (default part of speech, i.e. each token is treated as a noun)
    return [lemmatizer.lemmatize(tok) for tok in tokens]

print(preprocess_text(message))


['hi', 'would', 'like', 'book', 'two', 'people', 'today', '7pm']


In [10]:

# Preprocess the message: tokenise, strip punctuation and stopwords, lemmatise each token
tokens = preprocess_text(message)

# Tag the tokens with part of speech; pos_tag returns a list of (token, tag) tuples
pos_tags = nltk.pos_tag(tokens)

# Score each intent by counting tokens that equal one of its keywords.
# NOTE: matching is per single token, so multi-word keywords such as
# "cancel booking" can never match here.
intent_scores = {intent: 0 for intent in intents}  # every intent starts at 0
for word, pos in pos_tags:
    # Consider only verbs and nouns. Match on the tag prefix so inflected
    # forms (VBD, VBG, VBP, NNS, ...) count too, not just the base 'VB'/'NN' tags.
    if pos.startswith(('VB', 'NN')):
        lemma = lemmatizer.lemmatize(word)  # computed once per token, not once per intent
        for intent, keywords in intents.items():  # (intent name, keyword list) pairs
            if lemma in keywords:
                intent_scores[intent] += 1

# Find the intent with the highest score (ties resolve to the first intent in
# `intents`). If nothing matched at all, report no intent rather than
# defaulting to the first key.
best_intent = max(intent_scores, key=intent_scores.get, default=None)
if best_intent is not None and intent_scores[best_intent] == 0:
    best_intent = None

print(intent_scores)



{'greeting': 1, 'booking': 1, 'cancellation': 0, 'menu': 0}


# Notes to consider throughout
Preprocessing: lemmatisation vs. stemming — which works better in my program? Stemming is usually quicker, but lemmatisation returns real dictionary words.

Natural convo; ask questions back
- 
- In main.py, derive intent, ask for **clarification** if one is not obvious
-         **Issues with this:** What if someone starts the sentence with an unrelated question, such as "What is your/my name?" or "How is the weather today?"
- Consider matching on n-grams rather than single words: **'make booking'** and **'cancel booking'** are completely different intents. Also handle **'see bookings'** / **'alter booking'**.
-       First part of the flow diagram: intent matching?

- Typo correction for user input: e.g. "bookng" -> "booking"; otherwise the keyword will be missed completely
- Remember names and identity

In [None]:
# Question Answering 
- Vectorized: we want to choose the next word based on similarity