In [1]:
import nltk
nltk.download('wordnet')

import json
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Chirag\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Load the intent definitions from disk.
# The encoding is passed explicitly: JSON text is UTF-8, but open() otherwise
# falls back to the platform's locale encoding (e.g. cp1252 on Windows), which
# can mis-decode or fail on non-ASCII characters in the patterns.
# NOTE: this is an absolute, machine-specific path; adjust it when running elsewhere.
with open("D://Study Material//My AI's//NLU models//intents.json", 'r', encoding='utf-8') as file:
    data = json.load(file)

In [4]:
# Stemmer and lemmatizer instances shared by the text preprocessing step.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# Intent definitions from the loaded JSON, plus the (initially empty)
# parallel lists that will hold preprocessed patterns (X) and their tags (y).
intents = data['intents']
X, y = [], []

In [5]:
def preprocess_text(text):
    """Normalize a raw message into a space-separated string of tokens.

    The text is lower-cased, every character that is not an ASCII letter or
    digit is replaced by a space, and each whitespace-separated token is
    passed through the module-level ``stemmer`` and then through the
    module-level ``lemmatizer``.

    Args:
        text: Raw input string.

    Returns:
        The processed tokens joined by single spaces; an empty string when
        the input contains no tokens.
    """
    cleaned = re.sub(r"[^a-zA-Z0-9]", " ", text.lower())
    tokens = (
        lemmatizer.lemmatize(stemmer.stem(token))
        for token in cleaned.split()
    )
    return " ".join(tokens)

In [6]:
# Build the training corpus: one (preprocessed pattern, tag) pair per example.
# The lists are rebuilt from scratch here so that re-running this cell does
# not append every pattern a second time; duplicated samples would end up on
# both sides of the train/test split and distort the measured accuracy.
X = []
y = []
for intent in intents:
    tag = intent['tag']
    for pattern in intent['patterns']:
        X.append(preprocess_text(pattern))
        y.append(tag)

In [7]:
# TF-IDF vectorizer created with default settings; it is used as the 'tfidf'
# stage of the classifier pipeline.
vectorizer = TfidfVectorizer()

In [8]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=30,
)

In [9]:
# Intent classifier: TF-IDF features fed into a linear-kernel SVM.
svm_classifier = Pipeline(steps=[
    ('tfidf', vectorizer),
    ('svm', SVC(C=1.5, kernel='linear')),
])

# Train the whole pipeline on the training split.
svm_classifier.fit(X_train, y_train)


In [10]:
# Score the fitted pipeline on the held-out split and report the result
# as a percentage.
y_pred = svm_classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Intent Recognition Accuracy: {accuracy*100}%")

Intent Recognition Accuracy: 57.14285714285714%


The accuracy of this model is only about 57.14% because the training dataset is small. With a larger dataset, the accuracy increases to 84.7%.

In [11]:
def recognize_intent(message):
    """Predict the intent tag for a single raw user message.

    The message is normalized with ``preprocess_text`` and passed, as a
    one-element batch, to the fitted ``svm_classifier`` pipeline.

    Args:
        message: Raw text typed by the user.

    Returns:
        The first (and only) prediction returned by the classifier.
    """
    predictions = svm_classifier.predict([preprocess_text(message)])
    return predictions[0]

In [12]:
# Interactive loop: classify each message typed by the user until they ask
# to stop. The exit check ignores case and surrounding whitespace, so inputs
# such as "STOP" or " quit " end the loop instead of being classified.
EXIT_COMMANDS = {"stop", "quit"}

while True:
    user_input = input("Enter a message: ")
    command = user_input.strip().lower()
    if command in EXIT_COMMANDS:
        break
    if not command:
        # A blank message has no tokens to classify; ask again rather than
        # printing a prediction for empty input.
        continue
    recognized_intent = recognize_intent(user_input)
    print(f"Recognized Intent: {recognized_intent}")

Recognized Intent: greeting
Recognized Intent: howareyou
Recognized Intent: help
Recognized Intent: name
