In [133]:
import random
from random import shuffle

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

In [134]:
# Fetch the NLTK resources used by preprocess_text: the tokenizer model and the
# English stop-word list. Recent NLTK releases load the tokenizer from
# 'punkt_tab' while older ones use 'punkt', so both are requested to keep
# word_tokenize working regardless of the installed version.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /home/ambhi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/ambhi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [136]:
def preprocess_text(text):
    """Reduce a sentence to a space-separated string of stemmed, non-stop-word tokens.

    The input is lower-cased and split with NLTK's ``word_tokenize``. Tokens found
    in NLTK's English stop-word list are discarded, and every remaining token is
    passed through the Porter stemmer.

    Args:
        text: The raw string to normalise.

    Returns:
        A single string with the surviving stems joined by one space each.
    """
    words = word_tokenize(text.lower())
    english_stop_words = set(stopwords.words('english'))
    porter = PorterStemmer()
    return ' '.join(
        porter.stem(word) for word in words if word not in english_stop_words
    )

In [121]:
# Base labelled examples as (utterance, intent) pairs, generated from
# intent-grouped utterances so each label is written once per group.
training_data = [
    (utterance, intent)
    for intent, utterances in (
        ("joining", (
            "Can I join the meeting room?",
            "Is there any available room?",
            "Can I be part of this room?",
        )),
        ("leaving", (
            "I want to leave the conference room.",
            "Leaving the room now.",
            "I'm done with this meeting room.",
        )),
        ("creating", (
            "Create a new chat room.",
            "Can you help me create a room?",
            "Let's create a private room for our team.",
        )),
    )
    for utterance in utterances
]
# Extra (utterance, intent) pairs for the joining / leaving / creating intents.
# Groups are listed in the sequence the pairs should appear in the final list.
# NOTE(review): the mute / video-off / screen-share questions are labelled
# "joining" -- confirm this is the intended intent for in-meeting actions.
additional_training_data = [
    (utterance, intent)
    for intent, utterances in (
        ("joining", (
            "How do I join the video call?",
            "Is there a way to enter the meeting room?",
            "Can you add me to the group?",
        )),
        ("leaving", (
            "Exiting the virtual conference.",
            "I need to leave the call now.",
            "Leaving the chat room.",
        )),
        ("creating", (
            "Create a new public chat room.",
            "Let's create a channel for discussions.",
            "I want to create a separate room for this project.",
        )),
        ("joining", (
            "How do I mute myself in the meeting?",
            "I want to mute my audio during the conference.",
            "How do I turn off my video?",
        )),
        ("leaving", (
            "Can we end the call now?",
            "I'm ending the meeting.",
            "Let's finish this conference.",
        )),
        ("creating", (
            "Create a new voice call room.",
            "Can you make a room for a one-on-one call?",
            "I need to set up a meeting room for the team.",
        )),
        ("joining", (
            "How do I share my screen?",
            "Can you show me how to share the screen in the meeting?",
            "I want to share my screen with the group.",
        )),
    )
    for utterance in utterances
]
# Examples for the "deleting" intent; every utterance shares the same label,
# so the label is attached once by the comprehension.
additional_training_data1 = [
    (utterance, "deleting")
    for utterance in (
        "How do I delete a chatroom?",
        "Is there a way to remove a chatroom?",
        "Can you help me delete a group?",
        "Closing the chatroom.",
        "I need to remove this chatroom.",
        "Deleting the group chat.",
        "How can I archive a chatroom?",
        "Can you show me the steps to delete a chatroom?",
        "I want to close this room permanently.",
    )
]

# Merge the three example lists into one dataset, then shuffle it in place.
# A dedicated, seeded Random instance is used so a fresh "Restart & Run All"
# always yields the same ordering; with the module-level shuffle the order --
# and therefore the later train/test split and accuracy -- changed on every run.
training_data = training_data + additional_training_data + additional_training_data1
random.Random(42).shuffle(training_data)


In [122]:
# Separate the (text, label) pairs into parallel lists.
# The texts are run through preprocess_text because the prediction cell
# preprocesses user queries before vectorizing them. Fitting on raw text would
# leave the vocabulary ("create", "leaving") out of step with the stemmed query
# tokens ("creat", "leav"), so those words would be ignored at prediction time.
X = [preprocess_text(text) for text, _ in training_data]
y = [label for _, label in training_data]

# Hold out 20% of the examples for evaluation; random_state fixes the split for
# a given input order.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [123]:
# Learn the bag-of-words vocabulary from the training texts only, then encode
# both splits as token-count matrices using that same vocabulary.
vectorizer = CountVectorizer().fit(X_train)
X_train_vectorized = vectorizer.transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [124]:
# Map the intent names to integer codes. The mapping is learned from the
# training labels and then applied unchanged to both splits.
label_encoder = LabelEncoder().fit(y_train)
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [125]:
# Fit a multinomial Naive Bayes model on the training count matrix and the
# encoded intent labels.
classifier = MultinomialNB().fit(X_train_vectorized, y_train_encoded)

In [None]:
# Sanity check: run one known sentence through preprocess_text and display the
# result as the cell output so the normalisation can actually be inspected
# (previously the value was computed and then discarded).
input_text = "Can I join the meeting room?"
preprocessed_text = preprocess_text(input_text)
preprocessed_text

In [137]:
# Predict encoded intent labels for the held-out queries.
y_pred_encoded = classifier.predict(X_test_vectorized)
# Compare the predictions with the true encoded labels of the held-out set.
accuracy = accuracy_score(y_test_encoded, y_pred_encoded)
# Report the score as a percentage with two decimals.
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 87.50%


In [139]:
# Classify one free-text query typed by the user.
user_input = input("Enter your query: ")
# Apply the same normalisation helper used elsewhere in the notebook, then
# encode the query with the already-fitted vocabulary.
preprocess_input = preprocess_text(user_input)
input_vector = vectorizer.transform([preprocess_input])

if input_vector.nnz == 0:
    # Nothing in the query is in the fitted vocabulary (e.g. empty input), so
    # the vector is all zeros and the model could only fall back on its class
    # priors. Report "no prediction" instead of presenting that as an intent.
    predicted_encoded_label = None
    predicted_intent = None
else:
    predicted_encoded_label = classifier.predict(input_vector)
    # inverse_transform returns an array; the single query's label is element 0.
    predicted_intent = label_encoder.inverse_transform(predicted_encoded_label)[0]
predicted_intent

'joining'