In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

# Download NLTK resources (only needed the first time).
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK
# releases load the tokenizer models used by word_tokenize from that
# package; 'punkt' is kept so older NLTK installs keep working.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Initialize Lemmatizer and stop words (built once and reused by preprocess)
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess(text):
    """Turn raw text into a list of lemmatized content tokens.

    The text is lowercased and split with ``word_tokenize``; English stop
    words and punctuation-only tokens are dropped, and every remaining token
    is passed through ``lemmatizer.lemmatize``.

    Args:
        text: The string to clean.

    Returns:
        A list of lemmatized tokens, in their original order.
    """
    punctuation_chars = set(string.punctuation)
    tokens = []
    for token in word_tokenize(text.lower()):
        if token in stop_words:
            continue
        # Check character by character: a plain ``token in string.punctuation``
        # is a substring test, so multi-character tokens such as "...", "``"
        # or "''" would slip through.
        if all(ch in punctuation_chars for ch in token):
            continue
        tokens.append(lemmatizer.lemmatize(token))
    return tokens


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [2]:
import spacy

# Load the spaCy English pipeline once at module level so that
# preprocess() can reuse it on every call.
nlp = spacy.load(name='en_core_web_sm')

def preprocess(text):
    """Turn raw text into a list of lemmas using the spaCy pipeline ``nlp``.

    The text is lowercased, run through ``nlp``, and the lemma of every token
    that is not a stop word, punctuation, or whitespace is returned.

    Note: this shares its name with the NLTK-based ``preprocess`` defined
    earlier in the notebook; whichever definition was executed last is the
    one in effect.

    Args:
        text: The string to clean.

    Returns:
        A list of lemma strings, in document order.
    """
    doc = nlp(text.lower())
    return [
        token.lemma_
        for token in doc
        # is_space filters the whitespace tokens spaCy emits for newlines or
        # runs of spaces; they are neither stop words nor punctuation.
        if not (token.is_stop or token.is_punct or token.is_space)
    ]


In [3]:
# Knowledge base: maps each known question to its canned answer.
FAQS = {
    "What is your product?": (
        "Our product is a chatbot solution designed to automate "
        "customer support."
    ),
    "How much does the product cost?": (
        "The pricing depends on your requirements, but we offer several "
        "packages starting at $99/month."
    ),
    "How can I get support?": (
        "You can contact our support team via email at support@example.com "
        "or call our helpline."
    ),
    "What features does the product have?": (
        "Our chatbot has features like natural language understanding, "
        "integration with multiple platforms, and detailed analytics."
    ),
}


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def find_best_answer(user_query, faqs, min_similarity=0.0,
                     fallback_answer="Sorry, I couldn't find an answer to that question."):
    """Return the FAQ entry whose question is most similar to ``user_query``.

    Every FAQ question and the query are cleaned with ``preprocess``, embedded
    with a TF-IDF vectorizer fitted on exactly those texts (refitted on each
    call), and compared by cosine similarity.

    Args:
        user_query: The user's question as a string.
        faqs: Mapping of FAQ question -> answer.
        min_similarity: The best score must be strictly greater than this
            value to count as a match. The default of 0.0 rejects only
            queries that share no term with any FAQ question.
        fallback_answer: Answer text returned when nothing matches.

    Returns:
        A ``(question, answer)`` tuple for the best match, or
        ``(None, fallback_answer)`` when ``faqs`` is empty, the query has no
        usable tokens, or no question scores above ``min_similarity``.
    """
    faq_keys = list(faqs.keys())
    user_query_processed = ' '.join(preprocess(user_query))

    # Nothing to compare: without this guard an all-zero score vector would
    # make argmax pick index 0 and silently answer with the first FAQ.
    if not faq_keys or not user_query_processed:
        return None, fallback_answer

    documents = [' '.join(preprocess(faq)) for faq in faq_keys]

    # Use TF-IDF to measure similarity; the query is the last row.
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(documents + [user_query_processed])
    except ValueError:
        # TfidfVectorizer raises ValueError when its own tokenizer leaves an
        # empty vocabulary (e.g. only one-character tokens remain).
        return None, fallback_answer

    # Compute similarity of the query row against every FAQ row
    similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()
    best_match_idx = int(np.argmax(similarity_scores))

    if similarity_scores[best_match_idx] <= min_similarity:
        return None, fallback_answer

    # Return the most similar FAQ question together with its answer
    best_question = faq_keys[best_match_idx]
    return best_question, faqs[best_question]

# Demo: look up a single hard-coded query and show the matched FAQ entry.
user_input = "Can you tell me about the features?"
best_question, best_answer = find_best_answer(user_query=user_input, faqs=FAQS)
print(f"Q: {best_question}\nA: {best_answer}")


Q: What features does the product have?
A: Our chatbot has features like natural language understanding, integration with multiple platforms, and detailed analytics.


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import string

# Download required NLTK resources.
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK
# releases load the tokenizer models used by word_tokenize from that
# package; 'punkt' is kept so older NLTK installs keep working.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Initialize lemmatizer and stop words (built once and reused by preprocess)
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Preprocessing function
def preprocess(text):
    """Turn raw text into a list of lemmatized content tokens.

    The text is lowercased and split with ``word_tokenize``; English stop
    words and punctuation-only tokens are dropped, and every remaining token
    is passed through ``lemmatizer.lemmatize``.

    Args:
        text: The string to clean.

    Returns:
        A list of lemmatized tokens, in their original order.
    """
    punctuation_chars = set(string.punctuation)
    tokens = []
    for token in word_tokenize(text.lower()):
        if token in stop_words:
            continue
        # Check character by character: a plain ``token in string.punctuation``
        # is a substring test, so multi-character tokens such as "...", "``"
        # or "''" would slip through.
        if all(ch in punctuation_chars for ch in token):
            continue
        tokens.append(lemmatizer.lemmatize(token))
    return tokens

# Sample FAQ data: maps each known question to its canned answer.
FAQS = {
    "What is your product?": (
        "Our product is a chatbot solution designed to automate "
        "customer support."
    ),
    "How much does the product cost?": (
        "The pricing depends on your requirements, but we offer several "
        "packages starting at $99/month."
    ),
    "How can I get support?": (
        "You can contact our support team via email at support@example.com "
        "or call our helpline."
    ),
    "What features does the product have?": (
        "Our chatbot has features like natural language understanding, "
        "integration with multiple platforms, and detailed analytics."
    ),
}

# Function to find the best matching FAQ answer
def find_best_answer(user_query, faqs, min_similarity=0.0,
                     fallback_answer="Sorry, I couldn't find an answer to that question."):
    """Return the FAQ entry whose question is most similar to ``user_query``.

    Every FAQ question and the query are cleaned with ``preprocess``, embedded
    with a TF-IDF vectorizer fitted on exactly those texts (refitted on each
    call), and compared by cosine similarity.

    Args:
        user_query: The user's question as a string.
        faqs: Mapping of FAQ question -> answer.
        min_similarity: The best score must be strictly greater than this
            value to count as a match. The default of 0.0 rejects only
            queries that share no term with any FAQ question.
        fallback_answer: Answer text returned when nothing matches.

    Returns:
        A ``(question, answer)`` tuple for the best match, or
        ``(None, fallback_answer)`` when ``faqs`` is empty, the query has no
        usable tokens, or no question scores above ``min_similarity``.
    """
    faq_keys = list(faqs.keys())
    user_query_processed = ' '.join(preprocess(user_query))

    # Nothing to compare: without this guard an all-zero score vector would
    # make argmax pick index 0 and silently answer with the first FAQ.
    if not faq_keys or not user_query_processed:
        return None, fallback_answer

    documents = [' '.join(preprocess(faq)) for faq in faq_keys]

    vectorizer = TfidfVectorizer()
    try:
        # The query is appended last so it ends up as the final matrix row.
        tfidf_matrix = vectorizer.fit_transform(documents + [user_query_processed])
    except ValueError:
        # TfidfVectorizer raises ValueError when its own tokenizer leaves an
        # empty vocabulary (e.g. only one-character tokens remain).
        return None, fallback_answer

    similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()
    best_match_idx = int(np.argmax(similarity_scores))

    if similarity_scores[best_match_idx] <= min_similarity:
        return None, fallback_answer

    best_question = faq_keys[best_match_idx]
    return best_question, faqs[best_question]

# Chatbot function
def chatbot():
    """Run an interactive FAQ session on stdin/stdout.

    Prompts with ``"\\nYou: "`` in a loop and prints the answer returned by
    ``find_best_answer(user_input, FAQS)`` for each question. The session
    ends when the user types ``exit``, ``quit`` or ``bye`` (case-insensitive,
    surrounding whitespace ignored), or when input is closed or interrupted.
    Blank lines are ignored and simply re-prompt.

    Returns:
        None.
    """
    exit_commands = {'exit', 'quit', 'bye'}
    print("Welcome to the FAQ chatbot! Ask me anything about our product.")

    while True:
        try:
            # Strip so that "exit " or " quit" are still recognised below.
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel: leave the loop cleanly
            # instead of surfacing a traceback to the user.
            print("\nChatbot: Goodbye!")
            break

        if not user_input:
            # Nothing was asked; do not send an empty query to the matcher.
            continue

        if user_input.lower() in exit_commands:
            print("Chatbot: Goodbye!")
            break

        _, best_answer = find_best_answer(user_input, FAQS)
        print(f"Chatbot: {best_answer}")

# Run chatbot: start the interactive session only when this code is executed
# directly (where __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    chatbot()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Welcome to the FAQ chatbot! Ask me anything about our product.

You: What features does the product have?
Chatbot: Our chatbot has features like natural language understanding, integration with multiple platforms, and detailed analytics.
