# Generating Chatbot Response Based on Cosine Similarity

In [8]:
# Standard library
import string

# Third-party
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# WordNet is the corpus behind nltk.stem.WordNetLemmatizer, used during preprocessing.
nltk.download('wordnet')
# nltk.word_tokenize (used during preprocessing) needs the Punkt tokenizer data.
# Older NLTK releases look it up as 'punkt', newer ones as 'punkt_tab', so request
# both to keep the notebook runnable on a fresh environment. quiet=True suppresses
# the progress messages for these two extra downloads.
for tokenizer_resource in ('punkt', 'punkt_tab'):
    nltk.download(tokenizer_resource, quiet=True)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Hoashalarajh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [9]:
# Normalise the candidate responses and the user input with the same pipeline:
# lowercase -> strip punctuation -> tokenize -> lemmatize
response_sentences = ['netflix is better than other online streamings', 'Keeping faith is a good movie', 'India is a largest democraic country in South Asia']
user_input = "Which is a good online streaming platform among available"

lemmatizer = nltk.stem.WordNetLemmatizer()

# Translation table for str.translate: every character in string.punctuation
# maps to None, which deletes it from the text.
remove_punct_dict = {ord(punct): None for punct in string.punctuation}

# Tokenize each cleaned response sentence, then the cleaned user input.
response_tokens = []
for sent in response_sentences:
    cleaned = sent.lower().translate(remove_punct_dict)
    response_tokens.append(nltk.word_tokenize(cleaned))
user_input_tokens = nltk.word_tokenize(user_input.lower().translate(remove_punct_dict))

# Lemmatize token by token (lemmatize is called with its default arguments).
response_lemmatized = [list(map(lemmatizer.lemmatize, tokens)) for tokens in response_tokens]
user_input_lemmatized = list(map(lemmatizer.lemmatize, user_input_tokens))

In [10]:
# Turn the lemmatized token lists into TF-IDF vectors.
# The vectorizer is fitted on the response sentences only; the user input is then
# transformed with that already-fitted vectorizer so both live in the same vector space.
corpus = list(map(' '.join, response_lemmatized))
user_input_text = ' '.join(user_input_lemmatized)

vectorizer = TfidfVectorizer()
response_vectors = vectorizer.fit_transform(corpus)
user_input_vector = vectorizer.transform([user_input_text])

In [11]:
# Score the user input against every response sentence.
# Only one user-input vector is passed in, so the first row of the result
# holds the score for each response.
similarity_matrix = cosine_similarity(user_input_vector, response_vectors)
similarity_scores = similarity_matrix[0]

In [12]:
# Select the response with the highest cosine similarity score:
# argmax gives the position of the largest score, which is then used to index
# back into the original (unprocessed) response sentences.
# NOTE(review): if the user input shares no vocabulary with any response, all scores
# are presumably 0 and index 0 would be chosen — consider a minimum-score threshold.
index_of_max_score = similarity_scores.argmax()
selected_response = response_sentences[index_of_max_score]

In [13]:
# Report the highest similarity score found among the responses
best_score = max(similarity_scores)
print(f"Maximum similarity score is : {best_score}")

Maximum similarity score is : 0.5093988510338425


In [14]:
# Show the response that was picked from the response list
print(f"The Selected response from the response list is : {selected_response}")

The Selected response from the response list is : netflix is better than other online streamings
