In [11]:
import pandas as pd
import numpy as np
import nltk
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem import SnowballStemmer
# Download the tokenizer models needed by nltk.word_tokenize.
# Older NLTK releases load 'punkt'; newer releases look up 'punkt_tab'
# instead, so fetch both to keep the notebook runnable on a fresh kernel.
# nltk.download() reports an unknown package by returning False (no raise).
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [12]:

# Load the FAQ table from CSV into a DataFrame.
faq_csv_path = '/content/Insurance_FAQs.csv'
data = pd.read_csv(faq_csv_path)

In [13]:
# Show the loaded table as the cell output to check its columns and contents.
data

Unnamed: 0,Question,Answer,Class
0,What do you mean by exclusions under the Accid...,Exclusions are situations or conditions where ...,insurance
1,Can I include my spouse in this the Accidental...,"Yes, you can include your spouse and up to 2 c...",insurance
2,How do I make a claim,You can make a claim by calling our toll-free ...,insurance
3,When does my Revive Policy for Personal Accide...,It will commence within 15 days from the date ...,insurance
4,What plans are available under the Revive Pol...,HDFC ERGO offers a wide range of sums insured ...,insurance
...,...,...,...
464,Where can I get the list of hospitals extendin...,There is no standard list of hospitals. The in...,insurance
465,For how many days can the Travel Insurance pol...,The policy may be extended for a maximum of 18...,insurance
466,What are the eligibility criteria for a Travel...,Our Travel Insurance policy is available to pe...,insurance
467,Who do I contact for a claim on the Travel Ins...,In the event of a covered emergency and to reg...,insurance


In [14]:
# Shared English Snowball stemmer, created once and reused by preprocess().
stemmer = SnowballStemmer(language='english')
def preprocess(text):
    """Normalize text into a space-joined string of lower-cased word stems.

    Parameters
    ----------
    text : str
        Raw text to normalize. Missing values (``None`` or float NaN) are
        treated as empty text; any other non-string value is converted with
        ``str()`` before tokenizing.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces, or ``''`` when ``text``
        is missing.
    """
    # pandas represents empty CSV cells as float NaN, which has no .lower();
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    tokens = nltk.word_tokenize(str(text).lower())
    return ' '.join(stemmer.stem(token) for token in tokens)

# Replace each question with its stemmed, lower-cased form.
# NOTE(review): this overwrites the raw question text in place, so re-running
# this cell feeds already-stemmed text back through preprocess().
data['Question'] = data['Question'].map(preprocess)

In [16]:
# Learn the TF-IDF vocabulary and weights from the preprocessed questions,
# then encode those same questions as the matrix X (one row per question).
tfidf_vectorizer = TfidfVectorizer()
tfidf_vectorizer.fit(data['Question'])
X = tfidf_vectorizer.transform(data['Question'])

In [17]:
# Answer a query by retrieving the most similar FAQ question (TF-IDF + cosine similarity)
def search_engine(user_input, threshold=0.2):
    """Return the answer whose question best matches ``user_input``.

    The query goes through the same ``preprocess`` step and the same fitted
    ``tfidf_vectorizer`` as the indexed questions, and is then compared with
    every row of ``X`` by cosine similarity.

    Parameters
    ----------
    user_input : str
        The user's free-text query.
    threshold : float, optional
        Similarity the best match must exceed to count as relevant
        (default 0.2, the value previously hard-coded here).

    Returns
    -------
    The ``data['Answer']`` entry at the position of the best-matching
    question, or a fixed apology string when no question scores above
    ``threshold``.
    """
    query = preprocess(user_input)
    query_vector = tfidf_vectorizer.transform([query])

    # cosine_similarity returns an (n_questions, 1) matrix; ravel it to 1-D so
    # each position corresponds directly to a row of `data`.
    similarities = cosine_similarity(X, query_vector).ravel()

    # Only the best score matters: if it does not clear the threshold, nothing
    # does. The size check keeps an empty index from breaking max().
    if similarities.size == 0 or similarities.max() <= threshold:
        return "I'm sorry, I couldn't find relevant information for your query."

    best_index = int(similarities.argmax())
    return data['Answer'].iloc[best_index]

In [19]:
# Main function to interact with the chatbot
def main():
    """Run an interactive prompt loop around ``search_engine``.

    Reads queries with ``input("You: ")`` and prints each response prefixed
    with ``"Search:"``. The loop ends when the user enters ``q`` (any case,
    surrounding whitespace ignored), when the input stream is closed, or when
    the prompt is interrupted.

    Returns
    -------
    None
    """
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the prompt was interrupted: exit the loop
            # instead of surfacing a traceback.
            break
        # Strip so that stray whitespace around the quit command still quits.
        if user_input.strip().lower() == 'q':
            break
        response = search_engine(user_input)
        print("Search:", response)


if __name__ == "__main__":
    main()

You: health insurance
Search: Health insurance is a type of insurance coverage that covers the cost of an insured individual's medical and surgical expenses.
You: travel insurance
Search: Travel Insurance is not mandatory in India. However, it is mandatory in the UK and others countries including Austria, Greece, Portugal, Spain, France, Germany, Belgium, Luxembourg and the Netherlands. However, for a safe and peaceful trip, we recommend that you have a travel insurance policy no matter which country you are travelling to.
You: abc
Search: I'm sorry, I couldn't find relevant information for your query.
You: q


In [22]:
    # Persist the fitted vectorizer together with the FAQ table as one pickled
    # (vectorizer, data) tuple.
    components = (tfidf_vectorizer, data)
    with open('Search_Engine_trained_components.pkl', 'wb') as out_file:
        pickle.dump(components, out_file)
    print("Trained components saved successfully.")

Trained components saved successfully.
