In [3]:
import json
import numpy as np
import speech_recognition as sr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pyttsx3
import webbrowser
import datetime
import wikipedia
import pandas as pd


In [4]:
# File paths to FAQ JSON and E-commerce CSV
faq_json_path = "Files/Ecommerce_FAQ_Chatbot_dataset.json"
csv_file_path = "Files/Final Dataset.csv"

# Load the FAQ dataset from JSON.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the file
# decodes the same way on every platform instead of depending on the locale
# default that open() would otherwise pick.
with open(faq_json_path, encoding='utf-8') as file:
    faq_data = json.load(file)

# Load the product catalogue. Malformed rows are skipped rather than aborting
# the read; low_memory=False parses the file in one pass so column dtypes are
# inferred from the whole column.
try:
    ecommerce_data = pd.read_csv(
        csv_file_path,
        encoding='utf-8',
        on_bad_lines='skip',
        low_memory=False
    )
except pd.errors.ParserError:
    print("Parsing error while reading the CSV file. Trying to read in smaller chunks...")
    chunk_size = 10000
    try:
        # The chunked reader is used as a context manager so the underlying
        # file handle is closed even if concatenation fails part-way through.
        with pd.read_csv(
            csv_file_path,
            chunksize=chunk_size,
            encoding='utf-8',
            on_bad_lines='skip'
        ) as ecommerce_data_chunks:
            ecommerce_data = pd.concat(ecommerce_data_chunks, ignore_index=True)
    except pd.errors.ParserError as e:
        print(f"Error while reading CSV in chunks: {e}")
        # Later cells check for None before touching the catalogue.
        ecommerce_data = None


In [5]:
if ecommerce_data is not None:
    # Build two parallel lists from the FAQ entries: answers[i] is the reply
    # that belongs to questions[i].
    questions, answers = [], []
    for entry in faq_data['questions']:
        questions.append(entry['question'])
        answers.append(entry['answer'])

    # Derive the text that product search runs against from the 'Name' column:
    # string names are lower-cased, anything else (e.g. NaN) becomes '', and a
    # final fillna('') guarantees the column holds no missing values.
    ecommerce_data['processed_product_info'] = (
        ecommerce_data['Name']
        .apply(lambda name: str(name).lower() if isinstance(name, str) else '')
        .fillna('')
    )

    # Show the first rows as a quick sanity check of the loaded catalogue.
    print(ecommerce_data.head())


  InvoiceNo StockCode         User ID  Product ID  \
0    536365    85123A  A3EI9TX2A4MUSZ   594549507   
1    536365     71053  A2UVFEZ7UDBRAA   594481813   
2    536365    84406B  A2J3WVIQ5LO1O7   594481902   
3    536365    84029G   AJ4QIAKKHW21N  089933623X   
4    536365    84029E  A1PY7ODDSRIFGV   879393742   

                                    Name       Brand  Price Category  \
0                     Aabad Cow Ghee Jar       Aabad  615.0  Grocery   
1                   Aabad Cow Ghee Pouch       Aabad  599.0  Grocery   
2                  Aabad Desi Ghee Pouch       Aabad  599.0  Grocery   
3             Aashirvaad Multigrain Atta  Aashirvaad  337.0  Grocery   
4  Aashirvaad Select Sharbati Wheat Atta  Aashirvaad  321.0  Grocery   

        SubCategory Quantity  \
0  Ghee & Vanaspati        1   
1  Ghee & Vanaspati        1   
2  Ghee & Vanaspati        1   
3   Flours & Grains        5   
4   Flours & Grains        5   

                                         Description  \

In [6]:
    # Two independent TF-IDF models, both dropping English stop words:
    # `vectorizer` learns its vocabulary from the FAQ questions and
    # `product_vectorizer` from the normalised product names.
    vectorizer = TfidfVectorizer(stop_words='english')
    product_vectorizer = TfidfVectorizer(stop_words='english')

    # Fit each model on its own corpus and keep the resulting matrices; the
    # matching functions compare incoming queries against these.
    faq_tfidf = vectorizer.fit_transform(questions)
    product_tfidf = product_vectorizer.fit_transform(ecommerce_data['processed_product_info'])


In [7]:
    # Function to find the best FAQ match
    def get_faq_match(query):
        """Find the FAQ question most similar to `query`.

        The query is projected with the FAQ `vectorizer` and compared against
        `faq_tfidf` by cosine similarity.

        Returns:
            A `(index, score)` tuple: the position of the highest-scoring FAQ
            question and its similarity score.
        """
        scores = cosine_similarity(vectorizer.transform([query]), faq_tfidf).ravel()
        top = scores.argmax()
        return top, scores[top]

    # Function to find the best product match
    def get_product_match(query):
        """Find the product whose processed name is most similar to `query`.

        The query is projected with `product_vectorizer` and compared against
        `product_tfidf` by cosine similarity.

        Returns:
            A `(index, score)` tuple: the row position of the highest-scoring
            product and its similarity score.
        """
        scores = cosine_similarity(product_vectorizer.transform([query]), product_tfidf).ravel()
        winner = scores.argmax()
        return winner, scores[winner]


In [8]:
    # Step 3: Speech Recognition Setup
    # A single Recognizer instance is created here and reused by
    # listen_to_speech() both to capture audio and to transcribe it.
    recognizer = sr.Recognizer()

    def listen_to_speech(timeout=None, phrase_time_limit=None):
        """Record one utterance from the microphone and transcribe it.

        Args:
            timeout: Maximum number of seconds to wait for speech to start.
                ``None`` (the default) waits indefinitely, as before.
            phrase_time_limit: Maximum number of seconds a phrase may last.
                ``None`` (the default) imposes no limit.

        Returns:
            The recognised text, or ``None`` when no speech started within
            `timeout`, the audio could not be understood, or the recognition
            request failed.
        """
        # Only the capture needs the microphone. Recognition is a network
        # request, so it runs after the `with` block has released the device.
        with sr.Microphone() as source:
            print("Listening...")
            try:
                audio = recognizer.listen(
                    source,
                    timeout=timeout,
                    phrase_time_limit=phrase_time_limit,
                )
            except sr.WaitTimeoutError:
                # Can only happen when a timeout was requested.
                print("No speech detected. Please try again.")
                return None

        try:
            query = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            print("Sorry, I couldn't understand that. Could you please repeat?")
            return None
        except sr.RequestError as e:
            print(f"Could not request results; {e}")
            return None

        print(f"You said: {query}")
        return query


In [9]:
    # Step 4: Text-to-Speech Setup
    # One pyttsx3 engine is initialised here and shared by speak_text(),
    # which every spoken response goes through.
    engine = pyttsx3.init()

    def speak_text(text: str) -> None:
        """Speak `text` aloud using the shared text-to-speech `engine`.

        The text is queued with ``engine.say`` and then ``engine.runAndWait``
        is called to process the queued utterance.
        """
        engine.say(text)
        engine.runAndWait()


In [10]:
    def tell_day():
        """Print today's weekday name and speak it as "The day is <name>"."""
        # Ordered to line up with datetime's weekday(): 0 is Monday, 6 is Sunday.
        weekday_names = (
            'Monday', 'Tuesday', 'Wednesday', 'Thursday',
            'Friday', 'Saturday', 'Sunday',
        )
        day_of_the_week = weekday_names[datetime.datetime.today().weekday()]
        print(day_of_the_week)
        speak_text("The day is " + day_of_the_week)

    def tell_time():
        """Print and speak the current local time as hours and minutes.

        The hour (24-hour clock) and minute are both zero-padded to two
        digits, e.g. "The time is 09 Hours and 05 Minutes".
        """
        # Format the datetime directly instead of slicing its string form by
        # fixed character offsets; this also avoids a local named `time`.
        now = datetime.datetime.now()
        message = f"The time is {now:%H} Hours and {now:%M} Minutes"
        print(message)
        speak_text(message)

    def hello() -> None:
        """Speak the assistant's opening greeting via speak_text()."""
        speak_text("Hello! I am your virtual assistant. How can I assist you today?")


In [11]:
    def virtual_assistant(product_threshold=0.2, faq_threshold=0.2):
        """Run the interactive assistant loop until the user asks to leave.

        Each turn reads a query (spoken or typed), then tries in order:
        the exit words, the built-in commands, a product lookup and finally
        an FAQ lookup. Every reply is printed and/or spoken.

        Args:
            product_threshold: Minimum cosine similarity for a product match
                to be accepted. Adjust as needed.
            faq_threshold: Minimum cosine similarity for an FAQ match to be
                accepted. Adjust as needed.
        """
        hello()

        while True:
            print("\nYou can either speak or type your question (type 'exit' to quit).")
            speak_text("You can either speak or type your question. Type exit to quit.")

            query = _read_query()
            if not query:
                # Nothing usable was entered or heard; prompt again.
                continue

            if query.strip().lower() in ('exit', 'quit', 'bye'):
                print("Assistant: Goodbye! Have a great day!")
                speak_text("Goodbye! Have a great day!")
                break

            if _handle_command(query):
                continue

            _answer_from_data(query, product_threshold, faq_threshold)

    def _read_query():
        """Ask for an input mode and return the user's query, or None to re-prompt."""
        mode = input("Type 's' for speech input or 't' for text input: ").strip().lower()

        if mode == 's':
            # listen_to_speech() already returns None when recognition fails.
            return listen_to_speech()
        if mode == 't':
            # Blank input is treated as "no query".
            return input("You: ").strip() or None

        print("Invalid mode selected. Please choose 's' for speech or 't' for text.")
        speak_text("Invalid mode selected. Please choose s for speech or t for text.")
        return None

    def _handle_command(query):
        """Execute a built-in command contained in `query`; return True if one ran."""
        # Match on a lower-cased copy so spoken input such as "Open Google"
        # triggers the same command as typed "open google".
        command = query.lower()

        if "open geeksforgeeks" in command:
            speak_text("Opening GeeksforGeeks")
            # Full URLs with a scheme: a bare "www..." string may be treated
            # as a local path by the browser launcher.
            webbrowser.open("https://www.geeksforgeeks.org")
        elif "open google" in command:
            speak_text("Opening Google")
            webbrowser.open("https://www.google.com")
        elif "which day it is" in command:
            tell_day()
        elif "tell me the time" in command:
            tell_time()
        elif "from wikipedia" in command:
            _answer_from_wikipedia(query)
        elif "tell me your name" in command:
            speak_text("I am Jarvis, your desktop assistant")
        else:
            return False
        return True

    def _answer_from_wikipedia(query):
        """Speak and print a four-sentence Wikipedia summary for the topic in `query`."""
        import re  # standard library; only needed for this command

        speak_text("Checking Wikipedia")
        # Remove the trigger phrase regardless of case, keeping the topic's
        # original capitalisation.
        topic = re.sub("from wikipedia", "", query, flags=re.IGNORECASE).strip()
        if not topic:
            response = "Please tell me what to look up on Wikipedia."
            print(f"Assistant: {response}")
            speak_text(response)
            return

        try:
            result = wikipedia.summary(topic, sentences=4)
        except wikipedia.exceptions.WikipediaException:
            # Missing or ambiguous pages must not end the whole session.
            response = "Sorry, I couldn't find a Wikipedia summary for that."
            print(f"Assistant: {response}")
            speak_text(response)
            return

        print(result)
        speak_text("According to Wikipedia")
        speak_text(result)

    def _answer_from_data(query, product_threshold, faq_threshold):
        """Answer `query` from the product catalogue, then the FAQ, else apologise."""
        # First, check if the query is product-related.
        product_idx, product_score = get_product_match(query)
        if product_score > product_threshold:
            product_info = ecommerce_data.iloc[product_idx]
            # The catalogue column is 'Name'; there is no 'Product Name' column.
            product_response = (
                f"Product: {product_info['Name']}, "
                f"Price: {product_info['Price']}, "
                f"Availability: {product_info.get('Availability', 'Unknown')}"
            )
            print(f"Assistant: {product_response}")
            speak_text(product_response)
            return

        # If no product match, check FAQ match.
        faq_idx, faq_score = get_faq_match(query)
        if faq_score > faq_threshold:
            response = answers[faq_idx]
        else:
            response = "I'm sorry, I couldn't find a relevant answer. Can you rephrase your question or contact our support team?"
        print(f"Assistant: {response}")
        speak_text(response)


In [None]:
# Run the Virtual Assistant
# Starts the interactive loop; it keeps prompting until the user enters
# 'exit', 'quit' or 'bye'.
if __name__ == '__main__':
    virtual_assistant()



You can either speak or type your question (type 'exit' to quit).
Assistant: I'm sorry, I couldn't find a relevant answer. Can you rephrase your question or contact our support team?

You can either speak or type your question (type 'exit' to quit).
Assistant: We accept major credit cards, debit cards, and PayPal as payment methods for online orders.

You can either speak or type your question (type 'exit' to quit).
