In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re

# Load the dataset from the uploaded file
try:
    df = pd.read_csv('symbiosis_mba_unique_faq.csv')
    print("Dataset loaded successfully!")
except FileNotFoundError:
    print("Error: 'symbiosis_mba_unique_faq.csv' not found.")
    print("Please make sure the file is in the same directory as your Jupyter notebook.")
    df = None

if df is not None:
    # --- 1. Text Preprocessing ---
    def preprocess_text(text):
        """
        This function cleans up the text by:
        1. Converting it to lowercase.
        2. Removing punctuation.
        """
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)
        return text

    # Apply the preprocessing to all questions in your dataset
    df['Processed_Question'] = df['Question'].apply(preprocess_text)

    # --- 2. TF-IDF Vectorization ---
    # Create a TF-IDF Vectorizer to convert text to numerical data
    vectorizer = TfidfVectorizer()

    # Fit and transform the processed questions
    question_vectors = vectorizer.fit_transform(df['Processed_Question'])


    # --- 3. Chatbot Response Function ---
    def get_response(user_input):
        """
        This function finds the most relevant answer from the dataset
        for a given user question.
        """
        # Preprocess the user's input question
        processed_user_input = preprocess_text(user_input)

        # Convert the user's question into a vector
        user_vector = vectorizer.transform([processed_user_input])

        # Calculate the cosine similarity between the user's question and all dataset questions
        similarities = cosine_similarity(user_vector, question_vectors)

        # Find the index of the most similar question
        most_similar_index = similarities.argmax()

        # Retrieve the corresponding answer
        answer = df['Answer'].iloc[most_similar_index]

        return answer

    # --- 4. Running the Chatbot ---
    print("\nChatbot is ready! Type 'quit' to exit.")

    while True:
        user_question = input("You: ")
        if user_question.lower() == 'quit':
            break
        response = get_response(user_question)
        print(f"Bot: {response}")

Dataset loaded successfully!

Chatbot is ready! Type 'quit' to exit.


KeyboardInterrupt: Interrupted by user

In [3]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.46.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting audioop-lts<1.0 (from gradio)
  Downloading audioop_lts-0.2.2-cp313-abi3-win_amd64.whl.metadata (2.0 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading Brotli-1.1.0-cp313-cp313-win_amd64.whl.metadata (5.6 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.117.1-py3-none-any.whl.metadata (28 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.1-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.13.1 (from gradio)
  Downloading gradio_client-1.13.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting huggingface-hub<1.0,>=0.33.5 (from gradio)
  Downloading huggingface_hub-0.35.0-py3-none-any.whl.metadata (14 kB)
Collecting orjson~=3.0 (from gradio)
  D

In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import gradio as gr

# --- All the backend code from before ---

# Load the dataset
try:
    df = pd.read_csv('symbiosis_mba_unique_faq.csv')
    print("Dataset loaded successfully!")
except FileNotFoundError:
    print("Error: 'symbiosis_mba_unique_faq.csv' not found.")
    df = None

if df is not None:
    # Text Preprocessing
    def preprocess_text(text):
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)
        return text

    df['Processed_Question'] = df['Question'].apply(preprocess_text)

    # TF-IDF Vectorization
    vectorizer = TfidfVectorizer()
    question_vectors = vectorizer.fit_transform(df['Processed_Question'])

    # Chatbot Response Function
    def get_response(user_input):
        processed_user_input = preprocess_text(user_input)
        user_vector = vectorizer.transform([processed_user_input])
        similarities = cosine_similarity(user_vector, question_vectors)
        most_similar_index = similarities.argmax()
        answer = df['Answer'].iloc[most_similar_index]
        return answer

    # --- New Frontend Code ---
    # This function is a simple wrapper for Gradio
    def chatbot_interface(message, history):
        return get_response(message)

    # Create and launch the Gradio Chat Interface
    iface = gr.ChatInterface(
        fn=chatbot_interface,
        title="SCIT College Chatbot",
        description="Ask me any questions about the Symbiosis MBA program."
    )
    iface.launch()

Dataset loaded successfully!


  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
