In [1]:
import pandas as pd  # For data manipulation
import spacy  # For natural language processing
from sklearn.feature_extraction.text import TfidfVectorizer  # For TF-IDF vectorization
from sklearn.metrics.pairwise import cosine_similarity  # For cosine similarity

In [2]:
# Load the question/answer dataset from 'qa.csv' (path is relative to the
# kernel's working directory). Later cells read its 'Question' and 'Answer'
# columns.
data = pd.read_csv('qa.csv')

In [3]:
# Load the spaCy pipeline named "en_core_web_sm"; preprocess_text calls `nlp`
# to tokenize text and read each token's lemma / stop-word / alphabetic flags.
# NOTE(review): this presumably requires the model package to be installed in
# the kernel's environment -- confirm in the setup instructions.
nlp = spacy.load("en_core_web_sm")

In [4]:
# Replace missing values in 'Question' with empty strings, presumably so that
# every value later handed to the spaCy pipeline is a str rather than NaN.
data['Question'] = data['Question'].fillna('')

In [5]:
def preprocess_text(text):
    """Normalize text into a space-separated string of lowercase lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    flagged as stop words, and tokens that are not alphabetic, are dropped;
    the lemma of every remaining token is lowercased.

    Args:
        text: Raw text to normalize.

    Returns:
        The kept lemmas joined by single spaces (an empty string when no
        token survives the filter).
    """
    kept_lemmas = []
    for tok in nlp(text):
        # Skip stop words and anything that is not alphabetic.
        if tok.is_stop or not tok.is_alpha:
            continue
        kept_lemmas.append(tok.lemma_.lower())
    return " ".join(kept_lemmas)

In [6]:
# Overwrite 'Question' with its normalized form (the output of
# preprocess_text for each row); the TF-IDF cell below is fitted on this column.
# NOTE: the raw question text is replaced in place -- re-run the loading cell
# to get the original questions back.
data['Question'] = data['Question'].apply(preprocess_text)

In [7]:
# Build the TF-IDF model over the preprocessed questions.
question_corpus = data['Question']

# `vectorizer` keeps the fitted vocabulary/IDF weights so new queries can be
# projected into the same space; `X` holds one TF-IDF row per stored question.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(question_corpus)

In [8]:
def answer_question(question, min_similarity=0.0):
    """Return the stored answer whose question best matches ``question``.

    The query is normalized with ``preprocess_text``, projected into the
    TF-IDF space of the fitted ``vectorizer`` and compared against every row
    of ``X`` with cosine similarity.

    Args:
        question: The user's question as free text.
        min_similarity: A best score at or below this value is treated as
            "no match". The default of 0.0 only rejects queries that share no
            vocabulary term with any stored question; raise it to demand
            closer matches.

    Returns:
        The 'Answer' value of the best-matching row of ``data``, or a
        fallback message when nothing matches.
    """
    no_match_message = "Sorry, I couldn't find an answer to that question."

    query = preprocess_text(question)
    query_vec = vectorizer.transform([query])

    # cosine_similarity returns a (1, n_questions) array; flatten it so the
    # argmax index is directly a row position in `data`.
    scores = cosine_similarity(query_vec, X).ravel()
    if scores.size == 0:
        # No stored questions at all: argmax would raise on an empty array.
        return no_match_message

    best_index = int(scores.argmax())

    # When the query shares no term with the corpus every score is 0 and
    # argmax falls back to index 0, which would return an unrelated answer.
    if scores[best_index] <= min_similarity:
        return no_match_message

    return data.iloc[best_index]['Answer']

In [9]:
# Interactive question/answer loop: keeps prompting until the user types
# 'exit' (case-insensitive, surrounding whitespace ignored) or input ends.
while True:
    try:
        user_question = input("Ask a question (type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: leave the loop
        # quietly instead of surfacing a traceback.
        break
    if user_question.lower() == 'exit':
        break
    if not user_question:
        # Blank input carries no terms to match on; ask again rather than
        # sending an empty query to the matcher.
        continue
    answer = answer_question(user_question)
    print("Answer:", answer)

Ask a question (type 'exit' to quit): what is black hole?
Answer:  and vice versa.
Ask a question (type 'exit' to quit): exit
