## Question Answering

In [1]:
documents = [
    "Artificial Intelligence (AI) is a branch of computer science that aims to create machines that can perform tasks that normally require human intelligence. Examples include natural language processing, computer vision, and robotics.",
        "Python is a high-level, interpreted programming language known for its readability and versatility. It supports multiple programming paradigms including procedural, object-oriented, and functional programming.",
        "The Solar System consists of the Sun and all celestial bodies that orbit it, including planets, moons, asteroids, and comets. The eight major planets are Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune.",
        "World War II was a global conflict that lasted from 1939 to 1945, involving most of the world's nations. It was primarily fought between the Allies and the Axis powers and resulted in significant political and social changes worldwide."
]

In [2]:
import re
import numpy as np
from collections import Counter

def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text.split()

tokenized_docs = [preprocess(doc) for doc in documents]
vocab = sorted(list(set([w for doc in tokenized_docs for w in doc])))

def vectorize(doc_tokens, vocab):
    vec = [0]*len(vocab)
    count = Counter(doc_tokens)
    for i, w in enumerate(vocab):
        vec[i] = count.get(w, 0)
    return vec

doc_vectors = [vectorize(doc, vocab) for doc in tokenized_docs]


### Similarity Based Retrieval

In [3]:
def cosine_sim(vec1, vec2):
    vec1 = np.array(vec1)
    vec2 = np.array(vec2)
    if np.linalg.norm(vec1)==0 or np.linalg.norm(vec2)==0:
        return 0
    return np.dot(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2))

def retrieve(query, doc_vectors, documents, vocab):
    query_tokens = preprocess(query)
    query_vec = vectorize(query_tokens, vocab)
    sims = [cosine_sim(query_vec, doc_vec) for doc_vec in doc_vectors]
    best_idx = np.argmax(sims)
    return documents[best_idx], sims[best_idx]


### Conditional Answering

In [15]:


# Retrieve a relevant document based on keywords
def retrieve_document(query):
    query_lower = query.lower()
    for doc in documents:
        if any(word in doc.lower() for word in query_lower.split()):
            return doc, 1.0  # placeholder similarity
    return None, 0.0

# Conditional question answering
def answer_question(query, retrieved_doc):
    if retrieved_doc is None:
        return "No relevant information found."

    query_lower = query.lower()
    doc_lower = retrieved_doc.lower()

    if "who" in query_lower:
        if "python" in doc_lower:
            return "Python was created by Guido van Rossum."
        elif "ai" in doc_lower:
            return "AI refers to machines that can perform tasks requiring human intelligence."
    elif "what" in query_lower:
        if "ai" in doc_lower:
            return "AI is a branch of computer science that creates intelligent machines."
        elif "python" in doc_lower:
            return "Python is a high-level, interpreted programming language."
        elif "solar system" in doc_lower:
            return "The Solar System consists of the Sun and all celestial bodies that orbit it."
        elif "world war ii" in doc_lower:
            return "World War II was a global conflict from 1939 to 1945."
    elif "where" in query_lower:
        if "photosynthesis" in doc_lower:
            return "Photosynthesis occurs in the chloroplasts of plant cells."
        elif "solar system" in doc_lower:
            return "All planets orbit the Sun."
    elif "when" in query_lower:
        if "world war ii" in doc_lower:
            return "World War II lasted from 1939 to 1945."

    return retrieved_doc  # fallback

# Simple RAG function
def simple_rag(query):
    retrieved_doc, sim = retrieve_document(query)
    ans = answer_question(query, retrieved_doc)
    print(f"Query: {query}")
    print(f"Retrieved Document (Sim={sim:.2f}): {retrieved_doc}")
    print(f"Answer: {ans}\n")

# Example queries
simple_rag("Who created Python?")
simple_rag("When did World War II happen?")
simple_rag("What is AI?")
simple_rag("Where does photosynthesis occur?")
simple_rag("What bodies orbit the Sun?")




Query: Who created Python?
Retrieved Document (Sim=1.00): Python is a high-level, interpreted programming language created by Guido van Rossum.
Answer: Python was created by Guido van Rossum.

Query: When did World War II happen?
Retrieved Document (Sim=1.00): World War II was a global conflict from 1939 to 1945.
Answer: World War II lasted from 1939 to 1945.

Query: What is AI?
Retrieved Document (Sim=1.00): Python is a high-level, interpreted programming language created by Guido van Rossum.
Answer: Python is a high-level, interpreted programming language.

Query: Where does photosynthesis occur?
Retrieved Document (Sim=1.00): Photosynthesis occurs in the chloroplasts of plant cells.
Answer: Photosynthesis occurs in the chloroplasts of plant cells.

Query: What bodies orbit the Sun?
Retrieved Document (Sim=1.00): The Solar System consists of the Sun and all celestial bodies that orbit it.
Answer: The Solar System consists of the Sun and all celestial bodies that orbit it.

