In [1]:
# ✅ Step 1: Install all necessary dependencies (fixed for LangChain v0.1+)
!pip install -q langchain langchain-community faiss-cpu sentence-transformers transformers

# ✅ Step 2: Offline dummy dataset (no download required)
# Ten short, self-contained passages, one per topic. Inside each pair of
# parentheses Python joins the adjacent string literals into a single passage,
# so every list element is still one plain string.
documents = [
    (
        "The sun is the star at the center of the Solar System. "
        "It is a nearly perfect sphere of hot plasma, heated by nuclear fusion reactions."
    ),
    (
        "The water cycle describes how water evaporates from the surface of the earth, "
        "rises into the atmosphere, cools and condenses into rain or snow."
    ),
    (
        "Mahatma Gandhi was an Indian lawyer, anti-colonial nationalist and political ethicist "
        "who led the successful campaign for India's independence from British rule."
    ),
    (
        "Photosynthesis is the process by which green plants and some other organisms "
        "use sunlight to synthesize foods from carbon dioxide and water."
    ),
    (
        "A black hole is a region of spacetime where gravity is so strong that nothing"
        "—no particles or even electromagnetic radiation such as light—"
        "can escape from it."
    ),
    (
        "The French Revolution was a period of far-reaching social and political upheaval "
        "in France and its colonies that lasted from 1789 until 1799."
    ),
    (
        "Neurons are the fundamental units of the brain and nervous system, "
        "the cells responsible for receiving sensory input and sending motor commands."
    ),
    (
        "The Constitution of India is the supreme law of India. "
        "It lays down the framework demarcating fundamental political code, structure, and powers."
    ),
    (
        "Machine learning is a method of data analysis that automates analytical model building. "
        "It is a branch of artificial intelligence based on the idea that systems can learn from data."
    ),
    (
        "Einstein's theory of relativity explains how space and time are linked "
        "for objects that are moving at a consistent speed in a straight line."
    ),
]

# ✅ Step 3: Chunk and embed the dataset
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document

# Split each passage into chunks of at most 500 characters, letting
# neighbouring chunks share up to 50 characters.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
docs = splitter.create_documents(texts=documents)

# Embed every chunk with the sentence-transformers model named below and
# index the resulting vectors in a FAISS store for similarity search.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_documents(
    documents=docs,
    embedding=embedding_model,
)

# ✅ Step 4: Set up the QA model and RAG chain (no API keys!)
from transformers import pipeline
# HuggingFacePipeline is imported from langchain_community, where the class
# actually lives; the former `langchain.llms` path is only a deprecated
# re-export shim.
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Local text-to-text generation model; max_length caps the generated answer.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base", max_length=256)
# Wrap the transformers pipeline so LangChain can drive it as an LLM.
llm = HuggingFacePipeline(pipeline=qa_pipeline)

# Retrieval-augmented QA chain: the FAISS store supplies context passages and
# the wrapped model generates the answer from them.
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())

# ✅ Step 5: Simple command-line chatbot interface
def chat(chain=None):
    """Run an interactive question/answer loop on stdin/stdout.

    Args:
        chain: Object exposing ``invoke({"query": question})`` and returning
            a mapping whose ``"result"`` entry is the answer text. Defaults
            to the module-level ``rag_chain``, so a bare ``chat()`` call
            behaves as before.

    The loop ends when the user types ``exit`` or ``quit`` (any letter case,
    surrounding whitespace ignored) or when input is closed or interrupted.
    Blank lines are skipped instead of being sent to the model.
    """
    if chain is None:
        chain = rag_chain

    print("🤖 adam is ready. Ask anything! (type 'exit' to quit)\n")
    while True:
        try:
            q = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed stdin or an interrupted prompt should end the chat
            # cleanly rather than surface as a traceback.
            print("\nadam: Goodbye 👋")
            break

        if not q:
            # Nothing to ask; prompt again without calling the model.
            continue
        if q.lower() in ("exit", "quit"):
            print("adam: Goodbye 👋")
            break

        # Chain.run() is deprecated; invoke() takes the question under the
        # "query" key and returns a dict carrying the answer under "result".
        a = chain.invoke({"query": q})["result"]
        print("adam:", a)

# ✅ Step 6: Generate and save sample questions and responses
import pandas as pd

# Despite its name, qa_pairs holds only the questions; the answers are
# generated by the chain below.
qa_pairs = [
    "What is photosynthesis?",
    "Who was Mahatma Gandhi?",
    "What is the water cycle?",
    "Define a black hole.",
    "Explain Einstein's theory of relativity.",
]

# Chain.run() is deprecated; invoke() takes the question under the "query"
# key and returns a dict carrying the answer text under "result".
output = [(q, rag_chain.invoke({"query": q})["result"]) for q in qa_pairs]
df = pd.DataFrame(output, columns=["Question", "Answer"])
# Writing an .xlsx file requires the openpyxl package to be installed.
df.to_excel("sample_responses.xlsx", index=False)

print("✅ Setup complete! Use chat() to begin chatting, or check sample_responses.xlsx for example output.")


  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu
  llm = HuggingFacePipeline(pipeline=qa_pipeline)
  output = [(q, rag_chain.run(q)) for q in qa_pairs]


✅ Setup complete! Use chat() to begin chatting, or check sample_responses.xlsx for example output.
