In [None]:
#pip install langchain langchain-google-genai google-generativeai faiss-cpu python-dotenv tiktoken

!pip install langchain langchain-google-genai google-generativeai faiss-cpu python-dotenv tiktoken langchain-community

In [None]:
import os
#from dotenv import load_dotenv

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import google.generativeai as genai # For configuring the API key globally


In [None]:
# --- 1. Load Environment Variables (API Key) ---
# The key is never hardcoded in the notebook: it is read from the
# GOOGLE_API_KEY environment variable and, if that is unset or blank, the user
# is prompted for it without echoing the input.
from getpass import getpass

try:
    # google.colab is not installed outside Colab runtimes; nothing below
    # depends on it, so a missing package must not break this cell.
    from google.colab import auth
    # auth.authenticate_user()
except ImportError:
    auth = None

import google.generativeai as genai

google_api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
if not google_api_key:
    google_api_key = getpass("Enter your Google AI (Gemini) API key: ").strip()
if not google_api_key:
    # Fail here with a clear message rather than later inside an API call.
    raise ValueError("A Google AI API key is required to run this notebook.")

# Configure the google.generativeai client globally. The LangChain wrappers
# created later also receive the key explicitly through `google_api_key`.
genai.configure(api_key=google_api_key)

In [None]:
# --- 2. Load Knowledge Base ---
# Load the knowledge-base text file with TextLoader and stop early if nothing
# was read. The path must point to a file, not a directory.
print("Loading documents...")
kb_path = "/content/cont_map.txt"
loader = TextLoader(kb_path)
documents = loader.load()
if not documents:
    raise ValueError("No documents loaded. Check the file path and content.")
document_count = len(documents)
print(f"Loaded {document_count} document(s).")

In [None]:
# --- 3. Split Documents into Chunks ---
# Break the loaded documents into small overlapping pieces for embedding.
print("Splitting documents into chunks...")
splitter_options = {
    "chunk_size": 100,
    "chunk_overlap": 20,
    "length_function": len,  # lengths are measured with len()
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = text_splitter.split_documents(documents)
if not chunks:
    raise ValueError("Document splitting resulted in no chunks.")
chunk_count = len(chunks)
print(f"Split into {chunk_count} chunks.")


In [None]:
# --- 4. Create Embeddings and Vector Store ---
print("Creating embeddings and vector store...")
# Google embedding model used to embed the chunks. The chosen model must
# support embedding; to list the models that do:
# for m in genai.list_models():
#   if 'embedContent' in m.supported_generation_methods:
#     print(m.name)
embeddings_model_name = "models/embedding-001"
embeddings_model = GoogleGenerativeAIEmbeddings(
    model=embeddings_model_name,
    google_api_key=google_api_key,
)

# Build a FAISS vector store from the chunks using the embedding model above.
try:
    vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings_model)
except Exception as e:
    print(f"Error creating vector store: {e}")
    print("This might be due to API restrictions or an issue with the embedding model.")
    print("Ensure your Google AI API key is valid and has the Generative Language API enabled.")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down and loses the traceback, whereas re-raising stops execution at this
    # cell with the full error and keeps later cells from running against an
    # undefined `vectorstore`.
    raise
else:
    print("Vector store created successfully.")


In [None]:
# --- 5. Create Retriever ---
# Similarity search over the vector store with k=3 results per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
print("Retriever created.")

# --- 6. Define the LLM (Gemini) and Prompt Template ---
print("Defining LLM and prompt template...")
# Chat model: gemini-2.0-flash.
# If system prompts cause trouble with Gemini, passing
# convert_system_message_to_human=True is a possible workaround (left disabled).
llm_options = dict(
    model="gemini-2.0-flash",
    google_api_key=google_api_key,
    temperature=0.7,
)
llm = ChatGoogleGenerativeAI(**llm_options)

# Prompt text with two placeholders: {context} and {question}.
prompt_template = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer based on the context, just say that you don't know.
Do not try to make up an answer.
Use three sentences maximum and keep the answer concise.

Context:
{context}

Question:
{question}

Answer:
"""
prompt = ChatPromptTemplate.from_template(prompt_template)



In [None]:
# --- 7. Create the RAG Chain ---
print("Creating RAG chain...")

def format_docs(docs):
    """Concatenate the ``page_content`` of every item in ``docs``.

    Args:
        docs: Iterable of objects that expose a ``page_content`` string.

    Returns:
        One string containing each item's content, in order, with consecutive
        items separated by a blank line. An empty iterable yields "".
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

# Inputs for the prompt: "context" is the retriever output passed through
# format_docs, and "question" is the incoming query passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
# Pipeline: build prompt inputs -> fill prompt -> call the LLM -> parse to str.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()
print("RAG chain created.")



In [None]:
# --- 8. Ask Questions! ---
def ask_question(query):
    """Send ``query`` through ``rag_chain`` and print the question and its answer.

    Any exception raised while invoking the chain is caught and reported with
    print. When the error text contains "blocked" or "safety" (case-insensitive),
    extra guidance about safety settings is printed too. Nothing is returned.

    Args:
        query: The question passed to ``rag_chain.invoke``.
    """
    print(f"\n❓ Question: {query}")
    try:
        result = rag_chain.invoke(query)
        print(f"✅ Answer: {result}")
    except Exception as e:
        print(f"Error during RAG chain invocation: {e}")
        error_text = str(e).lower()
        if any(marker in error_text for marker in ("blocked", "safety")):
            safety_hints = (
                "The response might have been blocked due to safety settings. ",
                "You can try adjusting safety settings in the ChatGoogleGenerativeAI constructor if needed for your use case,",
                "or rephrasing the query/context if it triggered a safety filter.",
            )
            for hint in safety_hints:
                print(hint)


print("\n--- Starting Q&A ---")
# Sample questions, asked in order.
sample_questions = (
    "Explain memory mapping?",
    "What is available DATA memory?",
    "How many segment",
    "What is induction?",  # This should result in "I don't know"
)
for sample_question in sample_questions:
    ask_question(sample_question)

print("\n--- Q&A Finished ---")

# To allow for interactive questioning:
# print("\nEnter your questions (type 'exit' to quit):")
# while True:
#     user_query = input("> ")
#     if user_query.lower() == 'exit':
#         break
#     if user_query.strip():
#         ask_question(user_query)
#     else:
#         print("Please enter a question.")

--- Starting Q&A ---

 Question: Explain memory mapping?

 Answer: I am sorry, but the context provided does not have information about memory mapping.

 Question: What is available DATA memory?

 Answer: Available DATA memory is 41'548 bytes, with an additional 164 absolute bytes.

 Question: How many segment

 Answer: The context mentions segment part 152, segment part 161, segment part 146, segment part 151, segment part 143, and segment part 144. Therefore, the context mentions six segments.

 Question: What is induction?

 Answer: I am sorry, but the provided context does not contain information about induction. Therefore, I cannot answer the question.

--- Q&A Finished ---