<a href="https://colab.research.google.com/github/alsedawy/sedawy/blob/main/Research.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Install all required packages in one line
!pip install -q langchain langchain-openai chromadb sentence-transformers langchain-text-splitters

# Import core libraries from specific packages
import os
from google.colab import userdata
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI

# Import LCEL components from langchain_core
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# The OpenAI key must already be stored in the Colab "Secrets" panel under the
# name OPENAI_API_KEY; it is copied into the process environment here
# (presumably where the OpenAI client created later in this cell looks for it).
os.environ.update(OPENAI_API_KEY=userdata.get("OPENAI_API_KEY"))


# 1. Load Document
# The file is decoded as latin-1 to get past the UnicodeDecodeError seen with
# the loader's default encoding.
# NOTE(review): latin-1 accepts any byte, so if the file is really cp1252 or
# UTF-8 some characters may be decoded incorrectly -- confirm the encoding.
file_path = "IT_Q&A.txt"
loader = TextLoader(file_path=file_path, encoding="latin-1")
documents = loader.load()

# 2. Split Text
# Chunks of at most 500 units with a 50-unit overlap; the splitter tries the
# separators in order: paragraph break, line break, space, then any position.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_overlap=50,
    chunk_size=500,
)
texts = text_splitter.split_documents(documents)
print(f"✅ Documents successfully split into {len(texts)} chunks ready for indexing.")

# 3. Embedding and Indexing
# Embedding model used to turn each chunk into a vector.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5"
)

# Deterministic ids, one per chunk (source name + position in `texts`).
# Without explicit ids every run of this cell stores the same chunks again in
# the persisted ./chroma_db directory under fresh random ids, so the retriever
# ends up returning several copies of the same passage. With stable ids a
# re-run overwrites the existing entries instead of adding new ones.
# NOTE(review): entries written by earlier runs (random ids) and chunks left
# over from a previously longer document are not removed -- delete ./chroma_db
# once to start from a clean index.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}-chunk-{position}"
    for position, doc in enumerate(texts)
]

# Create ChromaDB vector store and index the texts
vectorstore = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db"
)
print("✅ Technical knowledge base indexing successfully completed!")

# 1. Setup LLM and Retriever
# temperature=0 asks the chat model for the least random sampling.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
# The retriever is created with the vector store's default search settings.
retriever = vectorstore.as_retriever()

# 2. Define the Prompt Template
# The prompt has two placeholders, {question} and {context}, and tells the
# model to answer only from the supplied context or reply with a fixed
# "not found" sentence.
template = (
    "You are an expert AI assistant specializing in Information Technology.\n"
    "Answer the following question based ONLY on the provided context.\n"
    'If you cannot find the answer in the context, state: "I could not find the answer in the knowledge base."\n'
    "\n"
    "Question: {question}\n"
    "Context: {context}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# 3. Build the RAG Chain using LCEL
def _format_docs(docs):
    """Join the text of the retrieved chunks into a single context string.

    Args:
        docs: Iterable of retrieved documents; each must expose its text as
            ``page_content``.

    Returns:
        The chunk texts separated by blank lines ("" when nothing was
        retrieved).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever output is piped through _format_docs so that {context} in the
# prompt receives plain text. Passing the retriever directly would insert the
# string form of a list of Document objects (metadata and escaped newlines
# included) into the prompt.
# The incoming question string is forwarded unchanged as {question}.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# 4. Ask a Question and Get the Answer
question = "What are the steps to secure a Wi-Fi network?"

# Header is printed before the chain runs, so it is visible even when the
# model call below fails.
print(
    "\n========================================",
    f"Question: {question}",
    "----------------------------------------",
    "Answer:",
    sep="\n",
)
# Note: the next call raises RateLimitError when the OpenAI account has no
# remaining quota; in that case the closing separator is never printed.
print(rag_chain.invoke(question))
print("========================================")



✅ Documents successfully split into 386 chunks ready for indexing.
✅ Technical knowledge base indexing successfully completed!

Question: What are the steps to secure a Wi-Fi network?
----------------------------------------
Answer:


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}