<a href="https://colab.research.google.com/github/appdevnick/Colab-Notebooks/blob/main/rag_hands_on_guide.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hands-On with RAG: A Practical Guide Using Legacy Protocols

This notebook contains the same code presented in the [blog post](https://nickslinuxlearnings.com/posts/Hands-On-with-RAG-A-Practical-Guide/). Run the cells in order, from top to bottom, to see Retrieval-Augmented Generation (RAG) in action: each cell depends on the ones before it.

In [None]:
# Install required packages with compatible versions
!pip install -q langchain-core==0.1.21 langsmith==0.0.83 langchain-community==0.0.20 langchain-openai openai chromadb python-dotenv

In [None]:
from dotenv import load_dotenv
import os
from getpass import getpass

# For Colab, prompt for the key with getpass instead of reading a .env file.
# Strip surrounding whitespace (easy to pick up when pasting) and fail fast on
# an empty entry instead of letting the first OpenAI request fail later.
api_key = getpass('Enter your OpenAI API key: ').strip()
if not api_key:
    raise ValueError('No OpenAI API key entered; re-run this cell and paste your key.')
os.environ['OPENAI_API_KEY'] = api_key

In [None]:
# Clone the repository to get the documentation files
!git clone https://github.com/appdevnick/Colab-Notebooks.git
!mv Colab-Notebooks/telecom_docs .

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import glob

# Load all detailed text files individually.
# glob returns paths in arbitrary order, so sort them to keep the document
# (and therefore chunk) order reproducible from run to run.
docs = []
for file_path in sorted(glob.glob('telecom_docs/*detailed.txt')):
    loader = TextLoader(file_path)
    docs.extend(loader.load())

# Stop here with a clear message if nothing was loaded, rather than passing an
# empty list on to the embedding step.
if not docs:
    raise FileNotFoundError(
        "No files matching 'telecom_docs/*detailed.txt' were found; "
        "run the cell that fetches the documentation first."
    )

# Split into chunks - sized for longer, more detailed content
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,  # Large chunks so each one carries more context
    chunk_overlap=400,  # Overlap so context is kept across chunk boundaries
    separators=["\n\n", "\n", " ", ""]  # Tried in order: paragraph, line, word, character
)
texts = text_splitter.split_documents(docs)

In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma

# Embed each chunk with OpenAI embeddings and index the vectors in Chroma
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=texts, embedding=embeddings)

In [None]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import PromptTemplate

# Set up the RAG pipeline with a more detailed prompt
llm = ChatOpenAI(temperature=0.2)
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3}  # Return the 3 most similar chunks per question
)


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    The retriever returns a list of Document objects. Substituting that list
    straight into the prompt would insert its Python repr (metadata, escaped
    newlines and all), so only the page text is kept, with a blank line
    between chunks.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string containing every chunk's text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Enhanced prompt template for more detailed responses
prompt = PromptTemplate.from_template(
    """Answer the question based on the following context. If the information is available in the context, provide specific details, examples, and technical specifications. If certain aspects of the answer cannot be derived from the context, clearly indicate what information is missing.

Context: {context}
Question: {question}

Answer: """
)

# Create the RAG chain: the question is sent to the retriever (whose results
# are flattened to plain text) and is also passed through unchanged, then both
# fill the prompt, which goes to the model and is parsed to a string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Compare RAG vs Base Model responses
def compare_responses(question):
    """Print the base model's answer and the RAG chain's answer to a question.

    The question is sent to ``llm`` directly and then to ``rag_chain``; each
    answer is printed under its own heading, followed by a separator line.

    Args:
        question: the question text passed to both ``llm`` and ``rag_chain``.
    """
    print(f"Question: {question}\n")

    # Each entry pairs a heading with a callable that fetches the answer, so
    # the heading is printed before the corresponding request is made.
    answer_sources = (
        ("Base Model Response:", lambda: llm.invoke(question).content),
        ("RAG Response:", lambda: rag_chain.invoke(question)),
    )
    for heading, fetch_answer in answer_sources:
        print(heading)
        print(f"{fetch_answer()}\n")

    print("-" * 80 + "\n")

# Test questions targeting technical details and vendor comparisons
questions = [
    "What's the T1 timer in X.25 and why does it vary between vendors?",
    "Compare NTT vs AT&T Accunet timer values and explain the reasons for their differences",
    "Explain the different frame types in X.25 and their specific roles in data transmission",
    "How does X.25 implement error recovery for satellite vs terrestrial links?",
    "What are the key differences in how AT&T Accunet and NTT handle international connections?",
    "Describe the performance impact of T1 timer settings in X.25 networks",
]

# Run the side-by-side comparison for every question, in list order
print("Testing RAG system with detailed documentation...\n")
for question in questions:
    compare_responses(question)