## Create RAG chain alternative - Using LCEL (LangChain Expression Language)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt used by the RAG chain. It instructs the model to answer strictly from
# the supplied context, to say so when the context does not contain the answer,
# and to support the answer with details from that context.
# The template has two input variables: {context} and {question}.
rag_prompt_text = """
    Use the following context to answer the question.
    If you don't know the answer based on the context, say you don't know the answer.
    Provide specific details from the context to support the answer.
                                                 
    context : {context}
    question : {question}
    answer:
"""

custom_prompt = ChatPromptTemplate.from_template(rag_prompt_text)


In [None]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Load the text files matching ``*.txt`` in the ``data`` directory, reading
# each one with TextLoader as UTF-8.
loader = DirectoryLoader(
    "data",
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
)

documents = loader.load()

print(f"Loaded {len(documents)} documents")

# Only preview when something was loaded: indexing documents[0] on an empty
# result would raise a bare IndexError that hides the real problem (wrong
# working directory or an empty data folder).
if documents:
    print("\nFirst document preview: \n")
    print(documents[0].page_content[:200] + "...")
else:
    print("\nNo files matched data/*.txt - check the working directory and the data folder.")

Loaded 3 documents

First document preview: 

Machine Learning Fundamentals

    Machine learning is a subset of artificial intelligence that enables systems to learn
    and improve from experience without being explicitly programmed. There are ...


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings:
#   chunk_size      - maximum size of each chunk, as measured by length_function
#   chunk_overlap   - overlap kept between chunks to maintain context
#   length_function - built-in len is used to measure chunk size
#   separators      - hierarchy of separators handed to the splitter
splitter_settings = {
    "chunk_size": 500,
    "chunk_overlap": 50,
    "length_function": len,
    "separators": ["\n\n", "\n", ". ", " ", ""],
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Split the loaded documents into chunks for embedding.
chunks = text_splitter.split_documents(documents)

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

import hashlib

persist_directory = "./chroma_db"

# Deterministic ids, one per chunk. The collection is persisted on disk, so
# without explicit ids every re-run of this cell would store the same chunks
# again under fresh ids and the retriever would start returning duplicates.
# The position is part of the hashed text so two chunks with identical content
# still get distinct ids.
# NOTE(review): this relies on Chroma upserting by id - confirm against the
# installed version. Entries written by earlier runs without these ids are not
# removed; delete ./chroma_db once if the collection already has duplicates.
chunk_ids = [
    hashlib.sha256(f"{position}:{chunk.page_content}".encode("utf-8")).hexdigest()
    for position, chunk in enumerate(chunks)
]

# Embed the chunks and store them in the persisted "rag_collection" collection.
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=HuggingFaceEmbeddings(),
    ids=chunk_ids,
    persist_directory=persist_directory,
    collection_name="rag_collection",
)

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
## Convert the vector store to a retriever.
# The keyword has to be spelled `search_kwargs`; the previous spelling
# (`seacrch_kwargs`) did not match that parameter, so the requested k=3 was
# not applied to the retriever.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 3}  ## Retrieve top 3 chunks
)

In [None]:
from langchain_core.documents import Document

def format_docs(docs: "list[Document]") -> str:
    """Format retrieved documents into a single context string for the prompt.

    Args:
        docs: The documents to format; only ``page_content`` is read from each.

    Returns:
        The ``page_content`` of every document, in order, separated by a blank
        line. An empty input yields an empty string.
    """
    return "\n\n".join(doc.page_content for doc in docs)

In [21]:
# Read the Gemini API key from the environment. load_dotenv() is called first
# so the lookup below runs after it has had the chance to populate the
# environment.
import os
from dotenv import load_dotenv

load_dotenv()
gemini = os.environ.get("GEMINI_API_KEY")

# Gemini chat model used as the generation step of the RAG chain.
# NOTE(review): ChatGoogleGenerativeAI is not imported in this section of the
# notebook - presumably an earlier cell imports it; confirm the notebook still
# works after Restart Kernel -> Run All.
gemini_settings = {
    "model": "gemini-2.5-flash",
    "google_api_key": gemini,
}
llm = ChatGoogleGenerativeAI(**gemini_settings)

### Build a chain using LCEL

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Input mapping for the prompt. The chain is invoked with the question string:
#   context  - the question goes through the retriever, then format_docs
#   question - the question is passed through unchanged
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: input mapping -> prompt -> Gemini model -> string output parser.
rag_chain_lcel = chain_inputs | custom_prompt | llm | StrOutputParser()

In [35]:
# Run the chain on a sample question; the returned answer is the cell output.
sample_question = "What is deep Learning? "
rag_chain_lcel.invoke(sample_question)

'Deep learning is a subset of machine learning based on artificial neural networks. These networks are inspired by the human brain and consist of layers of interconnected nodes. Deep learning has revolutionized fields like computer vision, natural language processing, and speech recognition.'