In [None]:
! pip -q install langchain_community tiktoken langchain-deepseek langchainhub chromadb langchain dotenv bs4 langchain-text-splitters langchain-ollama

In [None]:
import os
from dotenv import load_dotenv
from langchain_ollama import OllamaEmbeddings
from langchain_deepseek import ChatDeepSeek

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# LangSmith tracing is switched on only when an API key is actually available.
# Writing os.getenv(...) straight back into os.environ raises TypeError when the
# variable is unset (environment values must be strings), and is a no-op when it
# is set, so the key itself is never re-assigned here.
if os.getenv('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
    os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Model names used throughout the notebook.
EMBEDDING_MODEL_NAME = "qwen3-embedding:0.6b"
DEEPSEEK_MODEL_NAME = 'deepseek-chat'

# Shared embedding model and chat model instances.
# The DeepSeek API key is read from the environment (DEEPSEEK_API_KEY).
OLLAMA_EMBEDDING = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)
DEEPSEEK_LLM = ChatDeepSeek(
    model=DEEPSEEK_MODEL_NAME,
    temperature=0,
    api_key=os.getenv('DEEPSEEK_API_KEY'),
)


In [None]:
import bs4
from langchain_classic import hub
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma

## Indexing

# Load: fetch the blog post and parse only the elements whose CSS class is
# one of post-content / post-title / post-header.
loader = WebBaseLoader(
    web_path=('https://lilianweng.github.io/posts/2023-06-23-agent/',),
    bs_kwargs={
        'parse_only': bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

# Split: chunk_size=300 with an overlap of 50 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(docs)

# Embed: index the chunks in a Chroma vector store using the shared Ollama embeddings.
embeddings = OLLAMA_EMBEDDING
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

# Retriever interface over the vector store, used by the chains below.
retriever = vectorstore.as_retriever()

### Multi query

In [None]:
from langchain_classic.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Multi-query prompt: asks the LLM for five rephrasings of the user question,
# one per line.
template = """
You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector database.
By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search.

Provide these alternative questions separated by newlines. Original question: {question}
"""

prompt_perspectives = ChatPromptTemplate.from_template(template)

# Chain: prompt -> LLM -> plain string -> list of queries.
# Blank lines in the reply (the model often separates questions with empty
# lines) are dropped so that no empty query is sent to the retriever.
generate_queries = (
    prompt_perspectives
    | DEEPSEEK_LLM
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split('\n') if line.strip()])
)

In [None]:
from langchain_classic.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Each document is serialised with ``dumps`` so it can be compared by value;
    duplicates are removed and the survivors are deserialised with ``loads``.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list of unique documents in first-seen order (query by query,
        then rank by rank). An empty input yields an empty list.
    """
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while keeping insertion order; a plain set
    # would make the order depend on string hashing and differ between runs.
    unique_docs = dict.fromkeys(serialized)

    return [loads(doc) for doc in unique_docs]

# Question used for every chain in this notebook.
question = "What is task decomposition for LLM agents?"

# Generate query variants, retrieve for each one, then merge the result lists.
retrieval_chain = (
    generate_queries
    | retriever.map()
    | get_unique_union
)
docs = retrieval_chain.invoke({'question': question})

# Number of unique documents retrieved across all query variants.
len(docs)

In [None]:
from operator import itemgetter

# Answer-generation prompt: the retrieved documents fill {context}.
template = """
Answer the following question based on this context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# The leading mapping runs the multi-query retrieval chain for `context`
# and forwards the original question unchanged.
final_rag_chain = (
    dict(
        context=retrieval_chain,
        question=itemgetter('question'),
    )
    | prompt
    | DEEPSEEK_LLM
    | StrOutputParser()
)

final_rag_chain.invoke({'question': question})

### RAG Fusion

In [None]:
from langchain_classic.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# RAG Fusion related
# Prompt asking the LLM for four search queries related to the user question.
template = """
You are a helpful assistant language that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n

Output (4 queries):
"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

# Chain: RAG-fusion prompt -> LLM -> plain string -> list of queries.
# The chain must start from prompt_rag_fusion (not the multi-query prompt
# defined earlier), otherwise the prompt above is never used.
# Blank lines in the reply are dropped so no empty query reaches the retriever.
generate_queries = (
    prompt_rag_fusion
    | DEEPSEEK_LLM
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split('\n') if line.strip()])
)

In [None]:
from langchain_classic.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked result lists with reciprocal rank fusion.

    Every document receives ``1 / (rank + k)`` for each list it appears in,
    where ``rank`` is its zero-based position in that list; the contributions
    are summed per document. Documents are identified by their ``dumps``
    serialisation.

    Args:
        results: One ranked list of documents per query, best match first.
        k: Smoothing constant added to the rank. Should be positive: with
            ``k=0`` the first document of a list would divide by zero.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by score,
        highest first. An empty input yields an empty list.
    """
    fused_score = {}

    for ranked_docs in results:
        for rank, doc in enumerate(ranked_docs):
            doc_str = dumps(doc)
            fused_score[doc_str] = fused_score.get(doc_str, 0) + 1 / (rank + k)

    # sorted() is stable, so documents with equal scores keep first-seen order.
    ranked = sorted(fused_score.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

# Generate queries, retrieve for each one, then fuse the rankings into one scored list.
retrieval_chain_rag_fusion = (
    generate_queries
    | retriever.map()
    | reciprocal_rank_fusion
)
docs = retrieval_chain_rag_fusion.invoke({'question': question})

# Number of distinct documents in the fused ranking.
len(docs)

In [None]:
# Answer-generation prompt: the fused retrieval results fill {context}.
template = """
Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# The leading mapping runs the RAG-fusion retrieval chain for `context`
# and forwards the original question unchanged.
final_rag_fusion_chain = (
    dict(
        context=retrieval_chain_rag_fusion,
        question=itemgetter('question'),
    )
    | prompt
    | DEEPSEEK_LLM
    | StrOutputParser()
)

final_rag_fusion_chain.invoke({'question': question})