In [None]:
! pip -q install langchain_community tiktoken langchain-deepseek langchainhub chromadb langchain dotenv bs4 langchain-text-splitters langchain-ollama

In [None]:
import os
from dotenv import load_dotenv
from langchain_ollama import OllamaEmbeddings
from langchain_deepseek import ChatDeepSeek


load_dotenv()

# LangSmith tracing is enabled only when an API key is present in the environment.
# The key is deliberately not written back with `os.environ[...] = os.getenv(...)`:
# os.environ only accepts str values, so that assignment raises TypeError when the
# key is missing, and it is a no-op when the key is already set.
if os.getenv('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
    os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
else:
    print('LANGCHAIN_API_KEY is not set; LangSmith tracing was not enabled.')

# API key for the DeepSeek chat model, read from the environment.
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')

# Model identifiers used to build the shared clients below.
EMBEDDING_MODEL_NAME = "qwen3-embedding:0.6b"
DEEPSEEK_MODEL_NAME = 'deepseek-chat'

# Shared embedding and chat-model objects; temperature=0 is passed to the chat model.
OLLAMA_EMBEDDING = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)
DEEPSEEK_LLM = ChatDeepSeek(model=DEEPSEEK_MODEL_NAME, temperature=0, api_key=DEEPSEEK_API_KEY)


## Part 1: Overview

Full RAG Workflow

In [None]:
# Splitter settings for the overview pipeline: chunk length and the overlap
# between consecutive chunks, as passed to RecursiveCharacterTextSplitter.
CHUNK_SIZE, CHUNK_OVERLAP = 1000, 200

In [None]:
import bs4
from langchain_classic import hub
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

## Indexing

# Load the blog post; only elements carrying the listed CSS classes are parsed.
loader = WebBaseLoader(
    web_path=('https://lilianweng.github.io/posts/2023-06-23-agent/',),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
splits = text_splitter.split_documents(docs)

# Embed the chunks and index them in Chroma.
# An explicit collection name keeps this index apart from any other index built
# with Chroma.from_documents in the same kernel: without it every call writes to
# the same default collection, so chunks from different indexing cells get mixed.
embeddings = OLLAMA_EMBEDDING
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    collection_name="rag_overview",
)

retriever = vectorstore.as_retriever()


## Retrieval and Generation

# Prompt: pull the 'rlm/rag-prompt' template through the hub module.
prompt = hub.pull('rlm/rag-prompt')

# LLM: reuse the shared DEEPSEEK_LLM object rather than building a new client.
llm = DEEPSEEK_LLM

# Post-processing
def format_docs(docs):
    """Join the `page_content` of every document in `docs` into one string.

    Documents are separated by a blank line; an empty iterable yields "".
    """
    page_texts = []
    for document in docs:
        page_texts.append(document.page_content)
    return "\n\n".join(page_texts)

# Chain: the incoming question is sent to the retriever (whose documents are
# flattened to text by format_docs) and also passed through unchanged; both
# values fill the prompt, which goes to the LLM and is parsed to a plain string.
context_branch = retriever | format_docs
rag_chain = (
    {"context": context_branch, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Ask a question; the returned string is displayed as the cell output.
rag_chain.invoke("What is Task Decomposition?")

## Deep Dive: Indexing

In [None]:
# Sample query/document pair used by the token-count and embedding cells.
question, document = (
    "What kinds of pets do I like?",
    "My favorite pet is a cat.",
)

In [None]:
# Understanding the number of tokens:

import tiktoken

def number_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return how many tokens `string` encodes to under the named tiktoken encoding."""
    token_ids = tiktoken.get_encoding(encoding_name).encode(string)
    return len(token_ids)

# Token count of the sample question under the 'cl100k_base' encoding.
number_tokens_from_string(question, 'cl100k_base')


In [None]:
# Embed the sample question and document with the same model, then print the
# length of each resulting vector.
embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)
query_result, document_result = (
    embeddings.embed_query(text) for text in (question, document)
)

vector_lengths = (len(query_result), len(document_result))
print(*vector_lengths)


In [None]:
import numpy as np

def cosine_similarity_search(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Args:
        vec1: First vector (array-like of numbers).
        vec2: Second vector, same length as `vec1`.

    Returns:
        dot(vec1, vec2) / (||vec1|| * ||vec2||). When either vector has zero
        norm the similarity is undefined; 0.0 is returned instead of dividing
        by zero (which would yield nan and a RuntimeWarning).
    """
    dot_product = np.dot(vec1, vec2)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        return 0.0
    return dot_product / norm_product

# Compare the question embedding with the document embedding and print the score.
similarity = cosine_similarity_search(query_result, document_result)
print('cosine search:', similarity)

In [None]:
## Loading documents (a blog post)

import bs4
from langchain_community.document_loaders import WebBaseLoader

# Load the blog post; parsing is restricted to elements with the listed CSS classes.
blog_url = 'https://lilianweng.github.io/posts/2023-06-23-agent/'
post_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

loader = WebBaseLoader(
    web_path=(blog_url,),
    bs_kwargs={"parse_only": post_sections},
)
docs = loader.load()

In [None]:
# Splitting
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter built via from_tiktoken_encoder, using a smaller chunk size and
# overlap than the overview pipeline.
token_splitter_options = {"chunk_size": 300, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(**token_splitter_options)
splits = text_splitter.split_documents(docs)

In [None]:
# Indexing
from langchain_community.vectorstores import Chroma

ollama_embedding = OLLAMA_EMBEDDING

# Use a dedicated collection for this index. Without an explicit name,
# Chroma.from_documents writes to the default collection, which is shared by
# every other default-named index created in the same kernel; retrieval would
# then return chunks produced by other indexing cells as well.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=ollama_embedding,
    collection_name="rag_deep_dive",
)

retriever = vectorstore.as_retriever()

## Deep Dive: Retrieval

In [None]:
# Rebuild the retriever with k=1 in its search kwargs, run one query, and show
# how many documents came back.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))
docs = retriever.invoke("What is task decomposition?")
len(docs)


## Deep Dive: Generation

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders, {context} and {question}, that are filled
# in when the prompt is invoked.
template = """
Answer the following question only using the following context:
{context}

Question: {question}
"""

# Build a chat prompt from the template; the bare name on the last line displays
# the prompt object as the cell output.
prompt = ChatPromptTemplate.from_template(template)
prompt

In [None]:
# LLM: reuse the shared DEEPSEEK_LLM object rather than building a new client.
llm = DEEPSEEK_LLM

In [None]:
# Chain: send the filled-in prompt to the LLM (the same sequence as `prompt | llm`).
chain = prompt.pipe(llm)

In [None]:
# Pass only the text of the retrieved documents as context. Handing over the raw
# Document list would put its repr (metadata included) into the prompt.
chain.invoke({
    'context': "\n\n".join(doc.page_content for doc in docs),
    'question': "What is task decomposition?",
})

In [None]:
## Alternative: reuse a ready-made RAG prompt instead of the hand-written template
from langchain_classic import hub

# Pull the 'rlm/rag-prompt' template through the hub module.
prompt_hub_rag = hub.pull('rlm/rag-prompt')

In [None]:
# Display the pulled prompt object to inspect it.
prompt_hub_rag

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def _join_page_content(documents):
    """Concatenate the page_content of the given documents, separated by blank lines."""
    return "\n\n".join(document.page_content for document in documents)


# The question goes to the retriever and is also passed through unchanged.
# Retrieved documents are reduced to their text before filling the prompt, so the
# LLM receives the chunk contents rather than the repr of a list of Document objects.
rag_chain = (
    {'context': retriever | _join_page_content, "question": RunnablePassthrough()}
    | prompt_hub_rag
    | llm
    | StrOutputParser()
)

rag_chain.invoke('What is Task Decomposition?')