# 1. Load packages and environment variables

In [1]:
import os
from dotenv import load_dotenv

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub

# Load environment variables from the .env file into this process's
# environment so later cells can read them (e.g. via os.environ).
load_dotenv()

True

# 2. Load and split the document

In [2]:
# Parse the source PDF into LangChain Document objects.
pdf_path = "./retrieval-augmented_generation.pdf"
documents = PyPDFLoader(file_path=pdf_path).load()

# Split the text on newlines and regroup it into chunks of roughly
# 1000 characters, with a 30-character overlap between neighbours.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=30,
)
split_documents = text_splitter.split_documents(documents)

# 3. Load the embedding model and create the FAISS database

In [3]:
# Alternative embedding backend (kept for reference):
# embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings(model="BAAI/bge-small-en-v1.5")

# Embed every chunk and build a FAISS index over the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=split_documents,
    embedding=embeddings,
)

# Persist the index to the "faiss_index" folder so it can be reloaded later.
vectorstore.save_local(folder_path="faiss_index")



  from .autonotebook import tqdm as notebook_tqdm


# 4. Load the local database

In [4]:
# Reload the vector store that was saved to the "faiss_index" folder.
# NOTE(review): allow_dangerous_deserialization=True permits pickle-based
# loading; only use it on index files you created yourself and trust.
new_vectorstore = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# 5. Configure the LLM via its API and build the retrieval QA chain with a prompt template

In [None]:
# Pull the shared retrieval-QA chat prompt from the LangChain Hub.
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Read the DeepSeek API key from the environment (populated from .env by
# load_dotenv()) instead of hard-coding a secret in the notebook.
deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
if not deepseek_api_key:
    raise RuntimeError(
        "DEEPSEEK_API_KEY is not set; add it to your .env file or environment."
    )

# DeepSeek is reached through the OpenAI-compatible client by overriding
# the base URL.
model = ChatOpenAI(
    model="deepseek-chat",
    api_key=deepseek_api_key,
    base_url="https://api.deepseek.com",
)

# Chain that fills the prompt with the retrieved documents and calls the LLM.
combine_docs_chain = create_stuff_documents_chain(model, retrieval_qa_chat_prompt)

# Full pipeline: retrieve relevant chunks from the FAISS store, then answer.
retrieval_chain = create_retrieval_chain(
    new_vectorstore.as_retriever(), combine_docs_chain
)



In [6]:
# Display the prompt template pulled from the hub, including its
# {context}, {chat_history} and {input} placeholders.
retrieval_qa_chat_prompt.pretty_print()


Answer any use questions based solely on the context below:

<context>
[33;1m[1;3m{context}[0m
</context>


[33;1m[1;3m{chat_history}[0m


[33;1m[1;3m{input}[0m


# 6. Query the content

In [8]:
import pprint

# Question sent through the retrieval chain under the "input" key.
question = "Give me the gist of Retrieval-Augmented Generation (RAG) in 3 sentences,and translate it to Chinese."

# Run retrieval + generation, then pretty-print only the generated answer.
res = retrieval_chain.invoke({"input": question})
pprint.pprint(res["answer"])

('**Gist of RAG in 3 sentences:**  \n'
 '1. RAG combines parametric memory (pre-trained seq2seq models) and '
 'non-parametric memory (retrieval from dense vector indexes like Wikipedia) '
 'to enhance knowledge-intensive NLP tasks.  \n'
 '2. It outperforms purely parametric models in open-domain QA by generating '
 'more factual and specific responses, leveraging retrieved documents '
 'dynamically during generation.  \n'
 '3. RAG allows "hot-swapping" the retrieval index without retraining, '
 "enabling flexible updates to the model's knowledge base.  \n"
 '\n'
 '**Chinese Translation:**  \n'
 '1. '
 'RAG（检索增强生成）结合了参数化记忆（预训练的seq2seq模型）和非参数化记忆（如维基百科的密集向量检索），以提升知识密集型NLP任务的表现。  \n'
 '2. 在开放域问答任务中，它通过动态利用检索到的文档生成更准确、具体的回答，优于纯参数化模型。  \n'
 '3. RAG支持无需重新训练即可“热替换”检索索引，使模型的知识库能灵活更新。')
