# 1. Load packages and environment variables

In [1]:
import os
from dotenv import load_dotenv

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub

# Load variables from the local .env file into the process environment.
# The DEEPSEEK_API_KEY read later via os.getenv is expected to be defined there.
load_dotenv()

True

# 2. Load document

In [2]:
# Read the source PDF from disk.
pdf_path = "./retrieval-augmented_generation.pdf"
loader = PyPDFLoader(file_path=pdf_path)
documents = loader.load()

# Break the loaded documents into newline-separated chunks
# (chunk_size=1000 with chunk_overlap=30 between neighbouring chunks).
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=30,
)
split_documents = text_splitter.split_documents(documents)

# 3. Load embedding model and create FAISS database

In [3]:
# Embedding backend: the BAAI/bge-small-en-v1.5 model through HuggingFaceEmbeddings.
# (Alternative backend: embeddings = OpenAIEmbeddings())
embeddings = HuggingFaceEmbeddings(model="BAAI/bge-small-en-v1.5")

# Embed every chunk and build a FAISS index over the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=split_documents,
    embedding=embeddings,
)

# Persist the index to the "faiss_index" folder so it can be reloaded later.
vectorstore.save_local(folder_path="faiss_index")



  from .autonotebook import tqdm as notebook_tqdm


# 4. Load local database

In [4]:
# Reload the FAISS index stored in the "faiss_index" folder, reusing the same
# embedding model that was used to build it.
# NOTE(security): allow_dangerous_deserialization=True opts in to pickle-based
# loading; only load index folders you created yourself or otherwise trust.
new_vectorstore = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# 5. Connect to the LLM via API and build the retrieval QA chain with a prompt template

In [None]:
from langchain.prompts import PromptTemplate

# Read the DeepSeek credential from the environment.
api_key = os.getenv("DEEPSEEK_API_KEY")
if not api_key:
    # Fail fast: passing api_key=None would let the client look for a default
    # key elsewhere, which could send an unrelated credential to this endpoint.
    raise RuntimeError(
        "DEEPSEEK_API_KEY is not set; add it to your .env file or environment."
    )

# ChatOpenAI is pointed at the DeepSeek endpoint through base_url.
model = ChatOpenAI(
    model="deepseek-chat",
    api_key=api_key,
    base_url="https://api.deepseek.com",
)

# Prompt that merges the retrieved documents ({context}) with the user's
# question ({input}). It is defined locally; the earlier hub.pull(...) call was
# removed because its result was overwritten immediately and it only added an
# unnecessary network request.
retrieval_qa_chat_prompt = PromptTemplate(
    input_variables=["context", "input"],
    template="""
You are an expert assistant. Use the following documents to answer the user's question.
Documents: {context}
Question: {input}
Answer:
"""
)

# Chain that places the retrieved documents into the prompt and calls the model.
combine_docs_chain = create_stuff_documents_chain(model, retrieval_qa_chat_prompt)

# Full pipeline: retrieve from the reloaded FAISS store, then answer with the chain above.
retrieval_chain = create_retrieval_chain(
    new_vectorstore.as_retriever(), combine_docs_chain
)



In [23]:
# Print the prompt template to check its {context} and {input} placeholders.
retrieval_qa_chat_prompt.pretty_print()


You are an expert assistant. Use the following documents to answer the user's question.
Documents: [33;1m[1;3m{context}[0m
Question: [33;1m[1;3m{input}[0m
Answer:



# 6. Search content

In [None]:
import pprint

# Question sent through the retrieval chain under the "input" key,
# which matches the {input} placeholder of the prompt.
question = "Give me the gist of Retrieval-Augmented Generation (RAG) in 3 sentences,and translate it to Chinese."
res = retrieval_chain.invoke({"input": question})

# Show only the generated answer from the result dict.
pprint.pprint(res["answer"])

('**Retrieval-Augmented Generation (RAG) combines parametric memory '
 '(pre-trained seq2seq models) with non-parametric memory (e.g., a Wikipedia '
 'vector index) to enhance language generation. It retrieves relevant '
 'documents during inference, improving factual accuracy and specificity '
 'compared to purely parametric models like BART, and allows dynamic updates '
 'by swapping the retrieval index without retraining. RAG achieves '
 'state-of-the-art results on open-domain QA and other knowledge-intensive '
 'tasks by marginalizing over retrieved passages in two variants: RAG-Sequence '
 '(single document per output) and RAG-Token (multiple documents per '
 'token).**  \n'
 '\n'
 '**检索增强生成（RAG）结合了参数化记忆（预训练的seq2seq模型）与非参数化记忆（如维基百科向量索引）以提升语言生成能力。它在推理时检索相关文档，相比纯参数化模型（如BART）提高了事实准确性和细节丰富度，并支持通过替换检索索引动态更新模型而无需重新训练。RAG通过两种变体（RAG-Sequence单文档输出和RAG-Token多文档分token）对检索内容进行边缘化处理，在开放域QA等知识密集型任务中达到领先水平。**')
