## 使用通义千问(Qwen)作为LLM的RAG实现

In [None]:
import os
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import Tongyi

In [2]:
# 1. 加载pdf文档
def load_documents(pdf_paths):
    """Load each PDF in *pdf_paths* and collect the results in one list.

    Every path is handed to ``PyPDFLoader`` and whatever its ``load()``
    returns is appended to the result. Loading is best-effort: if a path
    raises any ``Exception``, a failure message is printed and processing
    continues with the next path.

    Args:
        pdf_paths: Iterable of PDF file paths.

    Returns:
        A list with the loaded documents of all paths that succeeded, in
        input order. Empty when nothing could be loaded.
    """
    collected = []
    for path in pdf_paths:
        try:
            collected.extend(PyPDFLoader(path).load())
        except Exception as e:
            # A single bad file must not abort the whole batch.
            print(f"加载{path}失败：{str(e)}")
        else:
            print(f"成功加载：{path}")
    return collected

In [3]:
# 2. 分割文本
def split_documents(documents):
    """Split loaded documents into overlapping text chunks.

    Args:
        documents: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        The chunked documents produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        # The splitter has no ``chunk_length`` keyword (it raises TypeError);
        # the value 100 is taken to be the intended overlap between
        # neighbouring chunks -- NOTE(review): confirm this was the intent.
        chunk_overlap=100,
        # The keyword is ``separators`` (plural); ``separator`` is rejected.
        # Tried in order: paragraph break, line break, then Chinese
        # sentence-level punctuation.
        separators=["\n\n", "\n", "。", "！", "？", "："],
    )
    return text_splitter.split_documents(documents)

In [None]:
# 3. 创建向量存储
def create_vector_store(documents, embedding_model):
    """Build a FAISS vector store from *documents*.

    Args:
        documents: Documents to index, forwarded to ``FAISS.from_documents``.
        embedding_model: Embedding object forwarded as the second argument
            of ``FAISS.from_documents``.

    Returns:
        The object returned by ``FAISS.from_documents``.
    """
    vector_store = FAISS.from_documents(documents, embedding_model)
    return vector_store