In [1]:
%pip install -U langchain-community sentence-transformers faiss-cpu dashscope

Looking in indexes: https://mirrors.tencent.com/pypi/simple/
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install -U langchain-core>=0.2.0 langchain-community>=0.2.0

zsh:1: 0.2.0 not found


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [4]:
import faiss
import numpy as np
import os
from dashscope import Generation
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer

# ====== 系统配置 ======
class Config:
    LAWS_FILE = "laws.txt"                # 法律条文文件
    FAISS_INDEX_DIR = "faiss_index"       # FAISS索引目录
    
    # 模型参数
    EMBEDDING_MODEL = "paraphrase-multilingual-MiniLM-L12-v2"  # 语义编码模型
    DASHSCOPE_API_KEY = "sk-7fb2aee47f5d4531855a7ac3412249fe"    # 阿里云API密钥
    LLM_MODEL = "qwen-turbo"              # 大语言模型
    
    # 处理参数
    CHUNK_SIZE = 200                      # 文本分割长度
    TOP_K = 3                             # 检索返回条款数

from langchain_core.language_models.llms import BaseLLM
from langchain_core.outputs import LLMResult
from typing import Any, List, Optional, Dict, Mapping, Iterator

class QwenLangChainWrapper(BaseLLM):
    """LangChain 兼容的 Qwen 模型包装器"""
    
    api_key: str  # 必须声明为类属性
    
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        **kwargs: Any
    ) -> str:
        from dashscope import Generation
        response = Generation.call(
            model=Config.LLM_MODEL,
            prompt=prompt,
            api_key=self.api_key,
            max_length=500,
            top_p=0.7
        )
        return response.output.text if response.status_code == 200 else "请求失败"
    
    @property
    def _llm_type(self) -> str:
        return "qwen-turbo"
    
    def _get_identifying_params(self) -> Mapping[str, Any]:
        return {"model_type": "qwen-turbo"}
    
    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        **kwargs: Any
    ) -> Iterator[str]:
        raise NotImplementedError("流式输出暂不支持")

# ====== 文档处理模块 ======
class LawProcessor:
    @staticmethod
    def load_and_split():
        """加载并分割法律文档"""
        if not os.path.exists(Config.LAWS_FILE):
            raise FileNotFoundError(f"法律文件 {Config.LAWS_FILE} 不存在")

        loader = TextLoader(Config.LAWS_FILE, encoding="utf-8")
        documents = loader.load()
        
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=20,
            keep_separator=True
        )
        
        split_docs = text_splitter.split_documents(documents)
        return [doc.page_content.strip() for doc in split_docs if doc.page_content.strip()]

# ====== 向量存储模块 ======
class VectorStoreManager:
    def __init__(self):
        self.encoder = SentenceTransformer(Config.EMBEDDING_MODEL)
        self.vector_store = None
        
    def init_vector_store(self, clauses):
        """初始化向量存储"""
        # 检查索引目录是否存在
        if os.path.exists(Config.FAISS_INDEX_DIR):
            # 加载已有索引
            self.vector_store = FAISS.load_local(
                folder_path=Config.FAISS_INDEX_DIR,
                embeddings=self.encoder,
                allow_dangerous_deserialization=True
            )
        else:
            # 创建新索引
            embeddings = self.encoder.encode(clauses)
            vectors = np.array(embeddings, dtype=np.float32)
            
            # 新建索引
            self.vector_store = FAISS.from_embeddings(
                text_embeddings=list(zip(clauses, vectors)),
                embedding=self.encoder
            )
            # 保存索引到目录
            self.vector_store.save_local(Config.FAISS_INDEX_DIR)
        
        return self.vector_store

# ====== 核心业务逻辑 ======
class LegalComplianceChecker:
    def __init__(self):
        # 初始化组件
        self.clauses = LawProcessor.load_and_split()
        self.vector_store = VectorStoreManager().init_vector_store(self.clauses)
        
        # 使用兼容的 LLM 实例
        self.llm = QwenLangChainWrapper(api_key=Config.DASHSCOPE_API_KEY)
        
        # 构建问答链
        self.qa_chain = self._build_qa_chain()
    
    def _build_qa_chain(self):
        """构建检索增强问答链"""
        prompt_template = """
        根据以下法律条款进行合规性判断：
        {context}
        
        待审查政策：{question}
        
        要求：
        1. 结论（合规/部分合规/不合规）
        2. 理由（50字内） 
        3. 引用条款编号
        """
        
        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )
        
        return RetrievalQA.from_chain_type(
            llm=self.llm,  # 传入兼容的 LLM 实例
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": Config.TOP_K}),
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )

# ====== 使用示例 ======
if __name__ == "__main__":
    # 初始化系统（首次运行会自动构建索引）
    checker = LegalComplianceChecker()
    
    # 测试案例
    test_case = "地方政府要求出租车公司必须采购指定品牌的计价器"
    
    # 执行审查
    result = checker.query(test_case)
    
    # 输出结果
    print("=== 合规性审查报告 ===")
    print(f"输入政策：{test_case}")
    print("\n" + result)

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


TypeError: Can't instantiate abstract class QwenLangChainWrapper with abstract method _generate

: 