In [1]:
%pip install -U langchain-community sentence-transformers faiss-cpu dashscope

Looking in indexes: https://mirrors.tencent.com/pypi/simple/
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
import faiss
import numpy as np
import os
from dashscope import Generation
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer

# ====== 系统配置 ======
class Config:
    LAWS_FILE = "laws.txt"                # 法律条文文件
    FAISS_INDEX_DIR = "faiss_index"       # FAISS索引目录
    
    # 模型参数
    EMBEDDING_MODEL = "paraphrase-multilingual-MiniLM-L12-v2"  # 语义编码模型
    DASHSCOPE_API_KEY = "sk-7fb2aee47f5d4531855a7ac3412249fe"    # 阿里云API密钥
    LLM_MODEL = "qwen-turbo"              # 大语言模型
    
    # 处理参数
    CHUNK_SIZE = 200                      # 文本分割长度
    TOP_K = 3                             # 检索返回条款数

# ====== 自定义Qwen模型集成 ======
class QwenLLM:
    def __init__(self):
        self.api_key = Config.DASHSCOPE_API_KEY
        
    def __call__(self, prompt):
        response = Generation.call(
            model=Config.LLM_MODEL,
            prompt=prompt,
            api_key=self.api_key,
            max_length=500,
            top_p=0.7
        )
        return response.output.text if response.status_code == 200 else "请求失败"

# ====== 文档处理模块 ======
class LawProcessor:
    @staticmethod
    def load_and_split():
        """加载并分割法律文档"""
        if not os.path.exists(Config.LAWS_FILE):
            raise FileNotFoundError(f"法律文件 {Config.LAWS_FILE} 不存在")

        loader = TextLoader(Config.LAWS_FILE, encoding="utf-8")
        documents = loader.load()
        
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=20,
            keep_separator=True
        )
        
        split_docs = text_splitter.split_documents(documents)
        return [doc.page_content.strip() for doc in split_docs if doc.page_content.strip()]

# ====== 向量存储模块 ======
class VectorStoreManager:
    def __init__(self):
        self.encoder = SentenceTransformer(Config.EMBEDDING_MODEL)
        self.vector_store = None
        
    def init_vector_store(self, clauses):
        """初始化向量存储"""
        embeddings = self.encoder.encode(clauses)
        
        # 转换为FAISS兼容格式
        vectors = np.array(embeddings, dtype=np.float32)
        self.vector_store = FAISS.from_embeddings(
            text_embeddings=list(zip(clauses, vectors)),
            embedding=self.encoder,
            index_path=Config.FAISS_INDEX_DIR
        )
        return self.vector_store

# ====== 核心业务逻辑 ======
class LegalComplianceChecker:
    def __init__(self):
        # 初始化组件
        self.clauses = LawProcessor.load_and_split()
        self.vector_store = VectorStoreManager().init_vector_store(self.clauses)
        self.llm = QwenLLM()
        
        # 构建RetrievalQA链
        self.qa_chain = self._build_qa_chain()
    
    def _build_qa_chain(self):
        """构建检索增强问答链"""
        prompt_template = """
        根据以下法律条款进行合规性判断：
        {context}
        
        待审查政策：{question}
        
        要求：
        1. 结论（合规/部分合规/不合规）
        2. 理由（50字内） 
        3. 引用条款编号
        """
        
        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )
        
        return RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": Config.TOP_K}),
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )
    
    def query(self, user_input):
        """执行合规性审查"""
        result = self.qa_chain({"query": user_input})
        
        # 格式化输出
        output = f"结论：{result['result']}\n"
        if result['source_documents']:
            output += "依据条款：\n" + "\n".join(
                f"- {doc.metadata.get('clause_id', '未知条款')}" 
                for doc in result['source_documents']
            )
        return output

# ====== 使用示例 ======
if __name__ == "__main__":
    # 初始化系统（首次运行会自动构建索引）
    checker = LegalComplianceChecker()
    
    # 测试案例
    test_case = "地方政府要求出租车公司必须采购指定品牌的计价器"
    
    # 执行审查
    result = checker.query(test_case)
    
    # 输出结果
    print("=== 合规性审查报告 ===")
    print(f"输入政策：{test_case}")
    print("\n" + result)



TypeError: FAISS.__init__() got an unexpected keyword argument 'index_path'