In [1]:
%pip install faiss-cpu sentence-transformers dashscope langchain

Looking in indexes: https://mirrors.tencent.com/pypi/simple/
Collecting langchain
  Downloading https://mirrors.tencent.com/pypi/packages/ed/5c/5c0be747261e1f8129b875fa3bfea736bc5fe17652f9d5e15ca118571b6f/langchain-0.3.25-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading https://mirrors.tencent.com/pypi/packages/30/40/aa440a7cd05f1dab5d7c91a1284eb776c3cf3eb59fa18ed39927650cfa38/langchain_core-0.3.59-py3-none-any.whl (437 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m437.7/437.7 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting langchain-text-splitters<1.0.0,>=0.3.8 (from langchain)
  Downloading https://mirrors.tencent.com/pypi/packages/8b/a3/3696ff2444658053c01b6b7443e761f28bb71217d82bb89137a978c5f66f/langchain_text_splitters-0.3.8-py3-none-any.whl (32

In [3]:
%pip install -U langchain-community

Looking in indexes: https://mirrors.tencent.com/pypi/simple/
Collecting langchain-community
  Downloading https://mirrors.tencent.com/pypi/packages/03/a7/b779146b33e1f2b5ef6d44525a8cb476f8d156e2e98a251588f467d74ce3/langchain_community-0.3.23-py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading https://mirrors.tencent.com/pypi/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading https://mirrors.tencent.com/pypi/packages/b6/5f/d6d641b490fd3ec2c4c13b4244d68deea3a1b970a97be64f34fb5504ff72/pydantic_settings-2.9.1-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00

In [4]:
import faiss
import numpy as np
import os
from sentence_transformers import SentenceTransformer
from dashscope import Generation
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# ====== 系统配置 ======
class Config:
    """Central settings shared by the loading, indexing and query classes."""

    # File paths
    LAWS_FILE = "laws.txt"                # raw legal-text source file
    FAISS_INDEX = "legal_index.faiss"     # on-disk FAISS index file

    # Model parameters
    EMBEDDING_MODEL = "paraphrase-multilingual-MiniLM-L12-v2"  # sentence-embedding model
    LLM_MODEL = "qwen-turbo"             # large language model name
    # The API key is read from the environment so that no secret is stored in
    # the notebook. The value is None when the variable is unset.
    # NOTE(review): the DashScope SDK presumably applies its own default key
    # lookup when api_key is None - confirm against the SDK documentation.
    DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY")

    # Processing parameters
    CHUNK_SIZE = 200                     # chunk size handed to the text splitter
    TOP_K = 3                            # number of clauses requested per search

# ====== 文档处理模块 ======
class LawProcessor:
    """Turns the raw law text file into a list of clause strings."""

    @staticmethod
    def load_and_split():
        """Read ``Config.LAWS_FILE`` and break it into cleaned text chunks.

        Returns:
            list[str]: whitespace-stripped chunk texts. Chunks whose stripped
            length is 10 characters or fewer are dropped.

        Raises:
            FileNotFoundError: if ``Config.LAWS_FILE`` does not exist.
        """
        source_path = Config.LAWS_FILE
        if not os.path.exists(source_path):
            raise FileNotFoundError(f"法律文件 {Config.LAWS_FILE} 不存在")

        # Load the whole file as UTF-8 text.
        raw_documents = TextLoader(source_path, encoding="utf-8").load()

        # Split on newlines into chunks of Config.CHUNK_SIZE with a 20-unit overlap.
        splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=20,
            keep_separator=True,
        )

        # Strip each chunk once and keep only those longer than 10 characters.
        clauses = []
        for piece in splitter.split_documents(raw_documents):
            text = piece.page_content.strip()
            if len(text) > 10:
                clauses.append(text)
        return clauses

# ====== 索引管理模块 ======
class IndexManager:
    """Builds, persists and reloads the FAISS index over clause embeddings."""

    def __init__(self, clauses):
        """Keep the clause list and load the sentence-embedding model.

        Args:
            clauses: sequence of clause strings to be embedded.
        """
        self.clauses = clauses
        self.model = SentenceTransformer(Config.EMBEDDING_MODEL)

    def build_index(self):
        """Embed every clause, build a flat L2 index and save it to disk.

        Returns:
            The populated ``faiss.IndexFlatL2``; it is also written to
            ``Config.FAISS_INDEX``.

        Raises:
            ValueError: if there are no clauses to index.
        """
        # Without this guard an empty clause list fails later with an opaque
        # error when the embedding dimension is read from shape[1].
        if len(self.clauses) == 0:
            raise ValueError("条款列表为空，无法构建索引")

        embeddings = self.model.encode(self.clauses)
        dimension = embeddings.shape[1]

        index = faiss.IndexFlatL2(dimension)
        index.add(embeddings.astype(np.float32))
        faiss.write_index(index, Config.FAISS_INDEX)
        return index

    @staticmethod
    def load_existing_index():
        """Load the index from ``Config.FAISS_INDEX`` if that file exists.

        Returns:
            The index read from disk, or ``None`` when the file is absent.
        """
        if os.path.exists(Config.FAISS_INDEX):
            return faiss.read_index(Config.FAISS_INDEX)
        return None

# ====== 核心业务逻辑 ======
class LegalComplianceChecker:
    """Retrieves the clauses closest to a policy text and asks the LLM to review it."""

    def __init__(self):
        """Load the clauses, obtain a matching FAISS index and keep the encoder."""
        self.clauses = LawProcessor.load_and_split()

        manager = IndexManager(self.clauses)
        # Reuse the model the manager already loaded instead of loading it twice.
        self.encoder = manager.model

        index = IndexManager.load_existing_index()
        # A cached index is only usable when it holds one vector per current
        # clause; otherwise the positions returned by search() would not line up
        # with self.clauses. Edits that keep the clause count unchanged are not
        # detected here - delete the index file after such edits.
        if index is None or index.ntotal != len(self.clauses):
            index = manager.build_index()
        self.index = index

    @staticmethod
    def _select_clauses(clauses, indices):
        """Map search result positions to clause texts, dropping invalid ones.

        FAISS fills missing results with -1, which plain list indexing would
        silently resolve to the last clause, so negative positions are rejected
        along with positions past the end of the list.
        """
        return [clauses[idx] for idx in indices if 0 <= idx < len(clauses)]

    def query(self, user_input):
        """Run the full review flow for one policy statement.

        Args:
            user_input: the policy text to review.

        Returns:
            str: the model's answer text when the call reports status 200,
            otherwise the fixed string "请求失败".
        """
        # 1. Semantic retrieval of the TOP_K nearest clauses.
        query_embedding = self.encoder.encode([user_input])
        distances, indices = self.index.search(query_embedding.astype(np.float32), Config.TOP_K)

        # 2. Resolve the hit positions to clause texts.
        relevant_clauses = self._select_clauses(self.clauses, indices[0])

        # 3. Build the prompt (text kept exactly as sent before).
        prompt = f"""执行法律合规性审查：
        [相关法律依据]
        {chr(10).join(relevant_clauses)}
        
        [待审查政策]
        {user_input}
        
        请按以下格式输出：
        1. 合规性结论（合规/部分合规/不合规）
        2. 判断理由（不超过50字）
        3. 依据条款（列出条款编号）"""

        # 4. Call the large language model.
        response = Generation.call(
            model=Config.LLM_MODEL,
            prompt=prompt,
            api_key=Config.DASHSCOPE_API_KEY,
            max_length=500,
            top_p=0.7
        )

        return response.output.text if response.status_code == 200 else "请求失败"

# ====== 使用示例 ======
if __name__ == "__main__":
    # Policy statement used as the demo input.
    test_case = "地方政府要求网约车平台必须安装指定厂商的监控设备"

    # Build the checker (loads clauses, index and encoder), then run the review.
    checker = LegalComplianceChecker()
    result = checker.query(test_case)

    # Print the report: header, the input policy, a blank line, then the result.
    report_lines = (
        "=== 政策审查报告 ===",
        f"输入政策：{test_case}",
        "\n审查结果：",
        result,
    )
    print(*report_lines, sep="\n")



=== 政策审查报告 ===
输入政策：地方政府要求网约车平台必须安装指定厂商的监控设备

审查结果：
1. 不合规  
2. 强制指定设备厂商，限制了市场竞争。  
3. 第四条（一）、第五条（三）
