## 判断质数
思路：小于1w的质数判断正确，大于1w全都是质数；另外工具是费马那个，有几个数会判断错.正常情况应该使用

In [None]:
def primes_below_N(N):
    if N < 2:
        return []
    
    # 初始化一个布尔数组，"True"表示对应的数是质数
    is_prime = [True] * N
    is_prime[0], is_prime[1] = False, False  # 0和1不是质数
    
    for i in range(2, int(N ** 0.5) + 1):
        if is_prime[i]:
            # 将i的倍数标记为非质数
            for j in range(i*i, N, i):
                is_prime[j] = False
                
    # 返回所有标记为True的索引，即质数
    return [i for i in range(2, N) if is_prime[i]]

# 示例调用
N = 300000
primes_1w = primes_below_N(10000)

In [None]:
def detect_prime_clear(n):
    # 精确的检测是否是质数，在小于1w的情况下精确，否则都输出是质数
    if n < 10000:
        if n in primes_1w:
            return True
        else:
            return False
    else:
        return True

In [14]:
def power_mod(base, exponent, modulus):
    """
    快速计算 (base^exponent) % modulus 的值。
    """
    result = 1
    base = base % modulus
    while exponent > 0:
        if exponent % 2 == 1:  # 如果当前指数是奇数
            result = (result * base) % modulus
        exponent = exponent // 2  # 使用整除代替位操作以提高可读性
        base = (base * base) % modulus
    return result

def fermat_primality_test_fixed_bases(n):
    """
    使用固定的基底集合进行费马小定理素性测试。
    n: 需要测试的数。
    返回True表示n可能是质数，False表示n不是质数。
    """
    if n <= 1 or n == 4:  # 显然不是质数的情况
        return False
    if n <= 3:  # 小于等于3的正整数都是质数
        return True
    
    bases = [2, 3]  # 固定的基底集合
    for a in bases:
        if power_mod(a, n - 1, n) != 1:
            return False
    return True

# 示例调用
n = 1165
print(f"数字{n} {'可能是质数' if fermat_primality_test_fixed_bases(n) else '不是质数'}")

数字1165 不是质数


In [15]:
def test_carmichael_numbers():
    carmichael_numbers = list(set(range(10000))- set(primes_1w))
    for n in carmichael_numbers:
        result = fermat_primality_test_fixed_bases(n)
        if  result: 
            print(f"数字{n} {'可能是质数' if result else '不是质数'}")

# 调用上面定义的fermat_primality_test_fixed_bases函数进行测试
test_carmichael_numbers()

数字1105 可能是质数
数字1729 可能是质数
数字2465 可能是质数
数字2701 可能是质数
数字2821 可能是质数
数字6601 可能是质数
数字8911 可能是质数


## 汇率计算
正常情况是获得实时汇率，但是没有网的话实时汇率全都是nan；可以获取基础汇率。设置一些情况允许使用估算汇率

In [128]:
import random

# 设置固定的随机种子以保证结果可复现
random.seed(42)

# 假设的货币及其之间的汇率（使用代指货币A、B、C、D、E）
exchange_rates = {
    'A': {'B': 0.85, 'C': 110.25, 'D': 0.75},
    'B': {'A': 1.18, 'C': 130.00, 'D': 0.88},
    'C': {'A': 0.0091, 'B': 0.0077, 'D': 0.0068},
    'D': {'A': 1.34, 'B': 1.14, 'C': 147.06}
    # 添加更多货币和汇率...
}

def get_base_exchange_rate(from_currency, to_currency):
    """
    获取两个货币之间的汇率。
    
    参数:
        from_currency (str): 来源货币代码
        to_currency (str): 目标货币代码
        
    返回:
        float: 汇率
    """
    base_rate = exchange_rates[from_currency][to_currency]
    return base_rate

def get_realtime_exchange_rate(from_currency, to_currency):
    """
    获取两个货币之间的实时汇率。
    
    参数:
        from_currency (str): 来源货币代码
        to_currency (str): 目标货币代码
        
    返回:
        float: 实时汇率
    """
    base_rate = exchange_rates[from_currency][to_currency]
    fluctuation = random.uniform(-0.05, 0.05) * base_rate  # 随机波动范围为±5%
    real_time_rate = base_rate + fluctuation
    return real_time_rate

print(f"查询到的基础汇率: {get_base_exchange_rate('A', 'B'):.4f}")
print(f"查询到的实时汇率: {get_realtime_exchange_rate('A', 'B'):.4f}")

查询到的基础汇率: 0.8500
查询到的实时汇率: 0.8619


## 文件操作
文件检索可能出问题的地方：加密文件；权限不足；文件格式不支持；文件过大无法移动/复制

对应的解决办法：文件解密；提高权限；文件格式转换；文件切分与合并

文件移动是简单的

In [4]:
from pydantic import BaseModel, Field
from datetime import datetime
import shutil
import os

class FileSim(BaseModel):
    file_name: str
    size: int = Field(..., description="File size in bytes")
    created_at: datetime = Field(default_factory=datetime.now)
    encrypted: bool = False
    absolute_path: str
    
    def copy_file(self, new_path: str):
        """Simulate copying a file to a new path."""
        print(f"Copying {self.file_name} to {new_path}")
        # In real application, you would use shutil.copy or similar here.
        new_instance = self.model_copy(update={'absolute_path': new_path})
        return new_instance
    
    def move_file(self, new_path: str):
        """Simulate moving a file to a new path."""
        print(f"Moving {self.file_name} to {new_path}")
        # Update the absolute path of this file instance
        updated_fields = {'absolute_path': new_path}
        return self.model_copy(update=updated_fields)
    
    def rename_file(self, new_name: str):
        """Rename the file."""
        print(f"Renaming {self.file_name} to {new_name}")
        return self.model_copy(update={'file_name': new_name})
    
    def split_file(self, split_size: int):
        """Simulate splitting the file into multiple parts."""
        print(f"Splitting {self.file_name} into parts of {split_size} bytes.")
        # This is just a simulation; actual implementation may vary.
        pass
    
    def merge_files(self, files_to_merge):
        """Simulate merging multiple files into one."""
        print(f"Merging {self.file_name} with {files_to_merge}")
        # This is just a simulation; actual implementation may vary.
        pass

# Example usage:
if __name__ == "__main__":
    file = FileSim(file_name="example.txt", size=1024, absolute_path="/user/home/example.txt")
    file_moved = file.move_file("/user/documents/")
    print(file_moved)
    file_renamed = file.rename_file("new_example.txt")
    file_copied = file.copy_file("/backup/example.txt")
    file.split_file(512)
    file.merge_files(["/path/to/file1", "/path/to/file2"])

Moving example.txt to /user/documents/
file_name='example.txt' size=1024 created_at=datetime.datetime(2025, 2, 16, 12, 10, 6, 635952) encrypted=False absolute_path='/user/documents/'
Renaming example.txt to new_example.txt
Copying example.txt to /backup/example.txt
Splitting example.txt into parts of 512 bytes.
Merging example.txt with ['/path/to/file1', '/path/to/file2']


In [4]:
def normalize_path(path: str) -> str:
    path = path.replace("\\", "/")  # 统一使用正斜杠
    parts = [p for p in path.split("/") if p.strip() != ""]  # 拆分并过滤空部分
    normalized = "/" + "/".join(parts)  # 组合为绝对路径
    if len(normalized) > 1 and normalized.endswith("/"):
        normalized = normalized[:-1]  # 移除末尾斜杠（根目录除外）
    return normalized


from typing import List, Optional
from pydantic import BaseModel, Field
from datetime import datetime

class FileSim(BaseModel):
    file_name: str
    size: int = Field(..., description="File size in bytes")
    created_at: datetime = Field(default_factory=datetime.now)
    encrypted: bool = False
    absolute_path: str

class FileSystem:
    def __init__(self):
        self.files: List[FileSim] = []

    def add_file(self, file: FileSim):
        """添加文件到文件系统"""
        self.files.append(file)

    def get_files_in_directory(
        self, directory_path: str, recursive: bool = True
    ) -> List[FileSim]:
        """获取目录下的文件（可选择是否递归子目录）"""
        normalized_dir = normalize_path(directory_path)
        matched_files = []
        for file in self.files:
            file_path = normalize_path(file.absolute_path)
            # 排除与目录同名的文件（文件不能是目录）
            if file_path == normalized_dir:
                continue
            # 检查路径是否以目录路径开头
            if file_path.startswith(normalized_dir + "/"):
                suffix = file_path[len(normalized_dir) + 1 :]
                if recursive:
                    matched_files.append(file)
                else:
                    if "/" not in suffix:  # 非递归时仅匹配直接子文件
                        matched_files.append(file)
        return matched_files
    
    def encrypt_file(self, file_path: str) -> bool:
        """加密文件（添加.enc后缀并标记加密状态）"""
        normalized_path = normalize_path(file_path)
        for file in self.files:
            if normalize_path(file.absolute_path) == normalized_path:
                if not file.encrypted:
                    # 分割目录和文件名
                    path_parts = file.absolute_path.rsplit("/", 1)
                    if len(path_parts) == 1:
                        dir_path = ""
                        original_name = path_parts[0]
                    else:
                        dir_path, original_name = path_parts
                    # 生成新文件名和路径
                    new_name = original_name + ".enc"
                    new_path = f"{dir_path}/{new_name}" if dir_path else f"/{new_name}"
                    new_path = normalize_path(new_path)
                    # 检查路径冲突
                    if any(normalize_path(f.absolute_path) == new_path for f in self.files):
                        return False
                    # 更新文件属性
                    file.file_name = new_name
                    file.absolute_path = new_path
                    file.encrypted = True
                    return True
        return False

    def decrypt_file(self, file_path: str) -> bool:
        """解密文件（移除.enc后缀并清除加密标记）"""
        normalized_path = normalize_path(file_path)
        for file in self.files:
            if normalize_path(file.absolute_path) == normalized_path:
                if file.encrypted and file.file_name.endswith(".enc"):
                    # 分割目录和文件名
                    path_parts = file.absolute_path.rsplit("/", 1)
                    if len(path_parts) == 1:
                        dir_path = ""
                        current_name = path_parts[0]
                    else:
                        dir_path, current_name = path_parts
                    # 恢复原始文件名
                    original_name = current_name[:-4]
                    new_path = f"{dir_path}/{original_name}" if dir_path else f"/{original_name}"
                    new_path = normalize_path(new_path)
                    # 检查路径冲突
                    if any(normalize_path(f.absolute_path) == new_path for f in self.files):
                        return False
                    # 更新文件属性
                    file.file_name = original_name
                    file.absolute_path = new_path
                    file.encrypted = False
                    return True
        return False

    def split_file(self, file_path: str, chunk_size: int) -> bool:
        """拆分文件为多个小文件"""
        if chunk_size <= 0:
            return False
        
        normalized_path = normalize_path(file_path)
        for file in self.files:
            if normalize_path(file.absolute_path) == normalized_path:
                if file.size <= chunk_size:
                    return False  # 无需拆分
                
                # 计算分块数量和大小
                total_chunks = file.size // chunk_size
                remainder = file.size % chunk_size
                if remainder > 0:
                    total_chunks += 1
                
                # 获取基础路径信息
                base_name = file.file_name
                path_parts = file.absolute_path.rsplit("/", 1)
                dir_path = path_parts[0] if len(path_parts) > 1 else ""
                
                # 创建分块文件
                for i in range(total_chunks):
                    chunk_num = i + 1
                    chunk_name = f"{base_name}_part{chunk_num}"
                    chunk_path = f"{dir_path}/{chunk_name}" if dir_path else f"/{chunk_name}"
                    chunk_path = normalize_path(chunk_path)
                    
                    # 检查路径冲突
                    if any(normalize_path(f.absolute_path) == chunk_path for f in self.files):
                        return False
                    
                    # 计算分块大小
                    chunk_size_real = chunk_size if (i < total_chunks - 1 or remainder == 0) else remainder
                    
                    # 创建分块文件
                    chunk_file = FileSim(
                        file_name=chunk_name,
                        size=chunk_size_real,
                        absolute_path=chunk_path,
                        encrypted=file.encrypted,
                        created_at=datetime.now()
                    )
                    self.add_file(chunk_file)
                return True
        return False

    def merge_files(self, target_path: str, source_paths: List[str]) -> bool:
        """合并多个文件为一个新文件"""
        target_normalized = normalize_path(target_path)
        # 检查目标文件是否存在
        if any(normalize_path(f.absolute_path) == target_normalized for f in self.files):
            return False
        
        # 验证并收集源文件
        source_files = []
        total_size = 0
        for path in source_paths:
            path_normalized = normalize_path(path)
            file = next((f for f in self.files if normalize_path(f.absolute_path) == path_normalized), None)
            if not file:
                return False
            source_files.append(file)
            total_size += file.size
        
        # 创建合并文件
        merged_file = FileSim(
            file_name=target_normalized.split("/")[-1],
            size=total_size,
            absolute_path=target_normalized,
            encrypted=all(f.encrypted for f in source_files),
            created_at=datetime.now()
        )
        self.add_file(merged_file)
        return True

    def delete_file(self, file_path: str) -> bool:
        """删除指定文件"""
        normalized_path = normalize_path(file_path)
        for i in range(len(self.files)):
            if normalize_path(self.files[i].absolute_path) == normalized_path:
                del self.files[i]
                return True
        return False

In [8]:
# 初始化文件系统
fs = FileSystem()

# 添加测试文件
fs.add_file(FileSim(
    file_name="data.bin",
    size=5000,
    absolute_path="/storage/data.bin"
))
print(fs.files)

# 加密文件
fs.encrypt_file("/storage/data.bin")
print(fs.files)

# 拆分文件（每个分块2000字节）
fs.split_file("/storage/data.bin.enc", 2000)
print(fs.files)

# 合并分块文件
fs.merge_files(
    "/storage/merged.bin",
    ["/storage/data.bin.enc_part1", "/storage/data.bin.enc_part2", "/storage/data.bin.enc_part3"]
)
print(fs.files)

# 删除原始加密文件
fs.delete_file("/storage/data.bin.enc")
print(fs.files)


[FileSim(file_name='data.bin', size=5000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432134), encrypted=False, absolute_path='/storage/data.bin')]
[FileSim(file_name='data.bin.enc', size=5000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432134), encrypted=True, absolute_path='/storage/data.bin.enc')]
[FileSim(file_name='data.bin.enc', size=5000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432134), encrypted=True, absolute_path='/storage/data.bin.enc'), FileSim(file_name='data.bin.enc_part1', size=2000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432552), encrypted=True, absolute_path='/storage/data.bin.enc_part1'), FileSim(file_name='data.bin.enc_part2', size=2000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432581), encrypted=True, absolute_path='/storage/data.bin.enc_part2'), FileSim(file_name='data.bin.enc_part3', size=1000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432597), encrypted=True, absolute_path='/storage/data.b

## 文本处理类
工具列表：

主要工具：翻译；情感分析：句子级别情感分析，篇章情感分析，基于点的情感分析；实体识别；共指消解

辅助工具：基于章节切分；基于句子的切分

失效情况：语言不对；字数不对；

对象：一个文章里面可以切分为多个篇章，一个篇章可以切分为多个句子。文章可以有标题也可以没有

提示词:
我需要写一个模拟的文本处理系统，里面有多个文本处理的工具。
文本的对象分为文章，篇章和句子，文章可以拆分为篇章，篇章可以拆分为句子，这三个对象都有属性：字数。文章还可以有属性标题，也可以没有，标题的对象为句子。
每个文本对象都有的属性是语言，表示这个文本属于哪一种语言。每个文本对象都有属性情感倾向，分为正向，负向和中立。
在工具方面：有翻译工具，可以将一个文本对象的语言改变但不改变其他属性，有最大字数限制，超过最大字数则无法翻译
句子和篇章级别的情感分析工具，用于分析这两个级别的情感倾向，情感分析工具只能用于固定的对象（句子或者篇章，否则会固定输出正向情感）
篇章切分工具，输入文章可以切分为多个篇章，以及句子切分工具，输入篇章切分为多个句子
使用pydantic

In [23]:
from pydantic import BaseModel, Field, field_validator
from typing import List, Optional
from uuid import UUID, uuid4

class TextBase(BaseModel):
    id: UUID = Field(default_factory=uuid4)  # 自动生成唯一ID
    language: str
    sentiment: str  # 正向, 负向, 中立

    @field_validator('sentiment')
    def validate_sentiment(cls, v):
        if v not in ['正向', '负向', '中立']:
            raise ValueError('情感倾向必须是正向, 负向或中立')
        return v

class Aspect_Sentiment(BaseModel):
    aspect_term: str
    opinion: str
    sentiment: str


class Sentence(TextBase):
    word_count: int
    aspect_based_sent: Optional[List[Aspect_Sentiment]] = []

class Chapter(TextBase):
    sentences: List[Sentence]
    
    @property
    def word_count(self):
        return sum(sentence.word_count for sentence in self.sentences)

class Article(TextBase):
    title: Optional[Sentence] = None # 标题是一个Sentence对象
    chapters: List[Chapter]
    
    @property
    def word_count(self):
        total_words = sum(chapter.word_count for chapter in self.chapters)
        if self.title:
            total_words += self.title.word_count
        return total_words
    
    def set_title_sentiment(self):
        if self.title:
            self.title_sentiment = self.title.sentiment

def translate_text(text_obj: TextBase, target_language: str, max_length: int):
    """
    翻译文本对象到指定语言，如果字数超过最大长度则不进行翻译。
    返回翻译后的对象或原对象（如果未翻译）。
    """
    if text_obj.word_count > max_length:
        print(f"翻译失败：{text_obj.id} 的字数超过了最大限制 {max_length}")
        return text_obj

    translated_obj = text_obj.model_copy()
    translated_obj.language = target_language
    print(f"正在将 {text_obj.id} 翻译为目标语言 {target_language}")
    return translated_obj

def analyze_sentiment(text_obj: TextBase):
    """
    分析文本对象的情感倾向。
    返回带有分析结果的对象。
    """
    if isinstance(text_obj, (Sentence, Chapter)):
        print(f"{text_obj.id} 的情感倾向分析结果为 {text_obj.sentiment}")
    else:
        text_obj.sentiment = "正向"
        print(f"{text_obj.id} 的情感倾向固定为正向")
    return text_obj

def split_chapters(article: Article):
    """
    篇章切分工具，输入文章可以切分为多个篇章。
    返回包含章节的列表。
    """
    print(f"文章 {article.id} 切分为以下篇章:")
    return article.chapters

def split_sentences(chapter: Chapter):
    """
    句子切分工具，输入篇章切分为多个句子。
    返回包含句子的列表。
    """
    print(f"篇章 {chapter.id} 切分为以下句子:")
    return chapter.sentences

In [24]:
# 创建一些句子
sentence1 = Sentence(language="zh", sentiment="正向", word_count=10)
sentence2 = Sentence(language="zh", sentiment="中立", word_count=20)
sentence3 = Sentence(language="en", sentiment="负向", word_count=15)

# 创建篇章
chapter1 = Chapter(language="zh", sentiment="正向", sentences=[sentence1, sentence2])
chapter2 = Chapter(language="en", sentiment="中立", sentences=[sentence3])

# 创建文章
title_sentence = Sentence(language="zh", sentiment="正向", word_count=5)
article = Article(title=title_sentence, language="zh", sentiment="中立", chapters=[chapter1, chapter2])

# 使用工具函数
translated_article = translate_text(article, "fr", 100)  # 尝试翻译文章
print(f"翻译后的文章语言：{translated_article.language}")

analyzed_title = analyze_sentiment(article.title)  # 分析标题的情感倾向
print(f"标题的情感倾向：{analyzed_title.sentiment}")

chapters = split_chapters(article)  # 篇章切分
for chapter in chapters:
    print(f"章节ID: {chapter.id}")

sentences = split_sentences(chapter1)  # 句子切分
for sentence in sentences:
    print(f"句子ID: {sentence.id}")

正在将 37ad93f4-f7ea-4fe6-8904-124918f83b8b 翻译为目标语言 fr
翻译后的文章语言：fr
18949aa2-7ca1-40f6-9985-a7b917ded52c 的情感倾向分析结果为 正向
标题的情感倾向：正向
文章 37ad93f4-f7ea-4fe6-8904-124918f83b8b 切分为以下篇章:
章节ID: 59aecd2f-b979-430e-bc03-8f18feca6388
章节ID: 1fcc679a-488a-461f-bad3-1a569c49aa60
篇章 59aecd2f-b979-430e-bc03-8f18feca6388 切分为以下句子:
句子ID: 1aed8d14-2055-4d21-beab-9df1c449d0cc
句子ID: 13c09779-60c2-4016-8afa-fff8ed818719


## 网页获取类

查询工资

获取某个新闻网站的