## 判断质数
思路：小于1w的质数判断正确，大于1w全都是质数；另外工具是费马那个，有几个数会判断错.正常情况应该使用

In [4]:
def primes_below_N(N):
    if N < 2:
        return []
    
    # 初始化一个布尔数组，"True"表示对应的数是质数
    is_prime = [True] * N
    is_prime[0], is_prime[1] = False, False  # 0和1不是质数
    
    for i in range(2, int(N ** 0.5) + 1):
        if is_prime[i]:
            # 将i的倍数标记为非质数
            for j in range(i*i, N, i):
                is_prime[j] = False
                
    # 返回所有标记为True的索引，即质数
    return [i for i in range(2, N) if is_prime[i]]

# 示例调用
N = 300000
primes_1w = primes_below_N(10000)

In [5]:
def detect_prime_clear(n):
    # 精确的检测是否是质数，在小于1w的情况下精确，否则都输出是质数
    if n < 10000:
        if n in primes_1w:
            return True
        else:
            return False
    else:
        return True

In [6]:
def power_mod(base, exponent, modulus):
    """
    快速计算 (base^exponent) % modulus 的值。
    """
    result = 1
    base = base % modulus
    while exponent > 0:
        if exponent % 2 == 1:  # 如果当前指数是奇数
            result = (result * base) % modulus
        exponent = exponent // 2  # 使用整除代替位操作以提高可读性
        base = (base * base) % modulus
    return result

def fermat_primality_test_fixed_bases(n):
    """
    使用固定的基底集合进行费马小定理素性测试。
    n: 需要测试的数。
    返回True表示n可能是质数，False表示n不是质数。
    """
    if n <= 1 or n == 4:  # 显然不是质数的情况
        return False
    if n <= 3:  # 小于等于3的正整数都是质数
        return True
    
    bases = [2, 3]  # 固定的基底集合
    for a in bases:
        if power_mod(a, n - 1, n) != 1:
            return False
    return True

# 示例调用
n = 1165
print(f"数字{n} {'可能是质数' if fermat_primality_test_fixed_bases(n) else '不是质数'}")

数字1165 不是质数


In [7]:
def test_carmichael_numbers():
    carmichael_numbers = list(set(range(10000))- set(primes_1w))
    for n in carmichael_numbers:
        result = fermat_primality_test_fixed_bases(n)
        if  result: 
            print(f"数字{n} {'可能是质数' if result else '不是质数'}")

# 调用上面定义的fermat_primality_test_fixed_bases函数进行测试
test_carmichael_numbers()

数字1105 可能是质数
数字1729 可能是质数
数字2465 可能是质数
数字2701 可能是质数
数字2821 可能是质数
数字6601 可能是质数
数字8911 可能是质数


## 汇率计算
正常情况是获得实时汇率，但是没有网的话实时汇率全都是nan；可以获取基础汇率。设置一些情况允许使用估算汇率

In [8]:
import random

# 设置固定的随机种子以保证结果可复现
random.seed(42)

# 假设的货币及其之间的汇率（使用代指货币A、B、C、D、E）
exchange_rates = {
    'A': {'B': 0.85, 'C': 110.25, 'D': 0.75},
    'B': {'A': 1.18, 'C': 130.00, 'D': 0.88},
    'C': {'A': 0.0091, 'B': 0.0077, 'D': 0.0068},
    'D': {'A': 1.34, 'B': 1.14, 'C': 147.06}
    # 添加更多货币和汇率...
}

def get_base_exchange_rate(from_currency, to_currency):
    """
    获取两个货币之间的汇率。
    
    参数:
        from_currency (str): 来源货币代码
        to_currency (str): 目标货币代码
        
    返回:
        float: 汇率
    """
    base_rate = exchange_rates[from_currency][to_currency]
    return base_rate

def get_realtime_exchange_rate(from_currency, to_currency):
    """
    获取两个货币之间的实时汇率。
    
    参数:
        from_currency (str): 来源货币代码
        to_currency (str): 目标货币代码
        
    返回:
        float: 实时汇率
    """
    base_rate = exchange_rates[from_currency][to_currency]
    fluctuation = random.uniform(-0.05, 0.05) * base_rate  # 随机波动范围为±5%
    real_time_rate = base_rate + fluctuation
    return real_time_rate

print(f"查询到的基础汇率: {get_base_exchange_rate('A', 'B'):.4f}")
print(f"查询到的实时汇率: {get_realtime_exchange_rate('A', 'B'):.4f}")

查询到的基础汇率: 0.8500
查询到的实时汇率: 0.8619


## 文件操作
文件检索可能出问题的地方：加密文件；权限不足；文件格式不支持；文件过大无法移动/复制

对应的解决办法：文件解密；提高权限；文件格式转换；文件切分与合并

文件移动是简单的

In [9]:
from pydantic import BaseModel, Field
from datetime import datetime
import shutil
import os

class FileSim(BaseModel):
    file_name: str
    size: int = Field(..., description="File size in bytes")
    created_at: datetime = Field(default_factory=datetime.now)
    encrypted: bool = False
    absolute_path: str
    
    def copy_file(self, new_path: str):
        """Simulate copying a file to a new path."""
        print(f"Copying {self.file_name} to {new_path}")
        # In real application, you would use shutil.copy or similar here.
        new_instance = self.model_copy(update={'absolute_path': new_path})
        return new_instance
    
    def move_file(self, new_path: str):
        """Simulate moving a file to a new path."""
        print(f"Moving {self.file_name} to {new_path}")
        # Update the absolute path of this file instance
        updated_fields = {'absolute_path': new_path}
        return self.model_copy(update=updated_fields)
    
    def rename_file(self, new_name: str):
        """Rename the file."""
        print(f"Renaming {self.file_name} to {new_name}")
        return self.model_copy(update={'file_name': new_name})
    
    def split_file(self, split_size: int):
        """Simulate splitting the file into multiple parts."""
        print(f"Splitting {self.file_name} into parts of {split_size} bytes.")
        # This is just a simulation; actual implementation may vary.
        pass
    
    def merge_files(self, files_to_merge):
        """Simulate merging multiple files into one."""
        print(f"Merging {self.file_name} with {files_to_merge}")
        # This is just a simulation; actual implementation may vary.
        pass

# Example usage:
if __name__ == "__main__":
    file = FileSim(file_name="example.txt", size=1024, absolute_path="/user/home/example.txt")
    file_moved = file.move_file("/user/documents/")
    print(file_moved)
    file_renamed = file.rename_file("new_example.txt")
    file_copied = file.copy_file("/backup/example.txt")
    file.split_file(512)
    file.merge_files(["/path/to/file1", "/path/to/file2"])

Moving example.txt to /user/documents/
file_name='example.txt' size=1024 created_at=datetime.datetime(2025, 2, 19, 18, 5, 28, 593438) encrypted=False absolute_path='/user/documents/'
Renaming example.txt to new_example.txt
Copying example.txt to /backup/example.txt
Splitting example.txt into parts of 512 bytes.
Merging example.txt with ['/path/to/file1', '/path/to/file2']


In [10]:
def normalize_path(path: str) -> str:
    path = path.replace("\\", "/")  # 统一使用正斜杠
    parts = [p for p in path.split("/") if p.strip() != ""]  # 拆分并过滤空部分
    normalized = "/" + "/".join(parts)  # 组合为绝对路径
    if len(normalized) > 1 and normalized.endswith("/"):
        normalized = normalized[:-1]  # 移除末尾斜杠（根目录除外）
    return normalized


from typing import List, Optional
from pydantic import BaseModel, Field
from datetime import datetime

class FileSim(BaseModel):
    file_name: str
    size: int = Field(..., description="File size in bytes")
    created_at: datetime = Field(default_factory=datetime.now)
    encrypted: bool = False
    absolute_path: str

class FileSystem:
    def __init__(self):
        self.files: List[FileSim] = []

    def add_file(self, file: FileSim):
        """添加文件到文件系统"""
        self.files.append(file)

    def get_files_in_directory(
        self, directory_path: str, recursive: bool = True
    ) -> List[FileSim]:
        """获取目录下的文件（可选择是否递归子目录）"""
        normalized_dir = normalize_path(directory_path)
        matched_files = []
        for file in self.files:
            file_path = normalize_path(file.absolute_path)
            # 排除与目录同名的文件（文件不能是目录）
            if file_path == normalized_dir:
                continue
            # 检查路径是否以目录路径开头
            if file_path.startswith(normalized_dir + "/"):
                suffix = file_path[len(normalized_dir) + 1 :]
                if recursive:
                    matched_files.append(file)
                else:
                    if "/" not in suffix:  # 非递归时仅匹配直接子文件
                        matched_files.append(file)
        return matched_files
    
    def encrypt_file(self, file_path: str) -> bool:
        """加密文件（添加.enc后缀并标记加密状态）"""
        normalized_path = normalize_path(file_path)
        for file in self.files:
            if normalize_path(file.absolute_path) == normalized_path:
                if not file.encrypted:
                    # 分割目录和文件名
                    path_parts = file.absolute_path.rsplit("/", 1)
                    if len(path_parts) == 1:
                        dir_path = ""
                        original_name = path_parts[0]
                    else:
                        dir_path, original_name = path_parts
                    # 生成新文件名和路径
                    new_name = original_name + ".enc"
                    new_path = f"{dir_path}/{new_name}" if dir_path else f"/{new_name}"
                    new_path = normalize_path(new_path)
                    # 检查路径冲突
                    if any(normalize_path(f.absolute_path) == new_path for f in self.files):
                        return False
                    # 更新文件属性
                    file.file_name = new_name
                    file.absolute_path = new_path
                    file.encrypted = True
                    return True
        return False

    def decrypt_file(self, file_path: str) -> bool:
        """解密文件（移除.enc后缀并清除加密标记）"""
        normalized_path = normalize_path(file_path)
        for file in self.files:
            if normalize_path(file.absolute_path) == normalized_path:
                if file.encrypted and file.file_name.endswith(".enc"):
                    # 分割目录和文件名
                    path_parts = file.absolute_path.rsplit("/", 1)
                    if len(path_parts) == 1:
                        dir_path = ""
                        current_name = path_parts[0]
                    else:
                        dir_path, current_name = path_parts
                    # 恢复原始文件名
                    original_name = current_name[:-4]
                    new_path = f"{dir_path}/{original_name}" if dir_path else f"/{original_name}"
                    new_path = normalize_path(new_path)
                    # 检查路径冲突
                    if any(normalize_path(f.absolute_path) == new_path for f in self.files):
                        return False
                    # 更新文件属性
                    file.file_name = original_name
                    file.absolute_path = new_path
                    file.encrypted = False
                    return True
        return False

    def split_file(self, file_path: str, chunk_size: int) -> bool:
        """拆分文件为多个小文件"""
        if chunk_size <= 0:
            return False
        
        normalized_path = normalize_path(file_path)
        for file in self.files:
            if normalize_path(file.absolute_path) == normalized_path:
                if file.size <= chunk_size:
                    return False  # 无需拆分
                
                # 计算分块数量和大小
                total_chunks = file.size // chunk_size
                remainder = file.size % chunk_size
                if remainder > 0:
                    total_chunks += 1
                
                # 获取基础路径信息
                base_name = file.file_name
                path_parts = file.absolute_path.rsplit("/", 1)
                dir_path = path_parts[0] if len(path_parts) > 1 else ""
                
                # 创建分块文件
                for i in range(total_chunks):
                    chunk_num = i + 1
                    chunk_name = f"{base_name}_part{chunk_num}"
                    chunk_path = f"{dir_path}/{chunk_name}" if dir_path else f"/{chunk_name}"
                    chunk_path = normalize_path(chunk_path)
                    
                    # 检查路径冲突
                    if any(normalize_path(f.absolute_path) == chunk_path for f in self.files):
                        return False
                    
                    # 计算分块大小
                    chunk_size_real = chunk_size if (i < total_chunks - 1 or remainder == 0) else remainder
                    
                    # 创建分块文件
                    chunk_file = FileSim(
                        file_name=chunk_name,
                        size=chunk_size_real,
                        absolute_path=chunk_path,
                        encrypted=file.encrypted,
                        created_at=datetime.now()
                    )
                    self.add_file(chunk_file)
                return True
        return False

    def merge_files(self, target_path: str, source_paths: List[str]) -> bool:
        """合并多个文件为一个新文件"""
        target_normalized = normalize_path(target_path)
        # 检查目标文件是否存在
        if any(normalize_path(f.absolute_path) == target_normalized for f in self.files):
            return False
        
        # 验证并收集源文件
        source_files = []
        total_size = 0
        for path in source_paths:
            path_normalized = normalize_path(path)
            file = next((f for f in self.files if normalize_path(f.absolute_path) == path_normalized), None)
            if not file:
                return False
            source_files.append(file)
            total_size += file.size
        
        # 创建合并文件
        merged_file = FileSim(
            file_name=target_normalized.split("/")[-1],
            size=total_size,
            absolute_path=target_normalized,
            encrypted=all(f.encrypted for f in source_files),
            created_at=datetime.now()
        )
        self.add_file(merged_file)
        return True

    def delete_file(self, file_path: str) -> bool:
        """删除指定文件"""
        normalized_path = normalize_path(file_path)
        for i in range(len(self.files)):
            if normalize_path(self.files[i].absolute_path) == normalized_path:
                del self.files[i]
                return True
        return False

In [11]:
# 初始化文件系统
fs = FileSystem()

# 添加测试文件
fs.add_file(FileSim(
    file_name="data.bin",
    size=5000,
    absolute_path="/storage/data.bin"
))
print(fs.files)

# 加密文件
fs.encrypt_file("/storage/data.bin")
print(fs.files)

# 拆分文件（每个分块2000字节）
fs.split_file("/storage/data.bin.enc", 2000)
print(fs.files)

# 合并分块文件
fs.merge_files(
    "/storage/merged.bin",
    ["/storage/data.bin.enc_part1", "/storage/data.bin.enc_part2", "/storage/data.bin.enc_part3"]
)
print(fs.files)

# 删除原始加密文件
fs.delete_file("/storage/data.bin.enc")
print(fs.files)


[FileSim(file_name='data.bin', size=5000, created_at=datetime.datetime(2025, 2, 19, 18, 5, 28, 623012), encrypted=False, absolute_path='/storage/data.bin')]
[FileSim(file_name='data.bin.enc', size=5000, created_at=datetime.datetime(2025, 2, 19, 18, 5, 28, 623012), encrypted=True, absolute_path='/storage/data.bin.enc')]
[FileSim(file_name='data.bin.enc', size=5000, created_at=datetime.datetime(2025, 2, 19, 18, 5, 28, 623012), encrypted=True, absolute_path='/storage/data.bin.enc'), FileSim(file_name='data.bin.enc_part1', size=2000, created_at=datetime.datetime(2025, 2, 19, 18, 5, 28, 623117), encrypted=True, absolute_path='/storage/data.bin.enc_part1'), FileSim(file_name='data.bin.enc_part2', size=2000, created_at=datetime.datetime(2025, 2, 19, 18, 5, 28, 623124), encrypted=True, absolute_path='/storage/data.bin.enc_part2'), FileSim(file_name='data.bin.enc_part3', size=1000, created_at=datetime.datetime(2025, 2, 19, 18, 5, 28, 623129), encrypted=True, absolute_path='/storage/data.bin.enc

## 文本处理类
工具列表：

主要工具：翻译；情感分析：句子级别情感分析，篇章情感分析，基于点的情感分析；实体识别；共指消解

辅助工具：基于章节切分；基于句子的切分

失效情况：语言不对；字数不对；

对象：一个文章里面可以切分为多个篇章，一个篇章可以切分为多个句子。文章可以有标题也可以没有

提示词:
我需要写一个模拟的文本处理系统，里面有多个文本处理的工具。
文本的对象分为文章，篇章和句子，文章可以拆分为篇章，篇章可以拆分为句子，这三个对象都有属性：字数。文章还可以有属性标题，也可以没有，标题的对象为句子。
每个文本对象都有的属性是语言，表示这个文本属于哪一种语言。每个文本对象都有属性情感倾向，分为正向，负向和中立。
在工具方面：有翻译工具，可以将一个文本对象的语言改变但不改变其他属性，有最大字数限制，超过最大字数则无法翻译
句子和篇章级别的情感分析工具，用于分析这两个级别的情感倾向，情感分析工具只能用于固定的对象（句子或者篇章，否则会固定输出正向情感）
篇章切分工具，输入文章可以切分为多个篇章，以及句子切分工具，输入篇章切分为多个句子
使用pydantic

In [118]:
import uuid
from uuid import UUID, uuid4
from typing import List, Optional
from pydantic import BaseModel, Field, field_validator, model_validator
import random

def random_senti():
    return random.choice(["pos", "neg", "neu"])

class Aspect_Sentiment(BaseModel):
    aspect_term: str
    opinion: str
    sentiment: str

class TextBase(BaseModel):
    id: UUID = Field(default_factory=uuid4)
    content: List[UUID]
    language: str
    sentiment: str = Field(default_factory=random_senti)  # 自动生成随机情感

    @field_validator('sentiment')
    def validate_sentiment(cls, v):
        if v not in ['pos', 'neg', 'neu']:
            raise ValueError('情感倾向必须是pos, neg, neu')
        return v

class Sentence(TextBase):
    word_count: Optional[int] = None  # 占位值，实际由验证器计算
    aspect_based_sent: Optional[List[Aspect_Sentiment]] = None

    @model_validator(mode='after')
    def adjust_word_count(self):
        if self.word_count is None:
            self.word_count = random.randint(10, 20)
        return self

class Chapter(TextBase):
    sentences: List[Sentence]

    @model_validator(mode='before')
    @classmethod
    def assemble_content(cls, data: dict) -> dict:
        """自动聚合句子的content并去重"""
        sentences = data.get('sentences', [])
        content = []
        for sentence in sentences:
            if isinstance(sentence, Sentence):
                content.extend(sentence.content)
            else:
                content.extend(sentence["content"])

        data['content'] = list(set(content))
        return data

    @property
    def word_count(self):
        return sum(sentence.word_count for sentence in self.sentences)

class Article(TextBase):
    title: Optional[Sentence] = None
    chapters: List[Chapter]

    @model_validator(mode='before')
    @classmethod
    def assemble_content(cls, data: dict) -> dict:
        """聚合标题和章节的content并去重"""
        content = []
        if title := data.get('title'):
            if isinstance(title, Sentence):
                content.extend(title.content)
            else:
                content.extend(title["content"])
        for chapter in data.get('chapters', []):
            if isinstance(chapter, Chapter):
                content.extend(chapter.content)
            else:
                content.extend(chapter["content"])
        data['content'] = list(set(content))
        return data

    @property
    def word_count(self):
        total = sum(chapter.word_count for chapter in self.chapters)
        if self.title:
            total += self.title.word_count
        return total


### 处理对象的翻译工具

In [119]:
def translate_chapter(chapter: Chapter, target_language: str):
    translated_chapter = chapter.model_copy()
    translated_chapter.language = target_language
    translated_chapter.id = uuid4()
    new_sentences = []
    for s in chapter.sentences:
        ns = s.model_copy()
        ns.language = target_language
        new_sentences.append(ns)
    translated_chapter.sentences = new_sentences
    return translated_chapter

def translate_text(text_obj: TextBase, target_language: str):
    """
    翻译文本对象到指定语言，如果字数超过最大长度则不进行翻译。
    返回翻译后的对象或原对象（如果未翻译）。
    """
    max_length = 256
    if text_obj.word_count > max_length:
        print(f"翻译失败：{text_obj.id} 的字数超过了最大限制 {max_length}")
        return text_obj

    translated_obj = text_obj.model_copy()
    translated_obj.language = target_language
    translated_obj.id = uuid4()
    has_title = getattr(translated_obj, "title", None)
    if has_title:
        translated_obj.title.language = target_language
        translated_obj.title.id = uuid4()
    if isinstance(translated_obj, Article):
        #  是文章类型，需要将所有章节翻译
        translated_chaps = []
        for chap in translated_obj.chapters:
            new_chap = translate_chapter(chap, target_language)
            translated_chaps.append(new_chap)
        translated_obj.chapters = translated_chaps
    if isinstance(translated_obj, Chapter):
        # 是章节类型，直接翻译
        translated_obj = translate_chapter(text_obj, target_language)
    return translated_obj

def sentiment_analysis_article(article: Article):
    # 只能处理特定语言的情感
    if article.language == "en":
        return article.sentiment
    else:
        return "pos"
    
def sentiment_analysis_chapter(chapter: Chapter):
    # 只能处理特定语言的情感
    if chapter.language == "en":
        return chapter.sentiment
    else:
        return "pos"

def sentiment_analysis_sentence(sentence: Sentence):
    # 只能处理特定语言的情感
    if sentence.language == "en":
        return sentence.sentiment
    else:
        return "pos"

def aspect_based_sentiment_analysis(sentence: Sentence):
    if sentence.language == "en":
        return sentence.aspect_based_sent
    else:
        return []

def get_title(article: Article):
    return article.title

def split_article(article: Article):
    return article.chapters

def split_chapter(chapter: Chapter):
    return chapter.sentences

def construct_chapter(sentences: List[Sentence]):
    languages = []
    for sent in sentences:
        languages.append(sent.language)
    if len(set(languages)) == 1:
        # 如果只有一种语言，就设置为这种语言
        chap_lang = languages[0]
    else:
        chap_lang = "mix_language"
    chapter = Chapter(sentences=sentences, language=chap_lang)
    return chapter

def construct_article(chapters: List[Chapter], title=None):
    languages = []
    for chap in chapters:
        languages.append(chap.language)
    if title:
        languages.append(title.language)
    if len(set(languages)) == 1:
        # 如果只有一种语言，就设置为这种语言
        art_lang = languages[0]
    else:
        art_lang = "mix_language"
    article = Article(chapters=chapters, language=art_lang, title=title)
    return article
        



### 包装好的工具

In [120]:
import json
from pathlib import Path
from pydantic import BaseModel

OBJ_DIR = "./data"

def save_object_to_json(obj: BaseModel):
    """
    将给定的对象保存为JSON文件。
    :param obj: 要保存的对象，可以是Sentence, Chapter, 或Article类型的实例。
    """
    # 确保目录存在
    Path(OBJ_DIR).mkdir(parents=True, exist_ok=True)
    
    # 使用对象的id属性作为文件名
    file_path = Path(OBJ_DIR) / f"{obj.id}.json"
    
    # 序列化为JSON字符串
    json_str = obj.model_dump_json()
    
    # 写入文件
    with open(file_path, 'w') as file:
        file.write(json_str)

def load_object_from_json(obj_id: UUID):
    """
    从JSON文件加载对象，并自动转换为正确的Pydantic模型。
    :param obj_id: 对象的uuid
    :return: 返回对应的Sentence, Chapter, 或Article类型的实例。
    """
    # 确保目录存在
    Path(OBJ_DIR).mkdir(parents=True, exist_ok=True)
    
    # 使用对象的id属性作为文件名
    file_path = Path(OBJ_DIR) / f"{obj_id}.json"
    with open(file_path, 'r') as file:
        data = json.load(file)
    
    # 根据"data"中的特定字段判断对象类型
    if 'chapters' in data:
        return Article.model_validate(data)
    elif 'sentences' in data:
        return Chapter.model_validate(data)
    elif 'aspect_based_sent' in data:
        return Sentence.model_validate(data)
    else:
        raise ValueError("无法识别JSON数据所属的模型类型")

# # 示例用法：
# sentence = Sentence(content=[uuid4()], language="fr", sentiment="pos")
# save_object_to_json(sentence)
# loaded_sentence = load_object_from_json(sentence.id)

In [121]:
def translate_tool(text_obj_id: UUID, target_language: str):
    text_obj = load_object_from_json(text_obj_id)
    translated_text = translate_text(text_obj, target_language)
    save_object_to_json(translated_text)
    return translated_text.id

def sentiment_analysis_article_tool(article_id: UUID):
    article_obj = load_object_from_json(article_id)
    senti = sentiment_analysis_article(article_obj)
    return senti

    
def sentiment_analysis_chapter_tool(chapter_id: UUID):
    chapter_obj = load_object_from_json(chapter_id)
    senti = sentiment_analysis_article(chapter_obj)
    return senti

def sentiment_analysis_sentence_tool(sentence_id: UUID):
    sentence_obj = load_object_from_json(sentence_id)
    senti = sentiment_analysis_article(sentence_obj)
    return senti

def aspect_based_sentiment_analysis_tool(sentence_id: UUID):
    sentence_obj = load_object_from_json(sentence_id)
    senti = aspect_based_sentiment_analysis(sentence_obj)
    return senti

def get_title_tool(sentence_id: UUID):
    sentence_obj = load_object_from_json(sentence_id)
    return get_title(sentence_obj)

def split_article_tool(article_id: UUID):
    article_obj = load_object_from_json(article_id)
    chap_objs = split_article(article_obj)
    chap_ids = [chap.id for chap in chap_objs]
    return chap_ids

def split_chapter_tool(chapter_id: UUID):
    chapter_obj = load_object_from_json(chapter_id)
    sent_objs = split_article(chapter_obj)
    sent_ids = [sent.id for sent in sent_objs]
    return sent_ids

def construct_chapter_tool(sentences_id: List[UUID]):
    sent_objs = [load_object_from_json(s) for s in sentences_id]
    chapter_obj = construct_chapter(sent_objs)
    save_object_to_json(chapter_obj)
    return chapter_obj.id

def construct_article_tool(chapters_id: List[UUID], title_id=None):
    chap_objs = [load_object_from_json(c) for c in chapters_id]
    if title_id:
        title_obj = load_object_from_json(title_id)
        article_obj = construct_article(chap_objs, title=title_obj)
    else:
        article_obj = construct_article(chap_objs)
    save_object_to_json(article_obj)
    return article_obj.id
        

In [122]:
# 创建句子
sentence1 = Sentence(
    content=[uuid4(), uuid4(), uuid4()],
    language="zh",
    sentiment="pos"  # 可手动指定情感
)


asp = Aspect_Sentiment(aspect_term="s1", opinion="o1", sentiment="pos")
sentence2 = Sentence(
    content=[uuid4(), uuid4()],
    language="zh",  # 情感自动生成
    aspect_based_sent=[asp]
)

# 创建章节（自动聚合内容）
chapter = Chapter(
    sentences=[sentence1, sentence2],
    language="zh"
)
cid = chapter.id
save_object_to_json(chapter)
chapter = load_object_from_json(cid)

双向翻译测试通过 ✅


整体工具调用通过json读写来完成，id就是那个唯一的名称，通过对id处理读取对应的json，然后每次输出如果返回的是对象或者对象的列表，就用json文件的id取代.
这里id代表的就是这个文件的全文的意思


接下来的步骤：
1. 把文本这块的环境搭建好
2. 构建一些评测例子
3. 搭建baseline，测试当前模型的效果（考虑qwen7b，gpt4o）


## 构建评测任务
思路：评测任务需要给定的输入到目标的输出,这里面的id就是唯一对应的文本的内容

文件操作的话沟通交流的时候就是文件所在路径，通过文件路径对应到实际操作的那个文件对象




## 网页获取类

新闻网站：
api1：查询某日的新闻列表，获取新闻题目对应的题目
api2: 根据新闻的url获取新闻的全文
api3: 输入关键词，获取关于这个关键词的所有新闻，关键词设置一个新的类，

统计数据网站：
世界银行

直接考虑拿现有的工具数据集去玩

以及从现有的工具集里面找一些改造

In [1]:
ToolBeHonest就是我要的那个

NameError: name 'ToolBeHonest就是我要的那个' is not defined