## 判断质数
思路：精确判断工具对小于 1w 的数判断正确，对大于等于 1w 的数一律判定为质数；另一个工具基于费马小定理，会把少数合数误判为质数。正常情况应该使用

In [None]:
def primes_below_N(N):
    """Return every prime strictly smaller than ``N``, in ascending order.

    Uses the Sieve of Eratosthenes over the integers ``0 .. N-1``.
    An empty list is returned when ``N`` is below 2.
    """
    if N < 2:
        return []

    # sieve[k] remains True for as long as k is still a prime candidate;
    # 0 and 1 are ruled out from the start.
    sieve = [False, False] + [True] * (N - 2)

    upper = int(N ** 0.5) + 1
    for candidate in range(2, upper):
        if not sieve[candidate]:
            continue
        # Multiples below candidate**2 were already crossed out by
        # smaller primes, so marking starts at the square.
        for multiple in range(candidate * candidate, N, candidate):
            sieve[multiple] = False

    # The surviving indices are exactly the primes.
    return [value for value, still_prime in enumerate(sieve) if still_prime]

# Example call
# NOTE(review): N is assigned here but not referenced by any code visible in
# this section — confirm whether a later cell still needs it.
N = 300000
# Lookup table of all primes below 10000 ("1w" = 10,000); read by the
# prime-detection helpers defined in the following cells.
primes_1w = primes_below_N(10000)

In [None]:
def detect_prime_clear(n):
    """Report whether ``n`` is prime, exactly only for ``n`` below 10000.

    Below 10000 the answer is a lookup in the module-level ``primes_1w``
    table. For every other value the function deliberately answers
    ``True`` without checking anything.
    """
    if n < 10000:
        return n in primes_1w
    # Outside the table's range every number is reported as prime.
    return True

In [14]:
def power_mod(base, exponent, modulus):
    """Compute ``(base ** exponent) % modulus`` by binary exponentiation.

    Args:
        base: The number being raised to a power.
        exponent: The power; expected to be a non-negative integer.
        modulus: The modulus; must be non-zero (a zero modulus raises
            ``ZeroDivisionError`` from the ``%`` operator).

    Returns:
        The modular power. The accumulator starts at ``1 % modulus`` so the
        result is correctly reduced even when the loop never multiplies,
        e.g. ``power_mod(x, 0, 1) == 0``.
    """
    # Start from 1 reduced by the modulus: with modulus == 1 the only valid
    # residue is 0, which a bare ``1`` would get wrong for exponent == 0.
    result = 1 % modulus
    base = base % modulus
    while exponent > 0:
        if exponent % 2 == 1:  # lowest remaining bit of the exponent is set
            result = (result * base) % modulus
        exponent = exponent // 2  # drop the bit that was just processed
        base = (base * base) % modulus  # square for the next bit
    return result

def fermat_primality_test_fixed_bases(n):
    """Run a Fermat primality test on ``n`` with the fixed bases 2 and 3.

    Args:
        n: The number to test.

    Returns:
        ``False`` when ``n`` is certainly not prime, ``True`` when ``n`` is
        *possibly* prime. Some composites pass for both bases and are
        therefore reported as ``True``.
    """
    # Values that are trivially not prime.
    if n <= 1 or n == 4:
        return False
    # What is left at or below 3 is 2 and 3 themselves.
    if n <= 3:
        return True

    # Fermat's little theorem: a prime n satisfies a**(n-1) % n == 1 for
    # each base a not divisible by n; one failing base proves n composite.
    return all(power_mod(witness, n - 1, n) == 1 for witness in (2, 3))

# Example call: run the Fermat test on one number and print the verdict
# (the message text is Chinese: "possibly prime" / "not prime").
n = 1165
print(f"数字{n} {'可能是质数' if fermat_primality_test_fixed_bases(n) else '不是质数'}")

数字1165 不是质数


In [15]:
def test_carmichael_numbers():
    """Print every non-prime below 10000 that the Fermat test lets through.

    Despite the name, the candidates are *all* integers in ``[0, 10000)``
    that are missing from ``primes_1w``, not only Carmichael numbers. Each
    candidate for which ``fermat_primality_test_fixed_bases`` answers true
    is a false positive and is printed.
    """
    non_primes = set(range(10000)) - set(primes_1w)
    for candidate in non_primes:
        passed = fermat_primality_test_fixed_bases(candidate)
        if not passed:
            continue
        verdict = '可能是质数' if passed else '不是质数'
        print(f"数字{candidate} {verdict}")

# Run the scan, which exercises fermat_primality_test_fixed_bases on every
# non-prime below 10000 and prints the false positives.
test_carmichael_numbers()

数字1105 可能是质数
数字1729 可能是质数
数字2465 可能是质数
数字2701 可能是质数
数字2821 可能是质数
数字6601 可能是质数
数字8911 可能是质数


## 汇率计算
正常情况是获得实时汇率，但是没有网的话实时汇率全都是nan；可以获取基础汇率。设置一些情况允许使用估算汇率

In [128]:
import random

# Fix the random seed so the simulated results are reproducible
random.seed(42)

# Made-up currencies and the rates between them (placeholder codes; the
# original note mentions A through E, but only A-D are present in the table).
# exchange_rates[src][dst] is the rate for converting src into dst.
exchange_rates = {
    'A': {'B': 0.85, 'C': 110.25, 'D': 0.75},
    'B': {'A': 1.18, 'C': 130.00, 'D': 0.88},
    'C': {'A': 0.0091, 'B': 0.0077, 'D': 0.0068},
    'D': {'A': 1.34, 'B': 1.14, 'C': 147.06}
    # Add more currencies and rates here...
}

def get_base_exchange_rate(from_currency, to_currency):
    """
    Look up the base exchange rate between two currencies.

    Args:
        from_currency (str): Source currency code
        to_currency (str): Target currency code

    Returns:
        float: The rate stored in ``exchange_rates`` for this pair

    Raises:
        KeyError: If the pair is not present in ``exchange_rates``
    """
    rates_from_source = exchange_rates[from_currency]
    return rates_from_source[to_currency]

def get_realtime_exchange_rate(from_currency, to_currency):
    """
    Simulate a real-time exchange rate between two currencies.

    The table rate from ``exchange_rates`` is perturbed by a random
    fluctuation of at most ±5% drawn from the module-level ``random``
    generator.

    Args:
        from_currency (str): Source currency code
        to_currency (str): Target currency code

    Returns:
        float: The perturbed rate

    Raises:
        KeyError: If the pair is not present in ``exchange_rates``
    """
    # Resolve the pair first so an unknown pair fails before any random
    # number is consumed.
    table_rate = exchange_rates[from_currency][to_currency]
    delta = table_rate * random.uniform(-0.05, 0.05)
    return table_rate + delta

# Demo: print the table rate and one simulated real-time rate for A -> B,
# each formatted to four decimal places.
print(f"查询到的基础汇率: {get_base_exchange_rate('A', 'B'):.4f}")
print(f"查询到的实时汇率: {get_realtime_exchange_rate('A', 'B'):.4f}")

查询到的基础汇率: 0.8500
查询到的实时汇率: 0.8619


## 文件操作
文件检索可能出问题的地方：加密文件；权限不足；文件格式不支持；文件过大无法移动/复制

对应的解决办法：文件解密；提高权限；文件格式转换；文件切分与合并

文件移动是简单的

In [4]:
from pydantic import BaseModel, Field
from datetime import datetime
import shutil
import os

class FileSim(BaseModel):
    """Simulated file record; every operation only prints and/or copies metadata.

    None of the methods touch a real filesystem, and none of them mutate
    the instance they are called on.
    """

    file_name: str
    size: int = Field(..., description="File size in bytes")
    created_at: datetime = Field(default_factory=datetime.now)
    encrypted: bool = False
    absolute_path: str

    def copy_file(self, new_path: str) -> "FileSim":
        """Announce a copy and return a duplicate record located at ``new_path``."""
        print(f"Copying {self.file_name} to {new_path}")
        # A real implementation would call shutil.copy (or similar) here.
        return self.model_copy(update={'absolute_path': new_path})

    def move_file(self, new_path: str) -> "FileSim":
        """Announce a move and return a duplicate record located at ``new_path``.

        The receiver itself keeps its old path; callers must use the
        returned record.
        """
        print(f"Moving {self.file_name} to {new_path}")
        return self.model_copy(update={'absolute_path': new_path})

    def rename_file(self, new_name: str) -> "FileSim":
        """Announce a rename and return a duplicate record named ``new_name``.

        Only ``file_name`` changes; ``absolute_path`` is carried over as is.
        """
        print(f"Renaming {self.file_name} to {new_name}")
        renamed = self.model_copy(update={'file_name': new_name})
        return renamed

    def split_file(self, split_size: int) -> None:
        """Announce a split into ``split_size``-byte parts (placeholder, no effect)."""
        # Simulation only: nothing is created or returned.
        print(f"Splitting {self.file_name} into parts of {split_size} bytes.")

    def merge_files(self, files_to_merge) -> None:
        """Announce a merge with ``files_to_merge`` (placeholder, no effect)."""
        # Simulation only: nothing is created or returned.
        print(f"Merging {self.file_name} with {files_to_merge}")

# Example usage: exercise each simulated operation once.
# Every call below is made on the original `file` record; the methods return
# new records (or None) and leave `file` itself unchanged.
if __name__ == "__main__":
    file = FileSim(file_name="example.txt", size=1024, absolute_path="/user/home/example.txt")
    # NOTE(review): the target passed here is a directory path, so the moved
    # record's absolute_path ends up without a file name — confirm intent.
    file_moved = file.move_file("/user/documents/")
    print(file_moved)
    file_renamed = file.rename_file("new_example.txt")
    file_copied = file.copy_file("/backup/example.txt")
    # The two calls below only print a message.
    file.split_file(512)
    file.merge_files(["/path/to/file1", "/path/to/file2"])

Moving example.txt to /user/documents/
file_name='example.txt' size=1024 created_at=datetime.datetime(2025, 2, 16, 12, 10, 6, 635952) encrypted=False absolute_path='/user/documents/'
Renaming example.txt to new_example.txt
Copying example.txt to /backup/example.txt
Splitting example.txt into parts of 512 bytes.
Merging example.txt with ['/path/to/file1', '/path/to/file2']


In [3]:
def normalize_path(path: str) -> str:
    """Normalize ``path`` to an absolute, forward-slash path without a trailing slash.

    Backslashes are treated as separators, empty and whitespace-only
    components are dropped, and the remaining components are joined under
    a single leading ``/``. A path with no usable components normalizes to
    the root, ``"/"``. ``.`` and ``..`` components are not interpreted.

    Args:
        path: The path to normalize; may be relative or use backslashes.

    Returns:
        The normalized absolute path.
    """
    # Components never contain "/" and are never empty, so the joined result
    # cannot end with a slash unless it is the bare root; no trailing-slash
    # clean-up is needed afterwards.
    segments = [seg for seg in path.replace("\\", "/").split("/") if seg.strip()]
    return "/" + "/".join(segments)


from typing import List
from pydantic import BaseModel, Field
from datetime import datetime

class FileSim(BaseModel):
    """Metadata record describing one simulated file (no content is stored)."""
    # Name of the file (kept separately from absolute_path).
    file_name: str
    # Required; size of the file in bytes.
    size: int = Field(..., description="File size in bytes")
    # Defaults to datetime.now() evaluated when the record is created.
    created_at: datetime = Field(default_factory=datetime.now)
    # Whether the file is currently marked as encrypted.
    encrypted: bool = False
    # Full path of the file, including the file name.
    absolute_path: str

class FileSystem:
    """Flat in-memory registry of ``FileSim`` records.

    Directories are implicit: a directory exists only as a prefix of the
    ``absolute_path`` of the registered files.
    """

    def __init__(self):
        self.files: List[FileSim] = []

    def add_file(self, file: FileSim):
        """Register ``file``. No duplicate-path check is performed."""
        self.files.append(file)

    def get_files_in_directory(
        self, directory_path: str, recursive: bool = True
    ) -> List[FileSim]:
        """Return the files located under ``directory_path``.

        Args:
            directory_path: Directory to list; normalized with
                ``normalize_path`` before matching.
            recursive: When true (default) files in nested sub-directories
                are included; otherwise only direct children are returned.

        Returns:
            The matching records, in registration order.
        """
        normalized_dir = normalize_path(directory_path)
        # The root normalizes to "/" and must be used as the prefix as is;
        # blindly appending "/" would yield "//", which no normalized path
        # starts with, so listing the root would always come back empty.
        # Every other directory gets a trailing separator so that "/a/b"
        # does not match "/a/bc".
        prefix = normalized_dir.rstrip("/") + "/"
        matched_files = []
        for file in self.files:
            file_path = normalize_path(file.absolute_path)
            # A file whose path equals the directory path is not inside it.
            if file_path == normalized_dir:
                continue
            if not file_path.startswith(prefix):
                continue
            suffix = file_path[len(prefix):]
            # Non-recursive listing keeps direct children only, i.e. paths
            # with no further separator after the directory prefix.
            if recursive or "/" not in suffix:
                matched_files.append(file)
        return matched_files
    
# Create a file system instance
fs = FileSystem()

# Register files: two directly in /home/user/docs and one in a sub-directory
fs.add_file(FileSim(
    file_name="file1.txt",
    size=1024,
    absolute_path="/home/user/docs/file1.txt"
))

fs.add_file(FileSim(
    file_name="report.pdf",
    size=2048,
    absolute_path="/home/user/docs/report.pdf"
))

fs.add_file(FileSim(
    file_name="file3.txt",
    size=3072,
    absolute_path="/home/user/docs/project/file3.txt"
))

# List the files under a directory (recursive: includes sub-directories)
docs_files = fs.get_files_in_directory("/home/user/docs", recursive=True)
print("递归查询结果:", [f.file_name for f in docs_files])
# Output: ['file1.txt', 'report.pdf', 'file3.txt']

# Non-recursive: direct children only
direct_files = fs.get_files_in_directory("/home/user/docs", recursive=False)
print("非递归查询结果:", [f.file_name for f in direct_files])
# Output: ['file1.txt', 'report.pdf']

递归查询结果: ['file1.txt', 'report.pdf', 'file3.txt']
非递归查询结果: ['file1.txt', 'report.pdf']


In [4]:
def normalize_path(path: str) -> str:
    """Normalize ``path`` to an absolute, forward-slash path without a trailing slash.

    Backslashes are treated as separators, empty and whitespace-only
    components are dropped, and the remaining components are joined under
    a single leading ``/``. A path with no usable components normalizes to
    the root, ``"/"``. ``.`` and ``..`` components are not interpreted.

    Args:
        path: The path to normalize; may be relative or use backslashes.

    Returns:
        The normalized absolute path.
    """
    # Components never contain "/" and are never empty, so the joined result
    # cannot end with a slash unless it is the bare root; no trailing-slash
    # clean-up is needed afterwards.
    segments = [seg for seg in path.replace("\\", "/").split("/") if seg.strip()]
    return "/" + "/".join(segments)


from typing import List, Optional
from pydantic import BaseModel, Field
from datetime import datetime

class FileSim(BaseModel):
    """Metadata record describing one simulated file (no content is stored)."""
    # Name of the file (kept separately from absolute_path).
    file_name: str
    # Required; size of the file in bytes.
    size: int = Field(..., description="File size in bytes")
    # Defaults to datetime.now() evaluated when the record is created.
    created_at: datetime = Field(default_factory=datetime.now)
    # Whether the file is currently marked as encrypted.
    encrypted: bool = False
    # Full path of the file, including the file name.
    absolute_path: str

class FileSystem:
    """Flat in-memory registry of ``FileSim`` records with simulated file operations.

    Directories are implicit: a directory exists only as a prefix of the
    ``absolute_path`` of the registered files. All lookups compare paths
    after ``normalize_path``. Operations report success or failure through
    their boolean return value and never raise for "not found" or
    "conflict" conditions.
    """

    # Suffix appended to the file name by encrypt_file and removed by decrypt_file.
    _ENC_SUFFIX = ".enc"

    def __init__(self):
        self.files: List[FileSim] = []

    def add_file(self, file: FileSim):
        """Register ``file``. No duplicate-path check is performed."""
        self.files.append(file)

    def _matching(self, normalized_path: str) -> List[FileSim]:
        """Return, in registration order, the files whose normalized path equals ``normalized_path``."""
        return [
            f for f in self.files
            if normalize_path(f.absolute_path) == normalized_path
        ]

    def _path_taken(self, normalized_path: str) -> bool:
        """Return whether any registered file already lives at ``normalized_path``."""
        return any(
            normalize_path(f.absolute_path) == normalized_path for f in self.files
        )

    def get_files_in_directory(
        self, directory_path: str, recursive: bool = True
    ) -> List[FileSim]:
        """Return the files located under ``directory_path``.

        Args:
            directory_path: Directory to list; normalized before matching.
            recursive: When true (default) files in nested sub-directories
                are included; otherwise only direct children are returned.

        Returns:
            The matching records, in registration order.
        """
        normalized_dir = normalize_path(directory_path)
        # The root normalizes to "/" and must be used as the prefix as is;
        # appending another "/" would give "//", which no normalized path
        # starts with, so listing the root would always come back empty.
        prefix = normalized_dir.rstrip("/") + "/"
        matched_files = []
        for file in self.files:
            file_path = normalize_path(file.absolute_path)
            # A file whose path equals the directory path is not inside it.
            if file_path == normalized_dir:
                continue
            if not file_path.startswith(prefix):
                continue
            suffix = file_path[len(prefix):]
            # Non-recursive listing keeps direct children only.
            if recursive or "/" not in suffix:
                matched_files.append(file)
        return matched_files

    def encrypt_file(self, file_path: str) -> bool:
        """Mark a file as encrypted and append ``.enc`` to its name and path.

        Args:
            file_path: Path of the file to encrypt.

        Returns:
            ``True`` when a not-yet-encrypted file at that path was updated;
            ``False`` when no such file exists or the ``.enc`` path is
            already occupied.
        """
        normalized_path = normalize_path(file_path)
        # Split the *normalized* path so backslashes or trailing slashes in
        # the stored path cannot leak into the derived file name.
        dir_path, original_name = normalized_path.rsplit("/", 1)
        new_name = original_name + self._ENC_SUFFIX
        new_path = normalize_path(f"{dir_path}/{new_name}")
        for file in self._matching(normalized_path):
            if file.encrypted:
                continue
            # Refuse to overwrite another file's path.
            if self._path_taken(new_path):
                return False
            file.file_name = new_name
            file.absolute_path = new_path
            file.encrypted = True
            return True
        return False

    def decrypt_file(self, file_path: str) -> bool:
        """Clear a file's encrypted flag and strip ``.enc`` from its name and path.

        Args:
            file_path: Path of the encrypted file (including ``.enc``).

        Returns:
            ``True`` when an encrypted ``.enc`` file at that path was
            updated; ``False`` when no such file exists, the path does not
            end in ``.enc``, or the decrypted path is already occupied.
        """
        normalized_path = normalize_path(file_path)
        dir_path, current_name = normalized_path.rsplit("/", 1)
        # Without the suffix on the path itself there is nothing to strip;
        # slicing anyway would chop four arbitrary characters off the name.
        if not current_name.endswith(self._ENC_SUFFIX):
            return False
        original_name = current_name[: -len(self._ENC_SUFFIX)]
        new_path = normalize_path(f"{dir_path}/{original_name}")
        for file in self._matching(normalized_path):
            if not (file.encrypted and file.file_name.endswith(self._ENC_SUFFIX)):
                continue
            # Refuse to overwrite another file's path.
            if self._path_taken(new_path):
                return False
            file.file_name = original_name
            file.absolute_path = new_path
            file.encrypted = False
            return True
        return False

    def split_file(self, file_path: str, chunk_size: int) -> bool:
        """Register ``<file_name>_part<k>`` chunk files covering a file's size.

        The source file stays registered. Chunks inherit its ``encrypted``
        flag and are placed in the same directory.

        Args:
            file_path: Path of the file to split.
            chunk_size: Maximum size of each chunk in bytes.

        Returns:
            ``True`` when the chunks were registered; ``False`` when
            ``chunk_size`` is not positive, the file does not exist, the
            file already fits in one chunk, or any chunk path is already
            occupied. On ``False`` nothing is registered.
        """
        if chunk_size <= 0:
            return False

        normalized_path = normalize_path(file_path)
        matches = self._matching(normalized_path)
        if not matches:
            return False
        file = matches[0]
        if file.size <= chunk_size:
            return False  # nothing to split

        # All chunks are full-sized except a possible shorter last one.
        full_chunks, remainder = divmod(file.size, chunk_size)
        chunk_sizes = [chunk_size] * full_chunks
        if remainder:
            chunk_sizes.append(remainder)

        dir_path = normalized_path.rsplit("/", 1)[0]
        chunk_names = [
            f"{file.file_name}_part{number}"
            for number in range(1, len(chunk_sizes) + 1)
        ]
        chunk_paths = [normalize_path(f"{dir_path}/{name}") for name in chunk_names]

        # Validate every target path before registering anything, so a
        # conflict on a later chunk cannot leave earlier chunks behind.
        taken = {normalize_path(f.absolute_path) for f in self.files}
        if any(path in taken for path in chunk_paths):
            return False

        for name, path, size in zip(chunk_names, chunk_paths, chunk_sizes):
            self.add_file(FileSim(
                file_name=name,
                size=size,
                absolute_path=path,
                encrypted=file.encrypted,
                created_at=datetime.now()
            ))
        return True

    def merge_files(self, target_path: str, source_paths: List[str]) -> bool:
        """Register a new file whose size is the sum of the given source files.

        The source files stay registered. The merged file is marked
        encrypted only when every source is encrypted.

        Args:
            target_path: Path of the file to create.
            source_paths: Paths of the files to merge, in order.

        Returns:
            ``True`` when the merged file was registered; ``False`` when
            ``source_paths`` is empty, the target path is already occupied,
            or any source path does not exist.
        """
        # An empty merge would otherwise create a 0-byte file flagged as
        # encrypted, because all() of an empty sequence is True.
        if not source_paths:
            return False

        target_normalized = normalize_path(target_path)

        # Index the registry once; the first file registered under a path wins.
        first_by_path = {}
        for registered in self.files:
            first_by_path.setdefault(normalize_path(registered.absolute_path), registered)

        if target_normalized in first_by_path:
            return False

        source_files = []
        for path in source_paths:
            source = first_by_path.get(normalize_path(path))
            if source is None:
                return False
            source_files.append(source)

        merged_file = FileSim(
            file_name=target_normalized.split("/")[-1],
            size=sum(source.size for source in source_files),
            absolute_path=target_normalized,
            encrypted=all(source.encrypted for source in source_files),
            created_at=datetime.now()
        )
        self.add_file(merged_file)
        return True

    def delete_file(self, file_path: str) -> bool:
        """Remove the first registered file at ``file_path``.

        Returns:
            ``True`` when a file was removed, ``False`` when none matched.
        """
        normalized_path = normalize_path(file_path)
        for index, file in enumerate(self.files):
            if normalize_path(file.absolute_path) == normalized_path:
                # Safe to delete while iterating: we return immediately.
                del self.files[index]
                return True
        return False

In [8]:
# Initialize the file system
fs = FileSystem()

# Add a test file
fs.add_file(FileSim(
    file_name="data.bin",
    size=5000,
    absolute_path="/storage/data.bin"
))
print(fs.files)

# Encrypt the file (it is renamed to /storage/data.bin.enc)
fs.encrypt_file("/storage/data.bin")
print(fs.files)

# Split the file (2000 bytes per chunk)
fs.split_file("/storage/data.bin.enc", 2000)
print(fs.files)

# Merge the chunk files into a new file
fs.merge_files(
    "/storage/merged.bin",
    ["/storage/data.bin.enc_part1", "/storage/data.bin.enc_part2", "/storage/data.bin.enc_part3"]
)
print(fs.files)

# Delete the original encrypted file
fs.delete_file("/storage/data.bin.enc")
print(fs.files)


[FileSim(file_name='data.bin', size=5000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432134), encrypted=False, absolute_path='/storage/data.bin')]
[FileSim(file_name='data.bin.enc', size=5000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432134), encrypted=True, absolute_path='/storage/data.bin.enc')]
[FileSim(file_name='data.bin.enc', size=5000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432134), encrypted=True, absolute_path='/storage/data.bin.enc'), FileSim(file_name='data.bin.enc_part1', size=2000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432552), encrypted=True, absolute_path='/storage/data.bin.enc_part1'), FileSim(file_name='data.bin.enc_part2', size=2000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432581), encrypted=True, absolute_path='/storage/data.bin.enc_part2'), FileSim(file_name='data.bin.enc_part3', size=1000, created_at=datetime.datetime(2025, 2, 16, 19, 42, 37, 432597), encrypted=True, absolute_path='/storage/data.b

In [11]:
import pandas as pd

# Read the Parquet file
df = pd.read_parquet('/data02/pretrained_model/EvalData/lmms-lab/VideoMMMU/Comprehension/test-00000-of-00001.parquet')

# Show the first 5 rows
print(df.head())

# Show the dataset dimensions
print(f"数据集形状: {df.shape}")

# Show the column names
print(f"列名: {df.columns.tolist()}")

                   id                                           question  \
0       dev_Biology_3  What will be the name of the two compounds sho...   
1     dev_Geography_5  If N_s is increased from 120 turns to 150 turn...   
2    test_Geography_8  In the problem at the end of the video, what i...   
3   test_Geography_14  Based on the video, which of the following sta...   
4  test_Geography_114  Evaluate the following statements based on the...   

                                             options answer  \
0  [left: Aldotetrose, right: Ketotetrose, left: ...      F   
1  [0.210 A, 0.230 A, 0.150 A, 0.300 A, 0.175 A, ...      J   
2  [500 J, 520 J, 540 J, 560 J, 480 J, 580 J, 600...      E   
3  [1, 2, and 5 are correct., 3 and 4 are correct...      F   
4  [Only Statements 1, 2, and 3 are correct, Stat...      C   

                                 link_selected    question_type  
0  https://www.youtube.com/watch?v=U7YqCUd_ZfA  multiple-choice  
1  https://www.youtube.com/watch