In [2]:
# Extract the dataset archive; per the recorded output it unpacks into dataset/.
!unzip ./dataset.zip

Archive:  ./dataset.zip
   creating: dataset/
   creating: dataset/angry/
  inflating: dataset/angry/1.png     
  inflating: dataset/angry/202412221.png  
  inflating: dataset/angry/Snipaste_2024-12-21_20-27-14.png  
  inflating: dataset/angry/Snipaste_2024-12-21_20-27-55.png  
  inflating: dataset/angry/Snipaste_2024-12-21_20-28-49.png  
  inflating: dataset/angry/Snipaste_2024-12-21_20-53-10.png  
  inflating: dataset/angry/Snipaste_2024-12-21_20-53-58.png  
  inflating: dataset/angry/Snipaste_2024-12-21_20-54-20.png  
  inflating: dataset/angry/Snipaste_2024-12-21_20-54-32.png  
  inflating: dataset/angry/Snipaste_2024-12-21_21-01-06.png  
  inflating: dataset/angry/Snipaste_2024-12-21_21-07-26.png  
  inflating: dataset/angry/Snipaste_2024-12-21_21-10-46.png  
  inflating: dataset/angry/Snipaste_2024-12-21_21-11-07.png  
  inflating: dataset/angry/Snipaste_2024-12-21_21-11-27.png  
  inflating: dataset/angry/Snipaste_2024-12-21_21-11-41.png  
  inflating: dataset/angry/Snipaste_202

In [1]:
# Delete the extracted dataset directory.
# NOTE(review): the execution counts show this cell ran before the unzip cell above;
# run top-to-bottom it removes the freshly extracted dataset/ -- confirm the intended placement.
!rm -rf ./dataset/

In [3]:
import os
import uuid
import logging
from pathlib import Path

def setup_logging(log_file='rename_log.txt'):
    """
    Configure root logging to write INFO-and-above records to a file and the console.

    Handlers already attached to the root logger are detached and closed first.
    ``logging.basicConfig`` does nothing when the root logger already has
    handlers, yet the ``FileHandler`` passed to it is still constructed, so a
    second call (for example re-running the notebook cell) would truncate
    ``log_file`` and leak an open file while the old handler kept writing at
    its stale offset.

    :param log_file: Path of the log file; it is overwritten on every call.
    """
    root_logger = logging.getLogger()
    # Iterate over a copy: removeHandler mutates root_logger.handlers.
    for stale_handler in list(root_logger.handlers):
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file, mode='w', encoding='utf-8'),
            logging.StreamHandler()
        ]
    )

def is_supported_image(file_extension):
    """
    Tell whether a file extension belongs to a supported image format.

    :param file_extension: The file's extension, expected in lower case
                           and including the leading dot (e.g. '.png')
    :return: True if the extension is supported, otherwise False
    """
    return file_extension in frozenset(('.png', '.jpg'))

def generate_unique_filename(directory, extension):
    """
    Produce a random UUID-based file name that is not yet taken in a directory.

    Candidates are drawn until one is found whose path does not exist.

    :param directory: Target directory path
    :param extension: File extension including the dot, e.g. '.png'
    :return: The unused file name (string, without the directory part)
    """
    candidate = f"{uuid.uuid4()}{extension}"
    # Redraw in the (extremely unlikely) event that the name already exists.
    while os.path.exists(os.path.join(directory, candidate)):
        candidate = f"{uuid.uuid4()}{extension}"
    return candidate

def rename_files_recursive(root_directory):
    """
    Recursively rename every supported image under a directory to a UUID-based name.

    Each file stays in its own directory and keeps its extension (lower-cased).
    Every visited path is logged: renamed, rename failed, unsupported file type,
    or skipped because it is not a regular file. If ``root_directory`` is not an
    existing directory, an error is logged and nothing else happens.

    :param root_directory: Root directory path, e.g. 'dataset/'
    """
    # Bail out early when the root directory is missing.
    if not os.path.isdir(root_directory):
        logging.error(f"目录不存在: {root_directory}")
        return

    root_path = Path(root_directory)

    # Snapshot the whole tree before renaming anything: rglob() is lazy, and
    # renaming entries while directories are still being scanned may cause
    # files to be visited again or missed.
    all_paths = list(root_path.rglob('*'))

    for file_path in all_paths:
        # Only regular files are candidates for renaming.
        if not file_path.is_file():
            logging.info(f"跳过子目录: {file_path}")
            continue

        # Compare extensions case-insensitively.
        extension = file_path.suffix.lower()
        if not is_supported_image(extension):
            logging.info(f"跳过不支持的文件类型: {file_path}")
            continue

        try:
            # Pick a name that does not collide inside the file's own directory.
            new_filename = generate_unique_filename(file_path.parent, extension)
            new_file_path = file_path.parent / new_filename
            file_path.rename(new_file_path)
            logging.info(f"已重命名: {file_path} -> {new_file_path}")
        except Exception as e:
            # Best effort: record the failure and carry on with the remaining files.
            logging.error(f"重命名失败: {file_path}. 错误: {e}")

def main(root_directory="dataset/", log_file='rename_log.txt'):
    """
    Entry point: configure logging, then rename the images under a directory.

    Both parameters default to the values that were previously hard-coded, so
    calling ``main()`` with no arguments behaves as before.

    :param root_directory: Root directory whose images are renamed, e.g. "path/to/your/dataset/"
    :param log_file: Path of the log file passed to ``setup_logging``
    """
    # Configure logging before any message is emitted.
    setup_logging(log_file)

    # Run the rename, bracketed by start/finish log lines.
    logging.info(f"开始重命名操作，目标目录: {root_directory}")
    rename_files_recursive(root_directory)
    logging.info("重命名操作完成。")

# Run the rename when this cell/script is executed directly.
if __name__ == "__main__":
    main()


2024-12-22 10:52:13,364 - INFO - 开始重命名操作，目标目录: dataset/
2024-12-22 10:52:13,365 - INFO - 跳过子目录: dataset/angry
2024-12-22 10:52:13,365 - INFO - 跳过子目录: dataset/disgust
2024-12-22 10:52:13,366 - INFO - 跳过子目录: dataset/fear
2024-12-22 10:52:13,366 - INFO - 跳过子目录: dataset/happy
2024-12-22 10:52:13,367 - INFO - 跳过子目录: dataset/neutral
2024-12-22 10:52:13,367 - INFO - 跳过子目录: dataset/sad
2024-12-22 10:52:13,368 - INFO - 跳过子目录: dataset/surprise
2024-12-22 10:52:13,370 - INFO - 已重命名: dataset/angry/1.png -> dataset/angry/d02854af-302c-482e-bbbd-aab7861aa673.png
2024-12-22 10:52:13,370 - INFO - 已重命名: dataset/angry/202412221.png -> dataset/angry/3bfbf328-38ef-4132-b382-68d51d2137a2.png
2024-12-22 10:52:13,371 - INFO - 已重命名: dataset/angry/Snipaste_2024-12-21_20-27-14.png -> dataset/angry/2730cc76-be3e-4ab0-b499-97167e32996a.png
2024-12-22 10:52:13,372 - INFO - 已重命名: dataset/angry/Snipaste_2024-12-21_20-27-55.png -> dataset/angry/37f1ef66-5b61-4675-9fd2-239a8fc0823f.png
2024-12-22 10:52:13,373 - INFO -

In [4]:
# Re-archive the dataset folder with the zip command-line tool.
# NOTE(review): the recorded output shows zip is not installed here ("command not found");
# the zipfile-based cell that follows performs the same job in pure Python.
!zip -r dataset.zip dataset/

/bin/bash: zip: command not found


In [6]:
import os
import zipfile

def zip_directory(folder_path, output_path):
    """
    Compress a folder into a zip file, keeping the folder itself as the top-level entry.

    Archive member names are relative to the folder's parent, so zipping
    'dataset/' (with or without the trailing slash) yields members such as
    'dataset/angry/x.png'. Only files are stored; empty directories are not.

    :param folder_path: Path of the folder to compress, e.g. 'dataset/'
    :param output_path: Path of the zip file to create, e.g. 'dataset.zip'
    :raises FileNotFoundError: if ``folder_path`` is not an existing directory
    """
    # Fail before creating anything instead of writing an empty archive and
    # reporting success.
    if not os.path.isdir(folder_path):
        raise FileNotFoundError(f"文件夹不存在: {folder_path}")

    # normpath strips a trailing separator; without it dirname('dataset/')
    # is 'dataset' and the top-level folder would vanish from member names.
    source_dir = os.path.normpath(folder_path)
    archive_root = os.path.dirname(source_dir) or os.curdir
    output_abs = os.path.abspath(output_path)

    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(source_dir):
            for file in files:
                file_path = os.path.join(root, file)
                # Never add the archive to itself when it lives inside the folder.
                if os.path.abspath(file_path) == output_abs:
                    continue
                # Name members relative to the parent to preserve the folder structure.
                relative_path = os.path.relpath(file_path, archive_root)
                zipf.write(file_path, relative_path)
    print(f"文件夹 '{folder_path}' 已成功压缩为 '{output_path}'")

# Zip the dataset folder when this cell/script is executed directly.
if __name__ == "__main__":
    folder_to_zip = "dataset/"        # path of the folder to compress
    zip_file_output = "dataset.zip"   # name of the resulting zip file
    zip_directory(folder_to_zip, zip_file_output)


文件夹 'dataset/' 已成功压缩为 'dataset.zip'
