In [1]:
#!/usr/bin/env python3
"""
clean_CSVMP4.py

"""
import sys
from pathlib import Path

def _remove_unmatched(paths: list, label: str, dry_run: bool) -> None:
    """Delete each path in *paths*, or only report it when *dry_run* is set."""
    for path in paths:
        if dry_run:
            print(f"Would remove unmatched {label}: {path}")
            continue
        try:
            path.unlink()
        except OSError as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"Failed to remove {path}: {e}")
        else:
            print(f"Removed unmatched {label}: {path}")


def clean_folder(csv_dir: Path, video_dir: Path, *, dry_run: bool = False) -> None:
    """
    Pair up CSV and MP4 files by stem and delete the ones without a partner.

    A ``*.csv`` file in ``csv_dir`` is kept only if ``video_dir`` holds a
    ``*.mp4`` file with the same stem, and vice versa. Only regular files
    directly inside the two folders are considered; sub-directories are
    never touched, even if their name matches the pattern.

    Args:
        csv_dir: Folder containing the CSV files.
        video_dir: Folder containing the MP4 files.
        dry_run: When true, print what would be removed without deleting
            anything. Defaults to false (files are really deleted).

    Returns:
        None. Progress and errors are reported on stdout. If either folder
        is missing, an error is printed and nothing is deleted.
    """
    print(f"-- Cleaning: {csv_dir.parent.name} --")
    for folder in (csv_dir, video_dir):
        if not folder.is_dir():
            print(f"Error: {folder} 폴더가 없습니다.")
            return

    # Keep the real path behind every stem so that exactly the file found by
    # the glob is deleted, rather than a name rebuilt from stem + extension.
    csv_by_stem = {p.stem: p for p in csv_dir.glob("*.csv") if p.is_file()}
    video_by_stem = {p.stem: p for p in video_dir.glob("*.mp4") if p.is_file()}

    # Sorted so that the deletion order (and the log) is reproducible.
    orphan_csv = sorted(csv_by_stem.keys() - video_by_stem.keys())
    orphan_video = sorted(video_by_stem.keys() - csv_by_stem.keys())

    _remove_unmatched([csv_by_stem[s] for s in orphan_csv], "CSV", dry_run)
    _remove_unmatched([video_by_stem[s] for s in orphan_video], "video", dry_run)

    print(f"Completed: {csv_dir.parent.name}\n")


def main():
    """
    Run clean_folder on every labelled group of the configured dataset split.

    The split root is hard-coded below and currently points at the ``train``
    split; edit the path to clean a different split.
    """
    dataset_root = Path(r"D:\golfDataset\dataset\train")

    # Each group folder holds a "crop_keypoint" (CSV) and a "crop_video"
    # (MP4) sub-folder that are cleaned against each other.
    for group in ("balanced_true", "false"):
        group_dir = dataset_root / group
        clean_folder(group_dir / "crop_keypoint", group_dir / "crop_video")

    print("All cleaning tasks finished.")

# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


-- Cleaning: balanced_true --
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\20201118_General_014_DOC_A_M40_MT_016_crop.csv
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\20201120_General_024_NOC_A_M40_MM_063_crop.csv
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\20201127_General_056_DOC_A_F20_MM_025_crop.csv
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\20201117_General_009_DOC_A_M40_SM_015_crop.csv
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\20201127_General_052_DOS_A_F30_MM_054_crop.csv
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\20201123_General_032_DOS_A_F30_MM_063_crop.csv
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\20201117_General_010_DOC_A_M40_MM_031_crop.csv
Removed unmatched CSV: D:\golfDataset\dataset\train\balanced_true\crop_keypoint\2