In [2]:
from __future__ import annotations

import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Optional, Tuple


@dataclass(frozen=True)
class CopySettings:
    """
    Immutable options for gathering experiment outputs into a normalized report folder.

    Attributes:
        ls_folder_output: Source folders that hold the experiment outputs.
        folder_store: Destination root that receives the copied files.
        namespace_by_source: Layout switch for the destination.
            True  -> each source keeps its own namespace:
                     folder_store/<source_name>/noise_x/alpha_y/...
            False -> sources are merged directly into:
                     folder_store/noise_x/alpha_y/...
        overwrite: When True, files already present in the destination may be
            overwritten.
        dry_run: When True, actions are printed but nothing is copied.
    """

    ls_folder_output: List[Path]
    folder_store: Path
    namespace_by_source: bool = False
    overwrite: bool = True
    dry_run: bool = True


def _iter_experiment_files(folder_output: Path) -> Iterable[Tuple[Path, Path]]:
    """
    Lazily walk folder_output and yield the files that belong in the report.

    Each yielded item is a (src_path, relative_path) pair, where relative_path
    is src_path expressed relative to folder_output.

    Patterns searched, in this order:
      1. noise_*/alpha_*/experiment_summary.csv
      2. noise_*/alpha_*/iteration_*/train_kept_*.csv
    """
    wanted_patterns = (
        "noise_*/alpha_*/experiment_summary.csv",
        "noise_*/alpha_*/iteration_*/train_kept_*.csv",
    )
    for pattern in wanted_patterns:
        for found in folder_output.glob(pattern):
            yield found, found.relative_to(folder_output)


def _safe_copy_file(
    src: Path,
    dst: Path,
    overwrite: bool,
    dry_run: bool,
) -> None:
    """
    Copy src -> dst, creating parent directories if needed.

    Uses shutil.copy2, which also attempts to preserve file metadata such as
    timestamps.

    Args:
        src: File to copy.
        dst: Destination file path.
        overwrite: If False and dst already exists, the call is a no-op.
        dry_run: If True, only print the planned copy. The filesystem is left
            untouched: no directory is created and nothing is copied.
    """
    if dst.exists() and not overwrite:
        # Skip to avoid silent overwrites
        return

    if dry_run:
        print(f"[DRY-RUN] COPY: {src} -> {dst}")
        return

    # Create the destination folder only when a real copy is about to happen,
    # so that skipped and dry-run calls have no side effects on disk.
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)


def copy_experiment_outputs(settings: CopySettings) -> None:
    """
    Copy all relevant experiment output files from ls_folder_output into folder_store
    while preserving the expected folder structure.

    Destination layout:
      - namespace_by_source=False (the CopySettings default): every source is
        merged into folder_store/noise_*/alpha_*/...
        With overwrite=True, a later source replaces files that an earlier
        source already placed at the same relative path.
      - namespace_by_source=True:
        folder_store/<source_name>/noise_*/alpha_*/...
        where <source_name> is the last path component of the source folder.

    Source entries that are not existing directories are reported and skipped.
    One "[OK]" summary line is printed per processed source. In dry-run mode
    this function does not create folder_store, and "copied" counts the files
    that were handed to _safe_copy_file for (dry-run) processing.

    Args:
        settings: Sources, destination and behavior flags for the copy.
    """
    folder_store = settings.folder_store.resolve()
    if not settings.dry_run:
        # A dry run must not leave an empty destination root behind.
        folder_store.mkdir(parents=True, exist_ok=True)

    for raw_source in settings.ls_folder_output:
        folder_output = raw_source.resolve()
        # is_dir() is already False for paths that do not exist.
        if not folder_output.is_dir():
            print(f"[WARN] Skip (not found or not dir): {folder_output}")
            continue

        if settings.namespace_by_source:
            base_dst = folder_store / folder_output.name
        else:
            base_dst = folder_store

        copied = 0
        skipped = 0

        for src, rel in _iter_experiment_files(folder_output):
            dst = base_dst / rel

            # Checked here (not only in the helper) so the skip can be counted.
            if dst.exists() and not settings.overwrite:
                skipped += 1
                continue

            _safe_copy_file(
                src=src,
                dst=dst,
                overwrite=settings.overwrite,
                dry_run=settings.dry_run,
            )
            copied += 1

        print(
            f"[OK] {folder_output} -> {base_dst} | copied={copied} skipped={skipped}"
        )


if __name__ == "__main__":
    # ====== INPUT SETTINGS (edit here) ======
    # Experiment output folders to collect from (one or more). Only files
    # matching noise_*/alpha_*/experiment_summary.csv and
    # noise_*/alpha_*/iteration_*/train_kept_*.csv below each folder are copied.
    ls_folder_output = [
        # Sources are processed in the order listed here.
        "/mnt/c/Users/truon/learning/ptit/research/trung/M_10_01_2025/code_v2/project/notebooks/cifar10_iter_ema_noise_validation",
        "/mnt/d/code_v2/project/notebooks/cifar10_iter_ema_noise_validation",
        # r"/mnt/c/Users/truon/learning/ptit/research/trung/M_10_01_2025/code_v2/project/notebooks",
    ]

    # Destination folder to store normalized structure for reporting.
    # A relative path is resolved against the current working directory.
    folder_store = "store_output_cifar10_iter_ema_noise_validation_v2"

    settings = CopySettings(
        ls_folder_output=[Path(p) for p in ls_folder_output],
        folder_store=Path(folder_store),

        # False merges all sources directly into folder_store, so files with the
        # same relative path collide. True would place each source under
        # folder_store/<source folder name>/ instead.
        # NOTE(review): both sources above end in the same folder name, so True
        # would still map them to the same sub-folder -- confirm that merging
        # the two sources is intended.
        namespace_by_source=False,

        # True lets a copy replace a file that already exists in the
        # destination; False skips such files.
        overwrite=True,

        # True only prints the planned copies; False performs the copy.
        # Run once with True to verify what will be copied.
        dry_run=False,
    )
    # =======================================

    copy_experiment_outputs(settings)


[OK] /mnt/c/Users/truon/learning/ptit/research/trung/M_10_01_2025/code_v2/project/notebooks/cifar10_iter_ema_noise_validation -> /mnt/c/Users/truon/learning/ptit/research/trung/M_10_01_2025/code_v2/project/notebooks/store_output_cifar10_iter_ema_noise_validation_v2 | copied=185 skipped=0
[OK] /mnt/d/code_v2/project/notebooks/cifar10_iter_ema_noise_validation -> /mnt/c/Users/truon/learning/ptit/research/trung/M_10_01_2025/code_v2/project/notebooks/store_output_cifar10_iter_ema_noise_validation_v2 | copied=190 skipped=0
