In [5]:
import os
import glob
import pandas as pd
from PIL import Image

def get_image_by_prefix(directory, prefix):
    """
    Return the first .png file in ``directory`` whose name starts with ``prefix``.

    Matches are sorted by path so that "first" is reproducible across runs
    and filesystems. If more than one file matches, a warning is printed.

    Args:
        directory (str): Directory path where to search.
        prefix (str): File name prefix to look for.

    Returns:
        selected_file (str or None): The selected file path, or None if none found.
        file_list (list): Sorted list of all matching file paths.
    """
    # Escape glob metacharacters so that '[', ']', '*' or '?' appearing in the
    # directory or prefix are matched literally instead of as wildcards.
    pattern = os.path.join(glob.escape(directory), f"{glob.escape(prefix)}*.png")

    # glob.glob() yields matches in arbitrary order; sort so the selected
    # file does not depend on directory listing order.
    files = sorted(glob.glob(pattern))

    if not files:
        return None, []
    if len(files) > 1:
        print(f"Warning: Found multiple files starting with '{prefix}' in {directory}. Files: {files}")
    return files[0], files

def _pick_overlay_by_signature(align_dir, overlays, default_overlay):
    """
    Choose, among several overlays, the one matching the predictions file signature.

    The signature is the part of the predictions file name after the last
    'F_' and before the extension; the matching overlay is named
    'overlay_<signature>.png'. Returns ``default_overlay`` when no predictions
    file exists, when it lacks 'F_', or when no overlay matches.
    """
    safe_dir = glob.escape(align_dir)
    # Sort each group so the choice is reproducible; .h5 files keep priority
    # over .nc files, as in the concatenation order.
    preds = (
        sorted(glob.glob(os.path.join(safe_dir, "aligned_predictions*.[hH]5"))) +
        sorted(glob.glob(os.path.join(safe_dir, "aligned_predictions*.[nN]c")))
    )
    if not preds:
        return default_overlay

    # Prefer a non-"default" predictions file. Test the file name only, so a
    # parent directory that happens to contain "default" does not interfere.
    non_default = [p for p in preds if "default" not in os.path.basename(p).lower()]
    predictions_file = non_default[0] if non_default else preds[0]

    basename = os.path.basename(predictions_file)
    if "F_" not in basename:
        print(f"Warning: Predictions file '{basename}' lacks 'F_'. Using first overlay.")
        return default_overlay

    signature = basename.rsplit("F_", 1)[1].rsplit('.', 1)[0]
    wanted_name = f"overlay_{signature}.png"
    for overlay_path in overlays:
        if os.path.basename(overlay_path) == wanted_name:
            return overlay_path

    print(f"Warning: No overlay matches signature '{signature}'. Using first overlay.")
    return default_overlay


def save_overlay_images_from_csv(csv_file,
                                 dest_dir,
                                 include_filter=None,
                                 exclude_filter=None):
    """
    Reads session paths from a CSV file (with a column named 'path'), then for each session:
      - Navigates to its 'MIR_Aligned' subdirectory.
      - Searches for an overlay image (filename starting with 'overlay').
      - If multiple overlays are found, tries to disambiguate using the predictions file signature.
      - Saves the selected overlay to dest_dir as '<session folder name>__<overlay file name>'.

    Problems with an individual session (invalid path entry, missing directory,
    no overlay, load/save error) are reported with print() and that session is
    skipped; processing continues with the next one.

    Note: the saved name is built from the last component of the session path,
    so two sessions whose folders share a name write to the same file.

    Args:
        csv_file (str): Path to the CSV file containing a 'path' column.
        dest_dir (str): Directory where overlay images will be saved (created if missing).
        include_filter (list of str, optional): If provided, only session paths containing all these keywords are processed.
        exclude_filter (list of str, optional): If provided, any session path containing any of these keywords is skipped.

    Returns:
        None
    """
    # Load session paths from the CSV file
    try:
        df = pd.read_csv(csv_file)
    except Exception as e:
        print(f"Error reading CSV file {csv_file}: {e}")
        return

    if 'path' not in df.columns:
        print("CSV does not contain a 'path' column.")
        return

    session_paths = df['path'].tolist()

    # Ensure destination directory exists
    os.makedirs(dest_dir, exist_ok=True)

    for sp in session_paths:
        # Empty CSV cells are read as NaN (a float); the substring filters and
        # os.path.join below would raise TypeError on them and abort the run.
        if not isinstance(sp, str):
            print(f"Invalid session path {sp!r} in {csv_file}. Skipping.")
            continue

        # Apply include filtering: all keywords in include_filter must be in the path.
        if include_filter and not all(keyword in sp for keyword in include_filter):
            continue

        # Apply exclude filtering: if any keyword in exclude_filter is found in the path, skip.
        if exclude_filter and any(keyword in sp for keyword in exclude_filter):
            continue

        align_dir = os.path.join(sp, "MIR_Aligned")
        if not os.path.isdir(align_dir):
            print(f"Directory not found: {align_dir}. Skipping session: {sp}")
            continue

        # Find overlay image(s)
        overlay_file, overlays = get_image_by_prefix(align_dir, "overlay")

        # If there are multiple overlays, try to disambiguate using the predictions file signature
        if len(overlays) > 1:
            overlay_file = _pick_overlay_by_signature(align_dir, overlays, overlay_file)

        if not overlay_file:
            found = overlays if overlays else "None"
            print(f"No overlay found in {align_dir}. Found: {found}. Skipping.")
            continue

        # Construct a unique filename for saving
        session_label = os.path.basename(os.path.normpath(sp))
        overlay_basename = os.path.basename(overlay_file)
        save_name = f"{session_label}__{overlay_basename}"
        save_path = os.path.join(dest_dir, save_name)

        # Load the overlay image
        try:
            overlay_img = Image.open(overlay_file)
        except Exception as e:
            print(f"Error loading overlay in {align_dir}: {e}. Skipping.")
            continue

        # The context manager closes the underlying file even when saving
        # fails, so a long session list does not accumulate open handles.
        with overlay_img:
            try:
                overlay_img.save(save_path, format='PNG')
            except Exception as e:
                print(f"Error saving overlay '{overlay_file}' to '{save_path}': {e}")
                continue

        print(f"Saved overlay for session '{session_label}' to '{save_path}'")

if __name__ == '__main__':
    # Script entry point: export overlays for every session listed in the CSV.
    # Alternative session list used previously:
    #   "/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/socialcom_3.csv"

    # Keyword filters applied to each session path (empty lists disable filtering).
    include_keys = list()   # e.g., ["v1"]
    exclude_keys = list()   # e.g., ["bad_session"]

    # Input CSV (must have a 'path' column) and output folder for the PNGs.
    destination_folder = "/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/overlays_singles"
    csv_file = "/home/lq53/mir_repos/BBOP/random_tests/25mar_minibbop_integration/250331_sum_aligned_good_path.csv"

    save_overlay_images_from_csv(
        csv_file=csv_file,
        dest_dir=destination_folder,
        include_filter=include_keys,
        exclude_filter=exclude_keys,
    )


Saved overlay for session '20240916v1r1_16_37' to '/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/overlays_singles/20240916v1r1_16_37__overlay_plot.png'
Saved overlay for session '20240916v1r1_16_53' to '/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/overlays_singles/20240916v1r1_16_53__overlay_plot.png'
Saved overlay for session '20240916v1r2_14_30' to '/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/overlays_singles/20240916v1r2_14_30__overlay_plot.png'
Saved overlay for session '20240916v1r2_15_58' to '/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/overlays_singles/20240916v1r2_15_58__overlay_plot.png'
Saved overlay for session '20240819V1r1_13_41' to '/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/overlays_singles/20240819V1r1_13_41__overlay_wnd1500_stp700_max25_diff3.5_pnr1.1.png'
Saved overlay for session '20240819V1r1_14_25' to '/home/lq53/mir_repos/BBOP/random_tests/25may_social_try/overlays_singles/20240819V1r1_14_25__overlay_wnd1000