In [1]:
import os
import sleap
import pandas as pd
from pathlib import Path

In [2]:
# Root folder of the experiment; every group has its own subfolder here
base_path = "D:/SLEAP/20250102_generalizability_experiment/lateral"

# Subfolder of base_path that receives the merged splits
output_dir = "sorghum_soybean"

# Group subfolders whose labels are merged, and the split versions to process
groups = ["sorghum", "soybean"]
versions = ["000", "001", "002"]

In [3]:
split_types = ["train", "val", "test"]


def _load_labels_or_none(file_path):
    """Load one labels file, or report it as missing.

    Args:
        file_path: ``pathlib.Path`` of the ``.pkg.slp`` file to load.

    Returns:
        The object returned by ``sleap.load_file``, or ``None`` when the
        load raises ``FileNotFoundError``.
    """
    # Only the load itself is guarded, so an error raised later while
    # counting or merging is never misreported as a missing input file.
    try:
        labels = sleap.load_file(file_path.as_posix())
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    print(f"Loaded: {file_path}")
    return labels


def _merge_group_labels(base_dir, group_names, version, split_type):
    """Merge one split of one version across all groups.

    Args:
        base_dir: ``pathlib.Path`` that contains one subfolder per group.
        group_names: Group subfolder names, merged in the given order.
        version: Version suffix used in ``train_test_split.v{version}``.
        split_type: Split name used as the ``{split_type}.pkg.slp`` stem.

    Returns:
        A ``(merged, counts)`` tuple. ``merged`` is the first labels object
        that loaded, extended in place with the labels of the later groups,
        or ``None`` when no group file could be loaded. ``counts`` maps every
        group name to ``len(labels)`` of its file, with 0 for missing files.
    """
    merged = None
    counts = {}
    for group in group_names:
        labels = _load_labels_or_none(
            base_dir / group / f"train_test_split.v{version}" / f"{split_type}.pkg.slp"
        )
        if labels is None:
            counts[group] = 0  # Missing file contributes no frames
            continue

        counts[group] = len(labels)
        if merged is None:
            merged = labels
        else:
            merged.extend_from(labels, unify=True)
    return merged, counts


# Main output directory; one subdirectory per version is created below
new_main_dir = Path(base_path) / output_dir

# One summary row per merged file that was actually written
summary_data = []

for version in versions:
    version_dir = new_main_dir / f"train_test_split.v{version}"
    version_dir.mkdir(exist_ok=True, parents=True)

    for split_type in split_types:
        merged_labels, frame_counts = _merge_group_labels(
            Path(base_path), groups, version, split_type
        )

        # None means no group file was found. The truthiness test also skips
        # an object that evaluates as falsy -- presumably a merge with zero
        # labeled frames; TODO confirm against the SLEAP Labels class.
        if not merged_labels:
            print(f"No labels to save for {split_type} in version {version}. Skipping.")
            continue

        # Save the merged labels to the new directory
        save_path = version_dir / f"{split_type}.pkg.slp"
        merged_labels.save(save_path, with_images=True)
        print(f"Merged: {save_path}")

        # frame_counts holds one entry per group, in group order, so unpacking
        # it adds one column per group after the fixed columns.
        summary_data.append({
            'path': save_path.as_posix(),
            'version': version,
            'labeled_frames': len(merged_labels),
            'split_type': split_type,
            **frame_counts,
        })

# Create a DataFrame and save as CSV
df_summary = pd.DataFrame(summary_data)
summary_csv = new_main_dir / 'train_test_splits.csv'
df_summary.to_csv(summary_csv, index=False)

print(f"Summary saved to {summary_csv}")


Loaded: D:\SLEAP\20250102_generalizability_experiment\lateral\sorghum\train_test_split.v000\train.pkg.slp
Loaded: D:\SLEAP\20250102_generalizability_experiment\lateral\soybean\train_test_split.v000\train.pkg.slp
Merged: D:\SLEAP\20250102_generalizability_experiment\lateral\sorghum_soybean\train_test_split.v000\train.pkg.slp
Loaded: D:\SLEAP\20250102_generalizability_experiment\lateral\sorghum\train_test_split.v000\val.pkg.slp
Loaded: D:\SLEAP\20250102_generalizability_experiment\lateral\soybean\train_test_split.v000\val.pkg.slp
Merged: D:\SLEAP\20250102_generalizability_experiment\lateral\sorghum_soybean\train_test_split.v000\val.pkg.slp
Loaded: D:\SLEAP\20250102_generalizability_experiment\lateral\sorghum\train_test_split.v000\test.pkg.slp
Loaded: D:\SLEAP\20250102_generalizability_experiment\lateral\soybean\train_test_split.v000\test.pkg.slp
Merged: D:\SLEAP\20250102_generalizability_experiment\lateral\sorghum_soybean\train_test_split.v000\test.pkg.slp
Loaded: D:\SLEAP\20250102_gener