In [1]:
import os
import pandas as pd
import sleap
import numpy as np
import random
import re
import matplotlib.pyplot as plt

from pathlib import Path

In [2]:
# Source SLEAP labels file (.slp); this project holds labels for multiple crops
labels_path = "D:/SLEAP/20250102_generalizability_experiment/lateral/canola/labels_canola_lateral_3nodes.v014.slp"

# Output directory where the labels of the first crop (pennycress) are saved
output_path = "D:/SLEAP/20250102_generalizability_experiment/lateral/pennycress"

# Output directory where the labels of the second crop (canola) are saved
output_path2 = "D:/SLEAP/20250102_generalizability_experiment/lateral/canola"

In [3]:
# Example .h5 video paths, kept for reference only (all commented out):
# 'D:/SLEAP/SLEAP_Canola_Pennycress/primary_root/h5_files_for_PR_sleap_project/9_do/33PFABDMTW.h5'
# Z:/Experiments/Canola/CYL_Canola_Diversity_Screen/Downstream_Data_Extraction_and_Analysis/4-19-23_data_v000_not_qc_not_proofread/H5_w_SLEAP_predictions_preqc_preproofread/W22/2do/HMDQXXWRSQ.h5
# D:\SLEAP\SLEAP_Canola_Pennycress\primary_root\h5_files_for_PR_sleap_project\pennycress\14_do
# D:/SLEAP/SLEAP_arabidopsis/h5_files_7dap_041522/1027_D_R1.h5

def separate_crops(labels, match_string='pennycress'):
    """Split labeled frames into two groups based on their video filename.

    A frame goes to the first group when `match_string` occurs in
    `label.video.filename` (a plain `in` test, so the comparison is
    case-sensitive for string filenames); every other frame goes to the
    second group. Each match is printed as it is found, followed by the
    size of both groups.

    Args:
        labels (sleap.Labels): Labels to separate.
        match_string (str): Substring identifying videos of the first crop.

    Returns:
        first_crop_labels (sleap.Labels): Frames whose filename contains the match.
        second_crop_labels (sleap.Labels): All remaining frames.
    """
    matched_frames, remaining_frames = [], []
    for labeled_frame in labels:
        video_filename = labeled_frame.video.filename
        # Guard clause: anything that does not match belongs to the "rest".
        if match_string not in video_filename:
            remaining_frames.append(labeled_frame)
            continue
        matched_frames.append(labeled_frame)
        print(f"Found {match_string} data: {video_filename}")

    # Wrap the plain lists back into Labels objects for the caller.
    first_crop_labels = sleap.Labels(matched_frames)
    second_crop_labels = sleap.Labels(remaining_frames)

    print(f"Number of {match_string} crop labels: {len(first_crop_labels)}")
    print(f"Number of other crop labels: {len(second_crop_labels)}")
    return first_crop_labels, second_crop_labels


def count_frames_per_crop(labels, match_string1, match_string2, match_string3=None):
    """Tally labeled frames by which match string occurs in the video filename.

    Each frame is counted exactly once. The match strings are tried in
    order (first, second, then the optional third) and the first one found
    in `label.video.filename` wins; frames matching none of them are
    counted under the "missing" key.

    Args:
        labels (sleap.Labels): Labels to count frames from.
        match_string1 (str): Match string for the first crop.
        match_string2 (str): Match string for the second crop.
        match_string3 (str): Match string for the third crop (if any).

    Returns:
        counts (dict): Number of frames per match string, plus "missing".
    """
    counts = {match_string1: 0, match_string2: 0, "missing": 0}

    # Patterns in priority order; the third one only takes part when given.
    ordered_patterns = [match_string1, match_string2]
    if match_string3:
        counts[match_string3] = 0
        ordered_patterns.append(match_string3)

    for labeled_frame in labels:
        video_filename = labeled_frame.video.filename
        # First pattern contained in the filename, or "missing" when none is.
        bucket = next(
            (pattern for pattern in ordered_patterns if pattern in video_filename),
            'missing',
        )
        counts[bucket] += 1

    return counts

In [4]:
# Load the labels file, passing the path to SLEAP as a POSIX-style string
labels_file = Path(labels_path).as_posix()
base_labels = sleap.load_file(labels_file)

# Keep user instances only
user_labels = base_labels.with_user_labels_only()

In [5]:
# Make the output directories if they don't exist.
# Labels are saved under both output_path and output_path2 further down, so
# both destinations are created here rather than only the first one.
for out_dir in (output_path, output_path2):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

In [6]:
# Preview the first five entries of the user labels to inspect their structure
for preview_frame in user_labels[:5]:
    print(preview_frame)

LabeledFrame(video=HDF5Video('D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/7_do/PJHG7P9FZ8.h5'), frame_idx=0, instances=8)
LabeledFrame(video=HDF5Video('D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/7_do/9HO9HUCQEF.h5'), frame_idx=0, instances=16)
LabeledFrame(video=HDF5Video('D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/7_do/2XKMH7E4SW.h5'), frame_idx=0, instances=10)
LabeledFrame(video=HDF5Video('D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/7_do/34X56QQ58E.h5'), frame_idx=0, instances=7)
LabeledFrame(video=HDF5Video('D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/7_do/BZ3DC53ZOY.h5'), frame_idx=51, instances=6)


In [7]:
# Count frames per crop. Order matters: 'pennycress' (lowercase folder name)
# is tested first because pennycress video paths also contain the
# 'Canola_Pennycress' project folder name.
counts = count_frames_per_crop(
    user_labels,
    match_string1='pennycress',
    match_string2='Canola_Pennycress',
)
counts

{'pennycress': 201, 'Canola_Pennycress': 429, 'missing': 1}

In [8]:
print(f"There are {len(user_labels)} labels total.")

# Split the user labels by the 'pennycress' match string.
# NOTE: other_labels receives every frame that does not match, not only
# frames that were positively identified as canola.
match_labels, other_labels = separate_crops(user_labels, match_string='pennycress')

# Write the matching (pennycress) labels to the first output directory
pennycress_file = Path(output_path) / "labels_ONLYpennycress_lateral_3nodes.v000.slp"
match_labels.save(pennycress_file)
print(f"Saved pennycress labels to {output_path}")

# Write the remaining labels to the second output directory
canola_file = Path(output_path2) / "labels_ONLYcanola_lateral_3nodes.v000.slp"
other_labels.save(canola_file)
print(f"Saved canola labels to {output_path2}")

There are 631 labels total.
Found pennycress data: D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/pennycress/14_do/OH3MXW2DT2.h5
Found pennycress data: D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/pennycress/14_do/OH3MXW2DT2.h5
Found pennycress data: D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/pennycress/14_do/PJWP66PP1Z.h5
Found pennycress data: D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/pennycress/14_do/PJWP66PP1Z.h5
Found pennycress data: D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/pennycress/14_do/VZ1OZPLQ3S.h5
Found pennycress data: D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/pennycress/14_do/VZ1OZPLQ3S.h5
Found pennycress data: D:/SLEAP/SLEAP_Canola_Pennycress/lateral_3_nodes/h5_files_for_LR_sleap_project/pennycress/14_do/0JC8VEXVQN.h5
Found pennycress data: D:/SLEAP/SLEAP_Can