### Validated unused clips tracker

In [1]:
import os
import pandas as pd
from pathlib import Path

In [2]:
# Folder that holds the validated clips to be tracked
validated_clips_path = '/mnt/d/retraining_BirdNET_2025/iterative_training/segments_validation/unused_validated_clips/'
# The tracker CSV is stored inside that same folder, so its path is derived from it
tracker_filename = 'unused_validated_clips_tracker.csv'
tracker_path = os.path.join(validated_clips_path, tracker_filename)

In [3]:
def extract_core_filename(filename):
    """
    Return the core identifier embedded in a clip filename.

    The extension is dropped and the remaining stem is split on underscores.
    The first token is discarded (it holds the score) and the following five
    tokens are re-joined with underscores to form the core filename.

    Returns None when the stem has fewer than six underscore-separated tokens.
    """
    tokens = Path(filename).stem.split('_')
    if len(tokens) < 6:
        return None
    # Tokens 1..5 make up the core name; anything past the sixth token is dropped
    return '_'.join(tokens[1:6])
    

In [4]:
def update_validated_clips_tracker():
    """
    Rebuild the validated clips tracker CSV from the current folder contents.

    Scans `validated_clips_path` for files ending in .wav (case-insensitive),
    derives each file's core filename with `extract_core_filename`, and writes
    one row per clip (columns 'file' and 'core_filename') to `tracker_path`,
    replacing any tracker that is already there.

    Prints:
        - a warning listing .wav files whose name could not be parsed (these
          are left out of the tracker),
        - a warning listing core filenames shared by more than one clip, or a
          confirmation that there are none,
        - the tracker location and the number of files tracked.

    If no clip can be tracked, a message is printed and nothing is written.

    Returns:
        None
    """
    validated_clips = []
    skipped_clips = []

    # os.listdir returns entries in arbitrary order; sorting keeps the tracker
    # rows in the same order on every run.
    for clip in sorted(os.listdir(validated_clips_path)):
        if not clip.lower().endswith('.wav'):
            continue
        core_filename = extract_core_filename(clip)
        if core_filename:
            validated_clips.append({
                'file': os.path.join(validated_clips_path, clip),  # Full path
                'core_filename': core_filename
            })
        else:
            # Remember unparseable names so they are reported instead of
            # silently vanishing from the tracker.
            skipped_clips.append(clip)

    if skipped_clips:
        print(f"WARNING: {len(skipped_clips)} .wav file(s) skipped because no core filename could be extracted:")
        for skipped in skipped_clips:
            print(f"   → {skipped}")

    # Create a DataFrame
    tracker_df = pd.DataFrame(validated_clips)

    if tracker_df.empty:
        print("No validated files found. Nothing to update.")
        return

    # Check for duplicates (based on core_filename); keep=False flags every
    # occurrence, not just the repeats after the first one.
    duplicates = tracker_df['core_filename'].duplicated(keep=False)

    if duplicates.any():
        # Display a warning for duplicates
        dupes = tracker_df.loc[duplicates, 'core_filename'].unique().tolist()
        print("WARNING: The following core filenames appear more than once in the folder:")
        for d in dupes:
            print(f"   → {d}")
    else:
        print("No duplicates found")

    # Overwrite the old tracker with the updated one
    tracker_df.to_csv(tracker_path, index=False)
    print(f"Tracker created at {tracker_path}")
    print(f"Total files tracked: {len(tracker_df)}")

# Run the tracker update: rescans validated_clips_path and rewrites the CSV at
# tracker_path, printing a duplicate report and the number of files tracked.
update_validated_clips_tracker()

   → 69_M17_SN24_20220824_190000
Tracker created at /mnt/d/retraining_BirdNET_2025/iterative_training/segments_validation/unused_validated_clips/unused_validated_clips_tracker.csv
Total files tracked: 2935
