### Tracker Management

Tracks which clips were added to each class of the training set at each iteration.

In [2]:
import os
import pandas as pd

def _collect_training_clips(directory):
    """Return a mapping of class folder name -> relative paths of its clips.

    Only immediate sub-directories of ``directory`` are treated as classes and
    only files whose name ends with ``.wav`` are collected. Each path is
    ``<class_folder>/<clip>`` (joined with ``os.path.join``). Folders without
    any ``.wav`` file are omitted. Listings are sorted so the tracker layout
    does not depend on the arbitrary order returned by ``os.listdir``.
    """
    clips_by_class = {}
    for class_folder in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_folder)
        if not os.path.isdir(class_path):
            # Plain files at the top level (e.g. a tracker CSV) are not classes.
            continue
        wav_files = [
            os.path.join(class_folder, clip)
            for clip in sorted(os.listdir(class_path))
            if clip.endswith(".wav")
        ]
        if wav_files:
            clips_by_class[class_folder] = wav_files
    return clips_by_class


def update_training_set_tracker(directory, tracker_path, current_iteration):
    """
    Update the training set tracker with files from the training directory.

    The tracker is a CSV with a ``file`` column (clip path relative to
    ``directory``) and one column per class folder. For every ``.wav`` clip
    currently present in a class folder, the cell at (clip, class) is set to
    ``current_iteration``. Cells that were never set are left empty (missing),
    so an iteration number of 0 stays distinguishable from "not in this class".

    Args:
        directory: Folder containing one sub-folder of ``.wav`` clips per class.
        tracker_path: Path of the tracker CSV. It is loaded if it exists and
            created otherwise; it is always (re)written at the end.
        current_iteration: Value stored for every clip found in this run.

    Returns:
        None. The updated tracker is written to ``tracker_path`` and a summary
        line is printed.
    """
    clips_by_class = _collect_training_clips(directory)

    if os.path.exists(tracker_path):
        tracker_df = pd.read_csv(tracker_path)
    else:
        # Start with only the key column; class columns are added on demand
        # below, which also keeps an empty training directory from failing.
        tracker_df = pd.DataFrame(columns=["file"])

    # Append all previously unseen files in one go instead of one concat per
    # clip, using a set for constant-time membership checks.
    known_files = set(tracker_df["file"])
    new_files = [
        rel_path
        for class_files in clips_by_class.values()
        for rel_path in class_files
        if rel_path not in known_files
    ]
    if new_files:
        additions = pd.DataFrame({"file": new_files})
        if tracker_df.empty:
            # Nothing to preserve but the column layout; reindexing avoids
            # concatenating with an empty frame.
            tracker_df = additions.reindex(columns=tracker_df.columns)
        else:
            tracker_df = pd.concat([tracker_df, additions], ignore_index=True)

    for class_name, class_files in clips_by_class.items():
        if class_name not in tracker_df.columns:
            # New class: existing rows are marked missing rather than 0, which
            # matches the value appended rows get in the other class columns.
            tracker_df[class_name] = pd.NA
        in_class = tracker_df["file"].isin(class_files)
        tracker_df.loc[in_class, class_name] = current_iteration

    # Save the updated tracker
    tracker_df.to_csv(tracker_path, index=False)
    print(f"Tracker updated. Total files in tracker: {len(tracker_df)}")


# Iteration number recorded for every clip found in this run;
# bump it before each new retraining round.
current_iteration = 0

# Root folder holding one sub-folder of clips per class.
training_directory = "/mnt/d/retraining_BirdNET/model_train_2025/train_set"

# CSV file in which the tracker is persisted between runs.
tracker_csv_path = "/mnt/d/retraining_BirdNET/model_train_2025/train_set/training-set_tracker.csv"

# Refresh the tracker for the current iteration.
update_training_set_tracker(
    directory=training_directory,
    tracker_path=tracker_csv_path,
    current_iteration=current_iteration,
)


Tracker updated. Total files in tracker: 4794
