### Tracker Management

Tracks the clips incorporated into each class of the training set on each iteration.

In [None]:
import os
import pandas as pd

# Build the clip identifier from a clip filename
def extract_clip_name(filename):
    """Return underscore-separated fields 1 through 6 of *filename*, re-joined.

    The field at index 0 and anything from index 7 onwards are discarded.
    Filenames with fewer than seven fields yield whatever fields exist after
    the first one (possibly an empty string).
    """
    # Fields beyond index 6 are never used, so stop splitting after seven cuts.
    fields = filename.split('_', 7)
    return '_'.join(fields[1:7])

# Function to initialize or update the tracker
def update_training_set_tracker(directory, tracker_path, current_iteration):
    """Create or refresh the CSV that records the clips in the training set.

    Each ``.wav`` file found in an immediate sub-folder of ``directory``
    becomes one row with the columns ``clip_name`` (the result of
    ``extract_clip_name`` on the filename), ``class_name`` (the sub-folder
    name) and ``iteration`` (``current_iteration``). If ``tracker_path``
    already exists, its rows are kept and merged with the new ones.

    Rows are unique per (``clip_name``, ``class_name``); when the same pair
    occurs more than once the most recently added row wins.

    Args:
        directory: Folder whose sub-folders each hold the clips of one class.
        tracker_path: Path of the tracker CSV to create or update.
        current_iteration: Value stored in the ``iteration`` column for every
            clip found by this call.

    Returns:
        None. The tracker is written to ``tracker_path`` and a summary line
        is printed.
    """
    tracker_columns = ["clip_name", "class_name", "iteration"]
    dedup_keys = ['clip_name', 'class_name']

    # List to hold training clip data
    training_clips = []

    # Walk through each folder in the directory. Listings are sorted because
    # os.listdir returns entries in arbitrary order, which would make the row
    # order of the CSV differ between runs and machines.
    for class_folder in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_folder)
        if not os.path.isdir(class_path):  # Ensure it's a folder
            continue
        for clip in sorted(os.listdir(class_path)):
            if clip.endswith('.wav'):
                training_clips.append({
                    "clip_name": extract_clip_name(clip),
                    "class_name": class_folder,
                    "iteration": current_iteration
                })

    # Passing the columns explicitly keeps the schema even when no clip was
    # found; otherwise a header-less CSV would be written and the next run
    # would fail while reading it.
    new_clips_df = pd.DataFrame(training_clips, columns=tracker_columns)

    frames = []
    if os.path.exists(tracker_path):
        try:
            frames.append(pd.read_csv(tracker_path))
        except pd.errors.EmptyDataError:
            # A tracker file without any header/content carries no rows to
            # preserve; treat it as if no tracker existed yet.
            pass
    frames.append(new_clips_df)

    # Empty frames contribute no rows; leaving them out of the concat also
    # avoids pandas' dtype-inference warnings for empty inputs.
    frames = [frame for frame in frames if not frame.empty]

    if frames:
        # NOTE(review): keep='last' means a clip that is already tracked gets
        # its stored iteration overwritten by current_iteration whenever it is
        # still present in the directory. Confirm this is intended rather
        # than preserving the iteration in which the clip first appeared.
        updated_tracker_df = pd.concat(frames, ignore_index=True).drop_duplicates(
            subset=dedup_keys, keep='last'
        )
    else:
        # Nothing on disk and nothing tracked yet: write a header-only CSV.
        updated_tracker_df = new_clips_df

    # Save the updated tracker
    updated_tracker_df.to_csv(tracker_path, index=False)
    print(f"Tracker updated. Total clips in tracker: {len(updated_tracker_df)}")

# Iteration number stamped on every clip found by this run
# (change it before each new iteration)
current_iteration = 0  # Update this as needed

# Folder whose sub-folders hold the training clips, one sub-folder per class
training_directory = "/mnt/d/DiscoTrainEval_Backup/retraining_BirdNET/model_train/"

# CSV file in which the tracker is stored
tracker_csv_path = "/mnt/d/DiscoTrainEval_Backup/retraining_BirdNET/training_set_tracker.csv"

# Scan the training directory and create/refresh the tracker CSV
update_training_set_tracker(
    directory=training_directory,
    tracker_path=tracker_csv_path,
    current_iteration=current_iteration,
)