Select 264 random clips in total (considering 33 classes), or 8 random clips per class (decide how many classes to account for — for example, are we interested in improving and validating clips for rooster, or for noise?).

### Draft for Random Sampling Logic

In [None]:
import pandas as pd
from pathlib import Path
import random

# Locations of the input tables and the output folder, plus the iteration
# number that is written into the tracker further down.
big_df_path = "/path/to/big_df.csv"
tracker_path = "/path/to/training_set_tracker.csv"
output_dir = "/path/to/random_sampled_clips/"
iteration_number = 1  # Current iteration number

# Read the candidate-clip table and the training-set tracker from disk
big_df = pd.read_csv(big_df_path)
training_set_tracker = pd.read_csv(tracker_path)

# Step 1: drop every candidate row whose filename the tracker already lists,
# so only clips not yet in the training set remain available for sampling.
used_clips = training_set_tracker["filename"].tolist()
already_used = big_df["filename"].isin(used_clips)
filtered_df = big_df.loc[~already_used]

# Step 2: Random Sampling
# Draw the clips to validate from `filtered_df`, either a fixed number per
# class or a fixed number overall. The result is `random_samples_df`, a single
# DataFrame with a fresh 0..n-1 index.
per_class_samples = 8  # Or set to None for global sampling
# NOTE(review): 8 clips x 33 classes would be 264, not 248 — confirm the intended total.
total_random_clips = 248

if per_class_samples:  # Per-class sampling
    # Randomly select up to `per_class_samples` clips for each class; classes
    # with fewer rows contribute everything they have.
    random_samples = [
        class_df.sample(min(per_class_samples, len(class_df)), random_state=42)
        for _, class_df in filtered_df.groupby("class")
    ]
else:  # Global sampling
    # Wrapped in a list: pd.concat rejects a bare DataFrame as its first argument.
    random_samples = [
        filtered_df.sample(min(total_random_clips, len(filtered_df)), random_state=42)
    ]

# Combine selected random clips. pd.concat raises on an empty list (which
# happens when no unused clips remain), so fall back to an empty frame that
# keeps the original columns.
if random_samples:
    random_samples_df = pd.concat(random_samples, ignore_index=True)
else:
    random_samples_df = filtered_df.iloc[0:0].copy()

# Save selected random clips
# Each sampled row is written as a 3-second clip into a per-class folder under
# `output_dir`, named "<score>_<offset>_<filename>.wav".
# NOTE(review): `Audio` is not imported in the visible code — presumably it is
# provided by an earlier cell (e.g. opensoundscape's Audio); confirm. If it is
# missing, every clip fails with a NameError that the handler below reports.
output_root = Path(output_dir)
audio_root = Path("/path/to/audio/files/")
failed_clips = 0

for _, row in random_samples_df.iterrows():
    class_name = row["class"]
    class_dir = output_root / class_name
    class_dir.mkdir(parents=True, exist_ok=True)

    filename = row["filename"]
    offset = int(row["offset"])
    score = row["score"]
    output_filename = f"{score:.2f}_{offset}_{filename}.wav"

    # Assuming clips are extracted similarly as before
    audio_path = audio_root / f"{filename}.wav"
    try:
        audio = Audio.from_file(audio_path, offset=offset, duration=3)
        audio.save(class_dir / output_filename)
    except Exception as e:
        # Best effort: one unreadable file should not stop the export, but
        # count it so the final message does not claim full success.
        failed_clips += 1
        print(f"Error processing {audio_path}: {e}")

if failed_clips:
    saved_clips = len(random_samples_df) - failed_clips
    print(f"Random sampling complete. {saved_clips} clips saved, {failed_clips} failed.")
else:
    print("Random sampling complete. Clips saved.")

# Step 3: Update the tracker with newly validated clips
# For every sampled clip, record `iteration_number` in the tracker column named
# after the clip's class, then write the tracker back to `tracker_path`.
new_clips = random_samples_df[["filename", "class"]]

# The sampled clips come from `filtered_df`, which excludes every filename
# already in the tracker, so a plain `.loc[filename == ...]` assignment would
# match no rows. Add a row for each filename the tracker does not have yet;
# the remaining columns of those rows start out as NaN.
tracked_filenames = set(training_set_tracker["filename"])
missing_filenames = [
    name for name in new_clips["filename"].unique() if name not in tracked_filenames
]
if missing_filenames:
    training_set_tracker = pd.concat(
        [training_set_tracker, pd.DataFrame({"filename": missing_filenames})],
        ignore_index=True,
    )

# Update the tracker with the iteration number for each class. Assigning to a
# class column that does not exist yet creates it (NaN for all other rows).
for class_name, class_rows in new_clips.groupby("class"):
    in_class = training_set_tracker["filename"].isin(class_rows["filename"])
    training_set_tracker.loc[in_class, class_name] = iteration_number

# Save the updated tracker
training_set_tracker.to_csv(tracker_path, index=False)
