In [2]:
import soundfile as sf
from tqdm import tqdm
import os

def convert_flac_to_wav(flac_file_path, output_directory):
    """Decode an audio file and save it as ``<stem>.wav`` in *output_directory*.

    The whole file is read with ``soundfile.read`` and written back with
    ``soundfile.write`` at the same sample rate. The output name is the input
    file's base name with its extension replaced by ``.wav``.

    Args:
        flac_file_path: Path of the audio file to convert (expected to be FLAC).
        output_directory: Directory that receives the WAV file.

    Returns:
        The path of the WAV file that was written.
    """
    # Derive "<stem>.wav" from the input file name, dropping its directory
    stem, _extension = os.path.splitext(os.path.basename(flac_file_path))
    destination = os.path.join(output_directory, stem + ".wav")

    # Decode everything, then re-encode at the original sample rate
    audio, rate = sf.read(flac_file_path)
    sf.write(destination, audio, rate)
    return destination

def split_wav_file(wav_file_path, output_directory, segment_length_sec=10):
    """Split an audio file into consecutive fixed-length WAV segments.

    Segments are written to *output_directory* as ``<stem>_<i>.wav`` with
    ``i`` counting from 0, where ``<stem>`` is the input file's base name
    without extension. Every segment holds ``segment_length_sec`` seconds of
    audio except the last one, which holds whatever remains. An input with no
    samples produces no files.

    The input is read one segment at a time, so memory use is bounded by the
    segment length rather than by the length of the recording.

    Args:
        wav_file_path: Path of the audio file to split.
        output_directory: Directory for the segment files; created if missing.
        segment_length_sec: Segment duration in seconds (int or float).

    Raises:
        ValueError: If ``segment_length_sec`` amounts to less than one sample.
    """
    stem = os.path.splitext(os.path.basename(wav_file_path))[0]

    # An empty string means "current directory"; os.makedirs("") would raise.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    with sf.SoundFile(wav_file_path) as source:
        samplerate = source.samplerate

        # Round to a whole number of samples so fractional durations work.
        samples_per_segment = int(round(segment_length_sec * samplerate))
        if samples_per_segment < 1:
            raise ValueError(
                "segment_length_sec must cover at least one sample, "
                f"got {segment_length_sec!r}"
            )

        # Keep the source sample format instead of soundfile's WAV default;
        # fall back to the default if WAV cannot store that subtype.
        subtype = source.subtype
        if not sf.check_format("WAV", subtype):
            subtype = None

        segment_index = 0
        while True:
            # Returns fewer frames than requested at the end of the file and
            # an empty array once the file is exhausted.
            segment_data = source.read(samples_per_segment)
            if len(segment_data) == 0:
                break

            segment_file_name = f"{stem}_{segment_index}.wav"
            output_segment_file_path = os.path.join(
                output_directory, segment_file_name
            )
            sf.write(
                output_segment_file_path,
                segment_data,
                samplerate,
                subtype=subtype,
            )
            segment_index += 1

# Input (FLAC) and output (WAV) locations
flac_file_directory = "/media/george-vengrovski/disk2/budgie/raw_data/Long Lab Budgie Pieozo"
output_wav_directory = "/media/george-vengrovski/disk2/budgie/raw_data/LongLabWav"

# Create the destination folder up front; a pre-existing one is fine
os.makedirs(output_wav_directory, exist_ok=True)

# Walk every directory entry, skip anything that is not a regular *.flac file,
# then convert it to WAV and cut that WAV into 10-second segments
for flac_file in tqdm(os.listdir(flac_file_directory), desc="Processing birds"):
    full_flac_file_path = os.path.join(flac_file_directory, flac_file)
    if not os.path.isfile(full_flac_file_path):
        continue
    if not flac_file.lower().endswith('.flac'):
        continue
    wav_file_path = convert_flac_to_wav(full_flac_file_path, output_wav_directory)
    split_wav_file(wav_file_path, output_wav_directory)

Processing birds: 100%|██████████| 12/12 [19:28<00:00, 97.36s/it] 
