In [12]:
import os
import numpy as np
import torchaudio
import torch
from transformers import AutoProcessor, EncodecModel

# Pretrained Encodec checkpoint (24 kHz variant) shared by the processor and the model.
ENCODEC_CHECKPOINT = "facebook/encodec_24khz"

# The processor prepares raw waveforms for the model; the model produces the audio codes.
processor = AutoProcessor.from_pretrained(ENCODEC_CHECKPOINT)
model = EncodecModel.from_pretrained(ENCODEC_CHECKPOINT)

# Function to process and encode a single WAV file
def encode_wav_file(file_path: str, bandwidth: float = 24.0) -> np.ndarray:
    """Encode a single audio file into Encodec audio codes.

    The file is loaded with torchaudio, reduced to its first channel,
    resampled to 24 kHz when its native rate differs, and then passed through
    the module-level ``processor`` and ``model``.

    Args:
        file_path: Path of the audio file to load.
        bandwidth: Target bandwidth forwarded to ``model.encode``. Defaults to
            24.0, the value this notebook previously hard-coded.

    Returns:
        The audio codes as a NumPy array with all singleton dimensions
        squeezed out (presumably ``(num_codebooks, frames)`` for one clip --
        confirm against the model output).

    Raises:
        ValueError: If the file contains no audio samples.
    """
    target_sample_rate = 24000

    # torchaudio.load returns a (channels, frames) tensor by default.
    waveform, sample_rate = torchaudio.load(file_path)
    if waveform.numel() == 0:
        # Fail early with a clear message instead of somewhere inside the model.
        raise ValueError(f"No audio samples found in {file_path}")

    # Keep only the first channel. Indexing the channel axis directly (rather
    # than squeeze() followed by an ndim check) stays correct for one-frame
    # clips, where squeeze() would also collapse the time axis.
    waveform = waveform[0]

    # Resample after channel selection so only the kept channel is processed.
    if sample_rate != target_sample_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=target_sample_rate
        )
        waveform = resampler(waveform)

    # Encode the waveform
    inputs = processor(
        waveform.numpy(), sampling_rate=target_sample_rate, return_tensors="pt"
    )
    with torch.no_grad():
        encoded = model.encode(
            inputs["input_values"], inputs["padding_mask"], bandwidth
        )

    # The first element of the encoder output holds the audio codes.
    return encoded[0].squeeze().cpu().numpy()

# Directory containing the soundscape folder
root_dir = "clusters225-2000Hz"

# Gather every WAV path up front and sort it, so the position of each entry in
# the saved array is reproducible (os.walk does not guarantee an order).
# The extension check is case-insensitive so ".WAV" files are not skipped.
wav_paths = sorted(
    os.path.join(subdir, file)
    for subdir, _, files in os.walk(root_dir)
    for file in files
    if file.lower().endswith(".wav")
)

# Collect the encoded audio codes of every WAV file
audio_codes_list = []
for file_path in wav_paths:
    print(f"Processing {file_path}...")
    audio_codes = encode_wav_file(file_path)
    audio_codes_list.append(audio_codes)

# Pack the per-file code arrays into a 1-D object array, one element per file.
# Filling a pre-allocated array avoids np.array(..., dtype=object) trying to
# merge the per-file arrays into one N-D array, which breaks or changes layout
# depending on whether the clips happen to have equal lengths.
audio_codes_array = np.empty(len(audio_codes_list), dtype=object)
for index, codes in enumerate(audio_codes_list):
    audio_codes_array[index] = codes

# Save the numpy array to an NPY file (object arrays are pickled, so reading
# it back requires np.load(..., allow_pickle=True)).
np.save("audio_codes.npy", audio_codes_array)

print("All audio codes have been saved to audio_codes.npy")




Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_162702_0_00027_255.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_162702_0_00027_255_denoised.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_203102_0_00027_362.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_203202_0_00041_271.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_203302_0_00000_000.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_204202_0_00051_617.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_210702_0_00004_876.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_210702_0_00036_258.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_210802_0_00005_154.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_210902_0_00041_698.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_215602_0_00032_311.wav...
Processing clusters225-2000Hz\000\SM15XPRIZE_20240409_215602_0_00049_932.wav...
Processing clusters225-2000Hz\0

KeyboardInterrupt: 