In [None]:
# Install every third-party package the notebook imports. librosa and
# soundfile are imported by the preprocessing cell, so they are installed here
# too. %pip (rather than !pip) installs into the environment of the running
# kernel.
# NOTE(review): the inspection cells call torchaudio.info; newer torchaudio
# releases appear to deprecate that function - confirm and pin a version if so.
%pip install torchaudio matplotlib tqdm librosa soundfile

In [None]:
import os
def get_file_extensions(folder_path):
    """Collect the distinct file extensions found anywhere under ``folder_path``.

    The directory tree is walked recursively. Extensions are lower-cased and
    keep their leading dot (e.g. ``".wav"``); files without an extension are
    ignored.

    Args:
        folder_path: Root directory to scan.

    Returns:
        set: The unique, lower-cased extensions.
    """
    suffixes = (
        os.path.splitext(name)[1]
        for _, _, names in os.walk(folder_path)
        for name in names
    )
    return {suffix.lower() for suffix in suffixes if suffix}

# Scan the raw dataset folder and report which file extensions it contains.
# Note: os.walk yields nothing for a missing folder, so an empty set is printed
# in that case rather than an error.
folder_path = 'for-original' 
extensions = get_file_extensions(folder_path)
print(f"File extensions found: {extensions}")

In [None]:
import os
import torchaudio

# Inspect the raw dataset: record sample rate, channel count and duration for
# every audio file found under dataset_path, then print a short summary.
dataset_path = "./for-original"
# Same extensions that preprocess_folder accepts, so the statistics describe
# exactly the files that will be preprocessed.
audio_extensions = (".wav", ".flac", ".mp3", ".ogg")
audio_stats = []  # one (file_path, sample_rate, channels, duration_seconds) per file

for root, _dirs, files in os.walk(dataset_path):
    for file in files:
        # Case-insensitive match so that e.g. "clip.WAV" is not silently skipped.
        if not file.lower().endswith(audio_extensions):
            continue
        file_path = os.path.join(root, file)
        try:
            info = torchaudio.info(file_path)
            sample_rate = info.sample_rate
            channels = info.num_channels
            duration = info.num_frames / sample_rate
        except Exception as e:
            # Best effort: report the unreadable file and keep scanning.
            print(f"Error reading {file_path}: {e}")
            continue

        audio_stats.append((file_path, sample_rate, channels, duration))
        print(f"{file_path} | Sample rate: {sample_rate} Hz | Channels: {channels} | Duration: {duration:.2f} sec")

# Summarize sample rates, channel counts and durations.
sample_rates = {sr for _, sr, _, _ in audio_stats}
channels_set = {ch for _, _, ch, _ in audio_stats}
durations = [dur for _, _, _, dur in audio_stats]

print("\n--- Summary ---")
print("Unique sample rates:", sample_rates)
print("Unique channel counts:", channels_set)
if durations:
    print("Min duration:", min(durations))
    print("Max duration:", max(durations))
else:
    # min()/max() raise ValueError on an empty list (missing folder, no
    # matching files, or every file unreadable), so report that instead.
    print(f"No readable audio files found under {dataset_path}")


In [None]:
import os
import librosa
import soundfile as sf
import numpy as np
from tqdm import tqdm

# Shared preprocessing settings; the helper functions below use them as
# default argument values.
TARGET_SR = 16000  # sample rate in Hz used when loading and when saving audio
TARGET_DURATION = 3.0  # length of every output clip, in seconds
TARGET_SAMPLES = int(TARGET_SR * TARGET_DURATION)  # the same clip length expressed in samples
TARGET_RMS = 0.1  # RMS level that normalize_rms scales audio to by default

def load_audio(file_path, target_sr=TARGET_SR):
    """Read an audio file as a mono signal resampled to ``target_sr``.

    Args:
        file_path: Path of the audio file to read.
        target_sr: Sample rate in Hz handed to ``librosa.load``.

    Returns:
        tuple: ``(audio, sr)`` exactly as produced by ``librosa.load``.
    """
    return librosa.load(file_path, sr=target_sr, mono=True)

def normalize_rms(audio, target_rms=TARGET_RMS):
    """Scale ``audio`` so that its root-mean-square level equals ``target_rms``.

    Args:
        audio: Array of samples.
        target_rms: RMS level the returned signal should have.

    Returns:
        The rescaled samples. A signal whose RMS is exactly zero cannot be
        rescaled and is returned unchanged.
    """
    current_rms = np.sqrt(np.mean(audio ** 2))
    if current_rms != 0:
        gain = target_rms / current_rms
        return audio * gain
    return audio

def pad_or_truncate(audio, target_length=TARGET_SAMPLES):
    """Force ``audio`` to contain exactly ``target_length`` samples.

    Args:
        audio: Sequence of samples.
        target_length: Required number of samples.

    Returns:
        The leading ``target_length`` samples when the input is too long, the
        input extended at the end via ``np.pad`` when it is too short, and the
        input itself when the length already matches.
    """
    missing = target_length - len(audio)
    if missing > 0:
        return np.pad(audio, (0, missing))
    if missing < 0:
        return audio[:target_length]
    return audio

def save_audio(audio, output_path, sr=TARGET_SR):
    """Write ``audio`` to ``output_path``, creating missing parent folders.

    Args:
        audio: Samples to write; passed unchanged to ``sf.write``.
        output_path: Destination file path (callers in this notebook pass a
            ``.wav`` path).
        sr: Sample rate in Hz stored in the file.
    """
    parent_dir = os.path.dirname(output_path)
    # A bare filename has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the folder when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    sf.write(output_path, audio, sr)

def preprocess_audio_file(input_path, output_path):
    """Load, RMS-normalize, pad/truncate and save a single audio file.

    Any failure is reported on stdout instead of being raised, so a batch run
    keeps going when one file cannot be processed.

    Args:
        input_path: Audio file to read.
        output_path: Where the processed audio is written.

    Returns:
        bool: True when the output file was written, False when any step
        failed (the error has already been printed).
    """
    try:
        audio, _ = load_audio(input_path)
        # NOTE(review): the gain is computed over the whole recording, before
        # truncation/padding, so the saved clip does not necessarily have the
        # target RMS itself - confirm this ordering is intended.
        audio = normalize_rms(audio)
        audio = pad_or_truncate(audio)
        save_audio(audio, output_path)
    except Exception as e:
        print(f"[Error] {input_path}: {e}")
        return False
    return True

def preprocess_folder(input_folder, output_folder):
    """
    Process all audio files in a folder and save them to a new location.

    The tree under ``input_folder`` is walked recursively. Every file whose
    name ends in .wav, .flac, .mp3 or .ogg (case-insensitive) is passed to
    ``preprocess_audio_file`` and written, with a ``.wav`` extension, to the
    same relative location under ``output_folder``.

    Args:
        input_folder: Root folder containing the source audio.
        output_folder: Root folder that receives the processed files.
    """
    audio_extensions = ('.wav', '.flac', '.mp3', '.ogg')

    # Build the full work list first so that one progress bar with a real
    # total covers the whole tree, instead of one bar per sub-directory.
    jobs = []
    for root, _, files in os.walk(input_folder):
        output_subdir = os.path.join(output_folder, os.path.relpath(root, input_folder))
        for file in files:
            if not file.lower().endswith(audio_extensions):
                continue
            input_path = os.path.join(root, file)
            output_path = os.path.join(output_subdir, os.path.splitext(file)[0] + ".wav")
            jobs.append((input_path, output_path))

    written_from = {}  # output path -> input path that produced it
    for input_path, output_path in tqdm(jobs, desc=f"Processing {os.path.basename(input_folder)}"):
        # Inputs that differ only by extension (a.wav / a.mp3) map to the same
        # output file; the later one still wins, but no longer silently.
        if output_path in written_from:
            print(f"[Warning] {input_path} overwrites {output_path} "
                  f"(already written from {written_from[output_path]})")
        written_from[output_path] = input_path
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        preprocess_audio_file(input_path, output_path)

input_folder = 'for-original'
output_folder = 'data_preprocessed'

# Each dataset split is converted into a folder of the same name under
# output_folder.
for split_name in ('training', 'validation', 'testing'):
    preprocess_folder(
        os.path.join(input_folder, split_name),
        os.path.join(output_folder, split_name),
    )

print("All audio files have been preprocessed and saved.")


In [None]:
import os
import torchaudio

# Verify the preprocessed dataset: record sample rate, channel count and
# duration for every file under dataset_path, then print a short summary.
dataset_path = "./data_preprocessed"
audio_stats = []  # one (file_path, sample_rate, channels, duration_seconds) per file

for root, _dirs, files in os.walk(dataset_path):
    for file in files:
        file_path = os.path.join(root, file)
        try:
            info = torchaudio.info(file_path)
            sample_rate = info.sample_rate
            channels = info.num_channels
            duration = info.num_frames / sample_rate
        except Exception as e:
            # Best effort: report the unreadable file and keep scanning.
            print(f"Error reading {file_path}: {e}")
            continue

        audio_stats.append((file_path, sample_rate, channels, duration))
        print(f"{file_path} | Sample rate: {sample_rate} Hz | Channels: {channels} | Duration: {duration:.2f} sec")

sample_rates = {sr for _, sr, _, _ in audio_stats}
channels_set = {ch for _, _, ch, _ in audio_stats}
durations = [dur for _, _, _, dur in audio_stats]

print("\n--- Summary ---")
print("Unique sample rates:", sample_rates)
print("Unique channel counts:", channels_set)
if durations:
    print("Min duration:", min(durations))
    print("Max duration:", max(durations))
else:
    # min()/max() raise ValueError on an empty list (missing folder or every
    # file unreadable), so report that instead of crashing the cell.
    print(f"No readable audio files found under {dataset_path}")
