In [1]:
from pydub import AudioSegment
import os
import glob
import random
from tqdm.notebook import tqdm
import librosa
import soundfile as sf

# Absolute locations used by the augmentation step: the background-noise
# dataset and the two folders that receive the augmented audio.
BG_DIR = os.path.abspath("datasets/background_simplified")
OUT_DIR_MUSIC = os.path.abspath("mixed_up_data_no_talk/music")
OUT_DIR_NOISE = os.path.abspath("mixed_up_data_no_talk/noisy")

# Create both output folders up front; folders that already exist are kept.
for _out_dir in (OUT_DIR_NOISE, OUT_DIR_MUSIC):
    os.makedirs(_out_dir, exist_ok=True)

def augment_file(input_path, output_dir, sample_rate, stretch_factors, pitch_steps, amplification):
    """Write one randomly chosen variant of an audio file into ``output_dir``.

    The file is loaded with ``librosa.load`` at ``sample_rate`` and exactly one
    of four operations is drawn at random:

    * ``stretch`` - time-stretch by a factor drawn from ``stretch_factors``;
    * ``pitch``   - pitch-shift by a step count drawn from ``pitch_steps``;
    * ``gain``    - multiply the samples by a value drawn from ``amplification``;
    * ``none``    - write the loaded audio unchanged.

    Args:
        input_path: Path of the audio file to read.
        output_dir: Existing directory that receives the ``.wav`` output.
        sample_rate: Sampling rate passed to ``librosa.load``.
        stretch_factors: Non-empty sequence of stretch rates to choose from.
        pitch_steps: Non-empty sequence of integer step counts to choose from
            (the output name formats the value with ``+d``).
        amplification: Non-empty sequence of linear gain multipliers.

    Returns:
        None. The result is written as
        ``<basename>_stretch<f>.wav``, ``<basename>_pitch<±n>.wav``,
        ``<basename>_amplified_<g>x.wav`` or ``<basename>.wav``.
    """
    y, sr = librosa.load(input_path, sr=sample_rate)
    basename = os.path.splitext(os.path.basename(input_path))[0]

    ops = ['stretch', 'pitch', 'gain', 'none']
    choice = random.choice(ops)

    # The branches are mutually exclusive, so select with if/elif/else and
    # write the result once at the end.
    if choice == 'stretch':
        factor = random.choice(stretch_factors)
        y_out = librosa.effects.time_stretch(y, rate=factor)
        out_name = f"{basename}_stretch{factor:.2f}.wav"
    elif choice == 'pitch':
        steps = random.choice(pitch_steps)
        y_out = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=steps)
        out_name = f"{basename}_pitch{steps:+d}.wav"
    elif choice == 'gain':
        gain = random.choice(amplification)
        y_out = y * gain
        out_name = f"{basename}_amplified_{gain:.1f}x.wav"
    else:
        # 'none': keep an unmodified copy. The previous code compared the
        # whole ``ops`` list with the string 'none', which is never true, so
        # these files were silently dropped. The name also needs an explicit
        # ".wav" extension so the writer can determine the output format.
        y_out = y
        out_name = f"{basename}.wav"

    sf.write(os.path.join(output_dir, out_name), y_out, sr)

def batch_augment_music(input_dir, output_dir,
                  sample_rate=44100,
                  stretch_factors=(0.9, 1.1),
                  pitch_steps=(-2, 2), amplification = 0):
    """Run ``augment_file`` once on every regular file directly inside ``input_dir``.

    Args:
        input_dir: Directory whose files are augmented (not searched recursively).
        output_dir: Directory that receives the augmented files.
        sample_rate: Sampling rate forwarded to ``augment_file``.
        stretch_factors: Candidate time-stretch rates.
        pitch_steps: Candidate pitch-shift step counts.
        amplification: Candidate linear gain multipliers, either a sequence or
            a single number. A single number is wrapped into a one-element
            tuple; ``0`` (the default) is treated as unity gain.

    Returns:
        None.
    """
    # ``augment_file`` draws the gain with random.choice, which needs a
    # sequence; the scalar default used to raise TypeError whenever the 'gain'
    # operation was drawn. Zero maps to 1.0 because a literal zero gain would
    # only ever write silent files.
    if isinstance(amplification, (int, float)):
        amplification = (amplification or 1.0,)

    # os.listdir also reports sub-directories (e.g. notebook checkpoint
    # folders); passing one to the audio loader would abort the whole batch.
    file_names = [name for name in os.listdir(input_dir)
                  if os.path.isfile(os.path.join(input_dir, name))]

    for name in tqdm(file_names, desc="Augumenting Music"):
        inp_path = os.path.join(input_dir, name)
        augment_file(inp_path, output_dir, sample_rate, stretch_factors, pitch_steps, amplification)
        
def batch_augment_noise(input_dir, output_dir,
                  sample_rate=44100,
                  stretch_factors=(0.9, 1.1),
                  pitch_steps=(-2, 2), amplification = 0):
    """Run ``augment_file`` once on every file of each background-noise category.

    The categories are the fixed sub-directories ``cafeteria_noises``,
    ``metro_noises``, ``park_noises``, ``station_noises`` and
    ``traffic_noises`` of ``input_dir``. All results are written into the
    single flat ``output_dir``.

    Args:
        input_dir: Directory containing the five category sub-directories.
        output_dir: Directory that receives the augmented files.
        sample_rate: Sampling rate forwarded to ``augment_file``.
        stretch_factors: Candidate time-stretch rates.
        pitch_steps: Candidate pitch-shift step counts.
        amplification: Candidate linear gain multipliers, either a sequence or
            a single number. A single number is wrapped into a one-element
            tuple; ``0`` (the default) is treated as unity gain.

    Returns:
        None.

    Raises:
        OSError: If a category sub-directory cannot be listed (for example
            because it does not exist).
    """
    # ``augment_file`` draws the gain with random.choice, which needs a
    # sequence; the scalar default used to raise TypeError whenever the 'gain'
    # operation was drawn. Zero maps to 1.0 because a literal zero gain would
    # only ever write silent files.
    if isinstance(amplification, (int, float)):
        amplification = (amplification or 1.0,)

    bg_noise_type = ["cafeteria_noises", "metro_noises", "park_noises", "station_noises", "traffic_noises"]

    for noise_type in tqdm(bg_noise_type, desc="Augumenting Noise"):
        dir_path = os.path.join(input_dir, noise_type)
        for name in os.listdir(dir_path):
            inp_path = os.path.join(dir_path, name)
            # Skip sub-directories (e.g. notebook checkpoint folders); the
            # audio loader cannot read them and would abort the whole batch.
            if not os.path.isfile(inp_path):
                continue
            augment_file(inp_path, output_dir, sample_rate, stretch_factors, pitch_steps, amplification)


In [2]:
# Input folders: background-noise categories and instrumental music tracks.
noise_path = os.path.abspath("datasets/background_simplified")
instrumentals_path = os.path.abspath("datasets/music_set_wav/instrumentals")

# Augmentation settings shared by both runs.
sr       = 44100
stretches = [0.8, 1, 1.2]
pitches  = [-3, 0, +3]
# NOTE(review): augment_file multiplies the samples by these values, so -5
# flips the sign of the signal and both -5 and +5 scale it fivefold.
# Confirm these were not meant as decibel offsets.
amplification = [-5, 1, +5]

batch_augment_music(
    input_dir=instrumentals_path,
    output_dir=OUT_DIR_MUSIC,
    sample_rate=sr,
    stretch_factors=stretches,
    pitch_steps=pitches,
    amplification=amplification,
)
batch_augment_noise(
    input_dir=noise_path,
    output_dir=OUT_DIR_NOISE,
    sample_rate=sr,
    stretch_factors=stretches,
    pitch_steps=pitches,
    amplification=amplification,
)

print("Augmentazione completata!")

Augumenting Music:   0%|          | 0/93 [00:00<?, ?it/s]

Augumenting Noise:   0%|          | 0/5 [00:00<?, ?it/s]

Augmentazione completata!


In [3]:
from math import floor
import os
import random
from pydub import AudioSegment

def segment_file(in_path: str, out_dir: str, window_s: float, segments: int) -> None:
    """Export randomly positioned fixed-length windows of one audio file as WAV.

    Args:
        in_path: Audio file to read with ``AudioSegment.from_file``.
        out_dir: Directory that receives the exported ``.wav`` files.
        window_s: Window length in seconds.
        segments: Number of windows to draw. Start positions are drawn
            independently, so windows may overlap or repeat.

    Returns:
        None. Windows are written as ``<stem>segment000.wav``,
        ``<stem>segment001.wav``, ... If the recording is shorter than one
        window, the whole recording is written once as ``<stem>segment001.wav``
        and nothing else is produced.
    """
    audio = AudioSegment.from_file(in_path)
    stem = os.path.splitext(os.path.basename(in_path))[0]
    window_ms = int(window_s * 1000)

    # len(audio) is compared against the window length in milliseconds; the
    # difference is the latest start position that still fits a full window.
    latest_start = len(audio) - window_ms

    if latest_start < 0:
        # Recording shorter than a window: export it as-is, exactly once.
        audio.export(os.path.join(out_dir, f"{stem}segment{1:03d}.wav"), format="wav")
        return

    for index in range(segments):
        start = random.randint(0, latest_start)
        window = audio[start:start + window_ms]
        target = os.path.join(out_dir, f"{stem}segment{index:03d}.wav")
        window.export(target, format="wav")

In [4]:
# Length, in seconds, of every window cut from the augmented recordings.
win_s = 3

# Source folders (augmented audio), restated here for clarity, and the
# destination folders for the windows.
music_dir = os.path.abspath("mixed_up_data_no_talk/music")
noisy_dir = os.path.abspath("mixed_up_data_no_talk/noisy")
output_dir_music = os.path.abspath("mixed_up_data_no_talk_segmented/music")
output_dir_noisy = os.path.abspath("mixed_up_data_no_talk_segmented/noisy")

# makedirs creates the shared parent folder on the way down as well.
for _segmented_dir in (output_dir_music, output_dir_noisy):
    os.makedirs(_segmented_dir, exist_ok=True)

# Names of the augmented files that will be cut into windows.
music_files = os.listdir(music_dir)
noisy_files = os.listdir(noisy_dir)

In [5]:
# Cut each augmented music file into a randomly chosen number of windows
# (12, 14 or 16). A failure on one file is reported and the loop continues.
for fname in tqdm(music_files, desc="Finestratura Audio Music"):
    in_path = os.path.join(music_dir, fname)
    try:
        n_windows = random.choice([12, 14, 16])
        segment_file(in_path, output_dir_music, win_s, n_windows)
    except Exception as e:
        print(f"Errore con {fname}: {e}")

# From here on ``music_files`` names the exported windows, not the sources.
music_files = os.listdir(output_dir_music)
print(len(music_files))

Finestratura Audio Music:   0%|          | 0/132 [00:00<?, ?it/s]

1888


In [6]:
# Cut each augmented noise file into a randomly chosen number of windows
# (16, 18 or 20). A failure on one file is reported and the loop continues.
for fname in tqdm(noisy_files, desc="Finestratura Audio Noise"):
    in_path = os.path.join(noisy_dir, fname)
    try:
        n_windows = random.choice([16, 18, 20])
        segment_file(in_path, output_dir_noisy, win_s, n_windows)
    except Exception as e:
        print(f"Errore con {fname}: {e}")

# From here on ``noisy_files`` names the exported windows, not the sources.
noisy_files = os.listdir(output_dir_noisy)
print(len(noisy_files))

Finestratura Audio Noise:   0%|          | 0/114 [00:00<?, ?it/s]

2116


In [8]:
import pandas as pd

# Folders holding the fixed-length windows (restated so the cell reads on its own).
music_dir = os.path.abspath("mixed_up_data_no_talk_segmented/music")
noisy_dir = os.path.abspath("mixed_up_data_no_talk_segmented/noisy")

# Keep only the .wav windows, in sorted order. os.listdir returns entries in
# arbitrary order and also reports non-audio entries (hidden files, notebook
# checkpoint folders), which would otherwise be labelled as audio.
music_files = sorted(f for f in os.listdir(music_dir) if f.lower().endswith(".wav"))
noisy_files = sorted(f for f in os.listdir(noisy_dir) if f.lower().endswith(".wav"))

# One record per window: absolute path plus class label.
records = [{"filepath": os.path.join(music_dir, f), "label": "music"} for f in music_files]
records += [{"filepath": os.path.join(noisy_dir, f), "label": "noisy"} for f in noisy_files]

# Fixing the columns keeps the CSV header intact even when no window was found.
df = pd.DataFrame(records, columns=["filepath", "label"])

df.to_csv("datasets/labels_mfcc_no_talk.csv", index=False)
print("Dataset salvato in datasets/labels_mfcc_no_talk.csv")

Dataset salvato in datasets/labels_mfcc_no_talk.csv
