In [1]:
from pydub import AudioSegment
import os
import glob
import random
import librosa
import soundfile as sf
from tqdm.notebook import tqdm

# --- Configuration for the speech + background-noise mixing step ---

# Input folders: the speech clips and the background recordings
# (the latter organised in one sub-folder per noise category).
VOICE_DIR = os.path.abspath(
    "datasets/mathurinache/the-lj-speech-dataset/versions/1/LJSpeech-1.1/wavs"
)
BG_DIR = os.path.abspath("datasets/background_simplified")

# Destination of the mixed clips; created up front so the exports never hit
# a missing directory.
OUT_DIR = os.path.abspath("mixed_up_data_talk/noisy")
os.makedirs(OUT_DIR, exist_ok=True)

# Candidate gains added to the background track by mix_it_up before mixing
# (pydub interprets `segment + n` as a gain of n dB).
AMPLIFICATION = [10, 15, 20]

# Every speech clip that can be drawn for a mix.
voice_files = glob.glob(os.path.join(VOICE_DIR, "*.wav"))

# Names of the sub-folders of BG_DIR, one per noise category.
bg_noise_type = [
    "cafeteria_noises",
    "metro_noises",
    "park_noises",
    "station_noises",
    "traffic_noises",
]

def mix_it_up(idx):
    """Create one noisy speech clip for every background-noise category.

    For each entry of ``bg_noise_type`` a random speech file and a random
    background file of that category are drawn, the background is boosted by
    a random gain taken from ``AMPLIFICATION`` and overlaid on the speech.
    The mix is exported to ``OUT_DIR`` as ``mixed_noisy_<category>_<idx>.wav``.

    Args:
        idx: Identifier embedded in the output file names. Calling the
            function again with the same value overwrites the earlier files.

    Raises:
        FileNotFoundError: If no speech file is available, or a category
            folder under ``BG_DIR`` contains no ``.wav`` file.
    """
    # Fail with an explicit message instead of random.choice's bare
    # "Cannot choose from an empty sequence".
    if not voice_files:
        raise FileNotFoundError(f"No .wav files found in {VOICE_DIR}")

    # `noise_type` rather than `type`, which would shadow the builtin.
    for noise_type in bg_noise_type:
        bg_files = glob.glob(os.path.join(BG_DIR, noise_type, "*.wav"))
        if not bg_files:
            raise FileNotFoundError(
                f"No .wav files found in {os.path.join(BG_DIR, noise_type)}"
            )

        voice_path = random.choice(voice_files)
        bg_path = random.choice(bg_files)

        voice = AudioSegment.from_file(voice_path)
        background = AudioSegment.from_file(bg_path)

        # pydub: adding a number to a segment applies that gain in dB.
        background += random.choice(AMPLIFICATION)

        # The result always has the length of `voice`. loop=True repeats a
        # background that is shorter than the speech so the noise covers the
        # whole clip; a longer background is simply truncated as before.
        combined = voice.overlay(background, loop=True)

        out_path = os.path.join(OUT_DIR, f"mixed_noisy_{noise_type}_{idx}.wav")
        combined.export(out_path, format='wav')

# Run 70 mixing rounds; every round writes one clip per noise category.
for i in tqdm(range(70), desc="Creazione Noisy Set"):
    mix_it_up(i)

Creazione Noisy Set:   0%|          | 0/70 [00:00<?, ?it/s]

In [2]:
# Folders used by the augmentation helpers, resolved to absolute paths.
BG_DIR = os.path.abspath("datasets/background_simplified")
OUT_DIR_MUSIC = os.path.abspath("mixed_up_data_talk/music")
OUT_DIR_NOISE = os.path.abspath("mixed_up_data_talk/noisy")

# Make sure both output folders exist before anything is written to them.
os.makedirs(OUT_DIR_NOISE, exist_ok=True)
os.makedirs(OUT_DIR_MUSIC, exist_ok=True)

def augment_file(input_path, output_dir, sample_rate, stretch_factors, pitch_steps, amplification):
    """Write one randomly augmented copy of an audio file to ``output_dir``.

    Exactly one operation is drawn uniformly from time-stretch, pitch-shift,
    gain and "none", and exactly one WAV file is written:

    * stretch -> ``<name>_stretch<factor>.wav``
    * pitch   -> ``<name>_pitch<+steps>.wav``
    * gain    -> ``<name>_amplified_<gain>x.wav``
    * none    -> ``<name>.wav`` (the signal as loaded, without any effect)

    Args:
        input_path: Path of the audio file to load.
        output_dir: Existing directory that receives the output file.
        sample_rate: Sampling rate passed to ``librosa.load`` and used when
            writing the result.
        stretch_factors: Non-empty sequence of time-stretch rates to draw from.
        pitch_steps: Non-empty sequence of integer semitone shifts to draw
            from (formatted with ``+d`` in the file name).
        amplification: Non-empty sequence of factors the waveform is
            multiplied by.
            NOTE(review): these are linear multipliers, not dB. A negative
            value flips the polarity and large magnitudes can push samples
            outside [-1, 1] -- confirm the values passed by callers.
    """
    y, sr = librosa.load(input_path, sr=sample_rate)
    basename = os.path.splitext(os.path.basename(input_path))[0]

    choice = random.choice(['stretch', 'pitch', 'gain', 'none'])

    if choice == 'stretch':
        factor = random.choice(stretch_factors)
        y_out = librosa.effects.time_stretch(y, rate=factor)
        out_name = f"{basename}_stretch{factor:.2f}.wav"
    elif choice == 'pitch':
        steps = random.choice(pitch_steps)
        y_out = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=steps)
        out_name = f"{basename}_pitch{steps:+d}.wav"
    elif choice == 'gain':
        gain = random.choice(amplification)
        y_out = y * gain
        out_name = f"{basename}_amplified_{gain:.1f}x.wav"
    else:
        # 'none': keep an unmodified copy. The original test compared the
        # whole `ops` list with 'none', so this branch never ran; the target
        # name also needs the ".wav" extension for soundfile to infer the
        # output format.
        y_out = y
        out_name = f"{basename}.wav"

    sf.write(os.path.join(output_dir, out_name), y_out, sr)

def batch_augment(input_dir, output_dir,
                  sample_rate=44100,
                  stretch_factors=(0.9, 1.1),
                  pitch_steps=(-2, 2), amplification = 0,
                  desc="Augumenting Music"):
    """Apply ``augment_file`` to every file directly inside ``input_dir``.

    Args:
        input_dir: Directory whose files are augmented (not recursive).
        output_dir: Existing directory that receives the augmented files.
        sample_rate: Sampling rate forwarded to ``augment_file``.
        stretch_factors: Candidate time-stretch rates.
        pitch_steps: Candidate pitch shifts in semitones.
        amplification: Candidate linear gain factors. A sequence is forwarded
            unchanged. A single number is wrapped in a one-element tuple, and
            a falsy value (the legacy default ``0``, or ``None``) means
            "unit gain" -- previously the scalar default made
            ``random.choice`` raise ``TypeError`` whenever the gain
            operation was drawn.
        desc: Label of the progress bar. The default keeps the historical
            text; pass another label when the input is not the music set.
    """
    if amplification is None or isinstance(amplification, (int, float)):
        amplification = (amplification or 1.0,)

    # Sub-directories (e.g. ".ipynb_checkpoints") and hidden files
    # (e.g. ".DS_Store") are not audio and would make the loader fail.
    file_names = [
        name for name in os.listdir(input_dir)
        if not name.startswith(".")
        and os.path.isfile(os.path.join(input_dir, name))
    ]

    for f in tqdm(file_names, desc=desc):
        inp_path = os.path.join(input_dir, f)
        augment_file(inp_path, output_dir, sample_rate, stretch_factors, pitch_steps, amplification)

In [3]:
# --- Augment the music tracks and the noisy speech mixes ---

# Music set: source folder and destination of the augmented copies.
song_path = os.path.abspath("datasets/music_set_wav/complete_song")
OUT_DIR_MUSIC = os.path.abspath("mixed_up_data_talk/music")

# Noisy set: the mixes are read from "noisy" and the augmented copies go to
# a separate "noise_augmented" folder (OUT_DIR_NOISE is re-bound here).
noise_path = os.path.abspath("mixed_up_data_talk/noisy")
OUT_DIR_NOISE = os.path.abspath("mixed_up_data_talk/noise_augmented")

os.makedirs(OUT_DIR_MUSIC, exist_ok=True)
os.makedirs(OUT_DIR_NOISE, exist_ok=True)

# Augmentation parameters shared by both runs.
sr = 44100
stretches = [0.8, 1.2]
pitches = [-3, 3]
# NOTE(review): augment_file multiplies the waveform by the drawn value, so
# these act as linear factors (-5 also flips the polarity) -- confirm they
# were not meant as dB.
amplification = [-5, 1, +5]

batch_augment(
    input_dir=song_path,
    output_dir=OUT_DIR_MUSIC,
    sample_rate=sr,
    stretch_factors=stretches,
    pitch_steps=pitches,
    amplification=amplification,
)
batch_augment(
    input_dir=noise_path,
    output_dir=OUT_DIR_NOISE,
    sample_rate=sr,
    stretch_factors=stretches,
    pitch_steps=pitches,
    amplification=amplification,
)

print("Augmentazione completata!")

Augumenting Music:   0%|          | 0/93 [00:00<?, ?it/s]

Augumenting Music:   0%|          | 0/350 [00:00<?, ?it/s]

Augmentazione completata!


In [10]:
from math import floor
import os
import random
from pydub import AudioSegment

def segment_file(in_path: str, out_dir: str, window_s: float, segments: int) -> None:
    """Export randomly positioned, fixed-length windows of an audio file.

    Args:
        in_path: Audio file to cut.
        out_dir: Existing directory that receives the WAV windows.
        window_s: Window length in seconds.
        segments: Number of windows to draw. Start positions are sampled
            independently, so windows may overlap or repeat.

    A file shorter than the window is exported once, whole, under the
    ``segment001`` suffix. Otherwise the windows are named
    ``<name>segment000.wav``, ``<name>segment001.wav``, ...
    """
    clip = AudioSegment.from_file(in_path)
    total_ms = len(clip)  # pydub segment lengths are expressed in milliseconds
    win_ms = int(window_s * 1000)
    base = os.path.splitext(os.path.basename(in_path))[0]

    # Not enough audio for even one full window: keep the clip as it is.
    if total_ms < win_ms:
        target = os.path.join(out_dir, f"{base}segment{1:03d}.wav")
        clip.export(target, format="wav")
        return

    latest_start = total_ms - win_ms
    for i in range(segments):
        begin = random.randint(0, latest_start)
        piece = clip[begin:begin + win_ms]
        target = os.path.join(out_dir, f"{base}segment{i:03d}.wav")
        piece.export(target, format="wav")

In [11]:
# Length, in seconds, of every exported window.
win_s = 3

# Inputs of the windowing step: the augmented music and noisy sets.
music_dir = os.path.abspath("mixed_up_data_talk/music")
noisy_dir = os.path.abspath("mixed_up_data_talk/noise_augmented")

# Outputs of the windowing step (repeated here for clarity).
output_dir_music = os.path.abspath("mixed_up_data_talk_segmented/music")
output_dir_noisy = os.path.abspath("mixed_up_data_talk_segmented/noisy")

# makedirs also creates the shared parent "mixed_up_data_talk_segmented".
os.makedirs(output_dir_music, exist_ok=True)
os.makedirs(output_dir_noisy, exist_ok=True)

# File names (not full paths) found in each input folder.
music_files = os.listdir(music_dir)
noisy_files = os.listdir(noisy_dir)

In [12]:
# Cut every augmented music file into windows of win_s seconds; the number of
# windows per file is drawn from {12, 14, 16}. A failing file is reported and
# skipped so one bad input does not stop the batch.
for fname in tqdm(music_files, desc="Finestratura Audio Music"):
    in_path = os.path.join(music_dir, fname)
    try:
        segment_file(
            in_path=in_path,
            out_dir=output_dir_music,
            window_s=win_s,
            segments=random.choice([12, 14, 16]),
        )
    except Exception as e:
        print(f"Errore con {fname}: {e}")

# From here on music_files lists the exported windows, not the inputs.
music_files = os.listdir(output_dir_music)
print(len(music_files))

Finestratura Audio Music:   0%|          | 0/132 [00:00<?, ?it/s]

1844


In [13]:
# Cut every augmented noisy file into 4 windows of win_s seconds. A failing
# file is reported and skipped so one bad input does not stop the batch.
for fname in noisy_files:
    in_path = os.path.join(noisy_dir, fname)
    try:
        segment_file(
            in_path=in_path,
            out_dir=output_dir_noisy,
            window_s=win_s,
            segments=4,
        )
    except Exception as e:
        print(f"Errore con {fname}: {e}")

# From here on noisy_files lists the exported windows, not the inputs.
noisy_files = os.listdir(output_dir_noisy)
print(len(noisy_files))

1895


In [14]:
import pandas as pd

# Folders holding the exported windows (repeated here for clarity).
music_dir = os.path.abspath("mixed_up_data_talk_segmented/music")
noisy_dir = os.path.abspath("mixed_up_data_talk_segmented/noisy")

music_files = os.listdir(music_dir)
noisy_files = os.listdir(noisy_dir)

# One row per window: absolute file path plus class label, music rows first.
records = [
    {"filepath": os.path.join(music_dir, name), "label": "music"}
    for name in music_files
]
records.extend(
    {"filepath": os.path.join(noisy_dir, name), "label": "noisy"}
    for name in noisy_files
)

df = pd.DataFrame.from_records(records)

df.to_csv("datasets/labels_mfcc_talk_segmented.csv", index=False)
print("Dataset salvato in datasets/labels_mfcc_talk_segmented.csv")

Dataset salvato in datasets/labels_mfcc_talk_segmented.csv
