In [2]:
import os
from pydub import AudioSegment
import random
import csv



## Script para generar un audio de prueba que intercala segmentos de llanto y de ruido, cada uno con una duración aleatoria entre un mínimo y un máximo, hasta alcanzar la duración objetivo.

In [None]:
# Target duration in milliseconds (10 minutes).
TARGET_DURATION = 10 * 60 * 1000

# Lower and upper duration bounds for crying segments, in milliseconds.
MIN_CRYING_DURATION, MAX_CRYING_DURATION = 5 * 1000, 10 * 1000

# Lower and upper duration bounds for noise segments, in milliseconds.
MIN_NOISE_DURATION, MAX_NOISE_DURATION = 5 * 1000, 10 * 1000

def get_wav_files(folder):
    """Return the paths of the .wav files located directly inside ``folder``.

    Args:
        folder: Directory to scan. Sub-directories are not descended into.

    Returns:
        A list of ``os.path.join(folder, name)`` paths, sorted by file name,
        one for every regular file whose name ends in ".wav"
        (case-insensitive).

    Raises:
        OSError: If ``folder`` cannot be listed (propagated from
            ``os.listdir``).
    """
    wav_paths = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting list stable across runs and machines.
    for name in sorted(os.listdir(folder)):
        path = os.path.join(folder, name)
        # The isfile check skips directories that happen to be named "*.wav".
        if name.lower().endswith(".wav") and os.path.isfile(path):
            wav_paths.append(path)
    return wav_paths

# WAV paths found in each source folder (crying first, then noise).
crying_files, noise_files = (
    get_wav_files(source_dir)
    for source_dir in ("../data_test/crying", "../data_test/noise")
)

# Accumulators filled while assembling the output: the list of annotation
# rows, the running position in milliseconds, and the audio built so far.
annotations = []
current_time = 0
final_audio = AudioSegment.empty()

def _build_random_segment(files, min_dur, max_dur):
    """Assemble a segment of random length from randomly chosen WAV files.

    A target length is drawn uniformly from ``[min_dur, max_dur]`` (ms).
    Files are then picked at random and appended until the target is reached;
    when a picked file is longer than what is still missing, a random window
    of exactly the missing length is cut out of it.

    Args:
        files: Sequence of WAV file paths to draw from.
        min_dur: Minimum segment length in milliseconds (inclusive).
        max_dur: Maximum segment length in milliseconds (inclusive).

    Returns:
        An ``AudioSegment`` truncated to the drawn target length.

    Raises:
        ValueError: If ``files`` is empty, if every candidate file turns out
            to contain no audio, or (from ``random.randint``) if
            ``min_dur > max_dur``.
    """
    if not files:
        raise ValueError("no WAV files available to build a segment from")

    desired = random.randint(min_dur, max_dur)
    candidates = list(files)
    segment = AudioSegment.empty()
    while len(segment) < desired:
        if not candidates:
            raise ValueError("all candidate WAV files are empty")
        src = random.choice(candidates)
        audio = AudioSegment.from_wav(src)
        if len(audio) == 0:
            # A zero-length file never advances the loop; drop it so the
            # loop cannot spin forever on it.
            candidates.remove(src)
            continue
        remain = desired - len(segment)
        if len(audio) > remain:
            # Take a random window of exactly the missing length.
            start = random.randint(0, len(audio) - remain)
            audio = audio[start:start + remain]
        segment += audio
    return segment[:desired]


def get_crying_segment(min_dur, max_dur, files=None):
    """Return a crying segment whose length is random in ``[min_dur, max_dur]`` ms.

    Args:
        min_dur: Minimum segment length in milliseconds (inclusive).
        max_dur: Maximum segment length in milliseconds (inclusive).
        files: Optional sequence of WAV paths to draw from. Defaults to the
            module-level ``crying_files`` list.

    Returns:
        An ``AudioSegment`` of the drawn length.

    Raises:
        ValueError: If there are no usable files to draw from.
    """
    if files is None:
        files = crying_files
    return _build_random_segment(files, min_dur, max_dur)


def get_noise_segment(min_dur, max_dur, files=None):
    """Return a noise segment whose length is random in ``[min_dur, max_dur]`` ms.

    Args:
        min_dur: Minimum segment length in milliseconds (inclusive).
        max_dur: Maximum segment length in milliseconds (inclusive).
        files: Optional sequence of WAV paths to draw from. Defaults to the
            module-level ``noise_files`` list.

    Returns:
        An ``AudioSegment`` of the drawn length.

    Raises:
        ValueError: If there are no usable files to draw from.
    """
    if files is None:
        files = noise_files
    return _build_random_segment(files, min_dur, max_dur)

# Segment sources that actually have files to draw from. Each entry is
# (annotation label, generator function, min duration ms, max duration ms).
segment_sources = []
if crying_files:
    segment_sources.append(
        ("Llanto", get_crying_segment, MIN_CRYING_DURATION, MAX_CRYING_DURATION)
    )
if noise_files:
    segment_sources.append(
        ("Ruido", get_noise_segment, MIN_NOISE_DURATION, MAX_NOISE_DURATION)
    )
if not segment_sources:
    # Fail fast with a clear message instead of an IndexError raised from
    # random.choice deep inside the segment generators.
    raise RuntimeError("no crying or noise WAV files found; nothing to assemble")

# Append randomly chosen segments until TARGET_DURATION is reached.
while len(final_audio) < TARGET_DURATION:
    label, make_segment, min_dur, max_dur = random.choice(segment_sources)
    segment = make_segment(min_dur, max_dur)

    # Trim the segment to the time still missing so that the annotation
    # written below never extends past the end of the exported audio.
    segment = segment[:TARGET_DURATION - len(final_audio)]

    start_ms = len(final_audio)
    final_audio += segment
    # Re-read the position from the audio itself so annotations always
    # match what was actually appended.
    current_time = min(len(final_audio), TARGET_DURATION)
    seg_dur = current_time - start_ms
    annotations.append({
        "start_time_ms": start_ms,
        "end_time_ms":   current_time,
        "label":         label
    })

# Safety net: guarantee the output is no longer than TARGET_DURATION.
final_audio = final_audio[:TARGET_DURATION]

# Write the assembled audio and its annotation table into audio_for_test/,
# naming both files after the target duration in whole minutes.
minutes = TARGET_DURATION // 60000
os.makedirs("audio_for_test", exist_ok=True)
wav_path = f"audio_for_test/audio_test_{minutes}min.wav"
csv_path = f"audio_for_test/audio_test_{minutes}min.csv"

final_audio.export(wav_path, format="wav")
print(f"Audio generado: {wav_path}")

# One CSV row per annotated segment, preceded by a header row.
columns = ["start_time_ms", "end_time_ms", "label"]
with open(csv_path, "w", newline="") as out_file:
    csv_writer = csv.DictWriter(out_file, fieldnames=columns)
    csv_writer.writeheader()
    csv_writer.writerows(annotations)
print(f"Anotaciones guardadas: {csv_path}")

Audio generado: audio_for_test/audio_test_5min.wav
Anotaciones guardadas: audio_for_test/audio_test_5min.csv
