### Prérequis

In [1]:
# Fonctionne avec Python 3.13 :
# pip install --upgrade pyloudnorm soundfile pedalboard
# conda install -n audio ipykernel --update-deps --force-reinstall

### Imports

In [2]:
import glob
import pyloudnorm as pyln
import soundfile as sf
import numpy as np
from scipy import signal
from pedalboard import load_plugin
from pedalboard.io import AudioFile
from pathlib import Path

### Récupérer les fichiers à traiter

In [3]:
# Recherche des fichiers audio formattés ainsi :
# {langue}_0.wav
# Par exemple :
# fr_0.wav
# en_0.wav

# Glob des fichiers avec le pattern {langue}_0.wav


audio_files = [
    Path(f)
    for f in glob.glob("??_0.wav")
    if Path(f).is_file() and Path(f).stat().st_size > 0
]

# Déductions des langues, selon les fichiers trouvés dans le répertoire audio


# Cela permet de normaliser ensuite les fichiers par langue


langues = list(set([f.stem.split("_")[0] for f in audio_files]))


# Affichage des langues trouvées


if not langues:
    print("Aucun fichier audio trouvé dans le répertoire 'audio'")


else:
    print(f"Langues : {langues}")

Langues : ['es', 'fr', 'de', 'en']


### Conversion en 16kHz mono

In [4]:
for fic in audio_files:
    # Charger le fichier audio
    data, sr = sf.read(fic)

    # Convertir en mono si le fichier est en stéréo
    if len(data.shape) > 1:
        data = np.mean(data, axis=1)

    # Rééchantillonner à 16kHz si nécessaire
    if sr != 16000:
        # Calculer le ratio pour le rééchantillonnage
        ratio = 16000 / sr
        new_length = int(len(data) * ratio)
        data = signal.resample_poly(data, 16000, sr)

    # Sauvegarder le fichier converti
    sf.write(fic, data, 16000)

### Création des fichiers audio dégradés

In [5]:
# {Program 0 name: Room
# Program 1 name: Hallway
# Program 2 name: Cathedral
# Program 3 name: Elevator Speaker
# Program 4 name: Haunted Cave
# Program 5 name: Headphones, Outdoor
# Program 6 name: Houston, we've had a problem
# Program 7 name: Laptop, Living Room
# Program 8 name: Laptop, Office
# Program 9 name: Megaphone, Outdoor
# Program 10 name: Neighbor's Radio
# Program 11 name: Neighbor's Speakers
# Program 12 name: Neighbor's TV
# Program 13 name: Outside The Club
# Program 14 name: P.A. System, Airplane
# Program 15 name: P.A. System, Airport
# Program 16 name: P.A. System, Bus
# Program 17 name: P.A. System, Cathedral
# Program 18 name: P.A. System, Conference Room
# Program 19 name: P.A. System, Gym
# Program 20 name: P.A. System, Hospital
# Program 21 name: P.A. System, Museum
# Program 22 name: P.A. System, Outdoor
# Program 23 name: P.A. System, School Hallway
# Program 24 name: P.A. System, Shopping Mall
# Program 25 name: P.A. System, Spaceship
# Program 26 name: P.A. System, Train Station
# Program 27 name: P.A. System, Waiting Room
# Program 28 name: Party Downstairs
# Program 29 name: Phone, On Ear
# Program 30 name: Phone, On Loudspeaker
# Program 31 name: Police Radio, Car
# Program 32 name: Radio, Bathroom
# Program 33 name: Radio, Car
# Program 34 name: Radio, Kitchen
# Program 35 name: Radio, Living Room
# Program 36 name: Smart Home Speaker
# Program 37 name: The Almighty
# Program 38 name: TV, Bedroom
# Program 39 name: TV, Living Room
# Program 40 name: Walkie Talkie, Outdoor}

In [6]:
# Utilisation du plugin PlacIt.vst3 avec pedalboard pour dégrader la qualité sonore selon plusieurs profils
effect = load_plugin("PlaceIt.vst3")

# Effets à utiliser
effets = (
    "Cathedral",
    "Elevator Speaker",
    "Phone, On Ear",
    "TV, Living Room",
    "Megaphone, Outdoor",
    "Houston, we've had a problem",
    "Walkie Talkie, Outdoor",
    "Police Radio, Car",
)

# Itération sur les fichiers audio trouvés
audio_files_result = []
for fic in audio_files:
    print(f"Traitement du fichier {fic}")
    audio_files_result.append(fic)
    # Lire le fichier audio
    with AudioFile(str(fic)) as f_audio:
        audio = f_audio.read(f_audio.frames)
        # Appliquer les effets en utilisant enumerate pour obtenir l'index et le nom de l'effet
        for index, effet in enumerate(effets):
            effect.program = effet
            print(f"Effet {index + 1}: {effet}")
            # Appliquer l'effet
            effected = effect(audio, f_audio.samplerate)
            # Ecrire le fichier audio résultant
            with AudioFile(
                str(fic).replace("_0.wav", f"_{index + 1}.wav"),
                "w",
                samplerate=f_audio.samplerate,
                num_channels=f_audio.num_channels,
            ) as f:
                f.write(effected)
            # Compléter audio_files avec le fichier nouvellement créé
            audio_files_result.append(
                Path(str(fic).replace("_0.wav", f"_{index + 1}.wav"))
            )

Traitement du fichier de_0.wav
Effet 1: Cathedral
Effet 2: Elevator Speaker
Effet 3: Phone, On Ear
Effet 4: TV, Living Room
Effet 5: Megaphone, Outdoor
Effet 6: Houston, we've had a problem
Effet 7: Walkie Talkie, Outdoor
Effet 8: Police Radio, Car
Traitement du fichier en_0.wav
Effet 1: Cathedral
Effet 2: Elevator Speaker
Effet 3: Phone, On Ear
Effet 4: TV, Living Room
Effet 5: Megaphone, Outdoor
Effet 6: Houston, we've had a problem
Effet 7: Walkie Talkie, Outdoor
Effet 8: Police Radio, Car
Traitement du fichier es_0.wav
Effet 1: Cathedral
Effet 2: Elevator Speaker
Effet 3: Phone, On Ear
Effet 4: TV, Living Room
Effet 5: Megaphone, Outdoor
Effet 6: Houston, we've had a problem
Effet 7: Walkie Talkie, Outdoor
Effet 8: Police Radio, Car
Traitement du fichier fr_0.wav
Effet 1: Cathedral
Effet 2: Elevator Speaker
Effet 3: Phone, On Ear
Effet 4: TV, Living Room
Effet 5: Megaphone, Outdoor
Effet 6: Houston, we've had a problem
Effet 7: Walkie Talkie, Outdoor
Effet 8: Police Radio, Car


### Normalisation du volume sonore

In [7]:
target_loudness = -22.0

# On groupe les fichiers par langue
for l in langues:
    print(audio_files_result)
    files = [f for f in audio_files_result if f.stem.split("_")[0] == l]
    print(files)
    for f in files:
        print(f)
        print(f"Normalizing {f}", end="... ")
        data, sr = sf.read(f)
        meter = pyln.Meter(sr)
        loudness = meter.integrated_loudness(data)

        # Calculate the actual normalization applied
        normalization_amount = target_loudness - loudness
        rounded_amount = round(normalization_amount, 1)
        print(f"{rounded_amount:+} LUFS", flush=True)

        normalized_data = pyln.normalize.loudness(data, loudness, target_loudness)
        out_path = str(f).replace(".wav", f"_norm_{rounded_amount:+}db.wav")
        sf.write(out_path, normalized_data, sr)

[WindowsPath('de_0.wav'), WindowsPath('de_1.wav'), WindowsPath('de_2.wav'), WindowsPath('de_3.wav'), WindowsPath('de_4.wav'), WindowsPath('de_5.wav'), WindowsPath('de_6.wav'), WindowsPath('de_7.wav'), WindowsPath('de_8.wav'), WindowsPath('en_0.wav'), WindowsPath('en_1.wav'), WindowsPath('en_2.wav'), WindowsPath('en_3.wav'), WindowsPath('en_4.wav'), WindowsPath('en_5.wav'), WindowsPath('en_6.wav'), WindowsPath('en_7.wav'), WindowsPath('en_8.wav'), WindowsPath('es_0.wav'), WindowsPath('es_1.wav'), WindowsPath('es_2.wav'), WindowsPath('es_3.wav'), WindowsPath('es_4.wav'), WindowsPath('es_5.wav'), WindowsPath('es_6.wav'), WindowsPath('es_7.wav'), WindowsPath('es_8.wav'), WindowsPath('fr_0.wav'), WindowsPath('fr_1.wav'), WindowsPath('fr_2.wav'), WindowsPath('fr_3.wav'), WindowsPath('fr_4.wav'), WindowsPath('fr_5.wav'), WindowsPath('fr_6.wav'), WindowsPath('fr_7.wav'), WindowsPath('fr_8.wav')]
[WindowsPath('es_0.wav'), WindowsPath('es_1.wav'), WindowsPath('es_2.wav'), WindowsPath('es_3.wav')



fr_3.wav
Normalizing fr_3.wav... +0.3 LUFS
fr_4.wav
Normalizing fr_4.wav... +6.7 LUFS
fr_5.wav
Normalizing fr_5.wav... +3.0 LUFS
fr_6.wav
Normalizing fr_6.wav... 



-1.2 LUFS
fr_7.wav
Normalizing fr_7.wav... +4.5 LUFS
fr_8.wav
Normalizing fr_8.wav... +9.8 LUFS
[WindowsPath('de_0.wav'), WindowsPath('de_1.wav'), WindowsPath('de_2.wav'), WindowsPath('de_3.wav'), WindowsPath('de_4.wav'), WindowsPath('de_5.wav'), WindowsPath('de_6.wav'), WindowsPath('de_7.wav'), WindowsPath('de_8.wav'), WindowsPath('en_0.wav'), WindowsPath('en_1.wav'), WindowsPath('en_2.wav'), WindowsPath('en_3.wav'), WindowsPath('en_4.wav'), WindowsPath('en_5.wav'), WindowsPath('en_6.wav'), WindowsPath('en_7.wav'), WindowsPath('en_8.wav'), WindowsPath('es_0.wav'), WindowsPath('es_1.wav'), WindowsPath('es_2.wav'), WindowsPath('es_3.wav'), WindowsPath('es_4.wav'), WindowsPath('es_5.wav'), WindowsPath('es_6.wav'), WindowsPath('es_7.wav'), WindowsPath('es_8.wav'), WindowsPath('fr_0.wav'), WindowsPath('fr_1.wav'), WindowsPath('fr_2.wav'), WindowsPath('fr_3.wav'), WindowsPath('fr_4.wav'), WindowsPath('fr_5.wav'), WindowsPath('fr_6.wav'), WindowsPath('fr_7.wav'), WindowsPath('fr_8.wav')]
[Wi



+1.6 LUFS
de_6.wav
Normalizing de_6.wav... -1.3 LUFS
de_7.wav
Normalizing de_7.wav... +4.5 LUFS
de_8.wav
Normalizing de_8.wav... +9.6 LUFS
[WindowsPath('de_0.wav'), WindowsPath('de_1.wav'), WindowsPath('de_2.wav'), WindowsPath('de_3.wav'), WindowsPath('de_4.wav'), WindowsPath('de_5.wav'), WindowsPath('de_6.wav'), WindowsPath('de_7.wav'), WindowsPath('de_8.wav'), WindowsPath('en_0.wav'), WindowsPath('en_1.wav'), WindowsPath('en_2.wav'), WindowsPath('en_3.wav'), WindowsPath('en_4.wav'), WindowsPath('en_5.wav'), WindowsPath('en_6.wav'), WindowsPath('en_7.wav'), WindowsPath('en_8.wav'), WindowsPath('es_0.wav'), WindowsPath('es_1.wav'), WindowsPath('es_2.wav'), WindowsPath('es_3.wav'), WindowsPath('es_4.wav'), WindowsPath('es_5.wav'), WindowsPath('es_6.wav'), WindowsPath('es_7.wav'), WindowsPath('es_8.wav'), WindowsPath('fr_0.wav'), WindowsPath('fr_1.wav'), WindowsPath('fr_2.wav'), WindowsPath('fr_3.wav'), WindowsPath('fr_4.wav'), WindowsPath('fr_5.wav'), WindowsPath('fr_6.wav'), WindowsPat