### Lister tous les fichiers audio avec leurs labels

In [1]:
import os 
import glob

# Root folder of the dataset: one sub-directory per genre, each holding .wav clips.
DATA_ROOT = "../Data/genres_original"

# Keep only real sub-directories so that stray entries (e.g. a .DS_Store file)
# never become a class and shift every integer label after them.
# Sorting gives a stable genre -> index mapping.
genres = sorted(
    entry
    for entry in os.listdir(DATA_ROOT)
    if os.path.isdir(os.path.join(DATA_ROOT, entry))
)

filepaths = []  # path of every audio clip
labels = []     # integer class of the clip at the same position in `filepaths`

for idx, genre in enumerate(genres):
    # glob returns matches in arbitrary order; sorting them makes any
    # later seeded shuffle/split of `filepaths` reproducible across machines.
    files = sorted(glob.glob(f"{DATA_ROOT}/{genre}/*.wav"))
    filepaths.extend(files)
    labels.extend([idx] * len(files))

### Découper en train / val / test

In [2]:
from sklearn.model_selection import train_test_split


# Step 1: hold out 15% of the files as the test set, stratified by label.
X_temp, X_test, y_temp, y_test = train_test_split(
    filepaths,
    labels,
    test_size=0.15,
    random_state=42,
    stratify=labels,
)

# Step 2: carve the validation set out of the remaining 85%.
# 0.1765 * 0.85 is roughly 0.15, i.e. about 15% of the full dataset.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.1765,
    random_state=42,
    stratify=y_temp,
)

# Report the number of files in each split.
for split_name, split_files in (("train", X_train), ("val", X_val), ("test", X_test)):
    print(len(split_files), split_name)


699 train
151 val
150 test


### Découper les fichiers en segments

In [3]:
import librosa
import audioread  # force fallback

def split_audio(file, segment_duration=3, sr=22050):
    """Load an audio file and cut it into consecutive fixed-length segments.

    Args:
        file: Path of the audio file, passed to ``librosa.load``.
        segment_duration: Length of each segment in seconds. May be
            fractional (e.g. ``1.5``).
        sr: Sampling rate requested from ``librosa.load``; also used to
            convert ``segment_duration`` into a number of samples.

    Returns:
        A list of non-overlapping slices of the loaded signal, each exactly
        ``int(sr * segment_duration)`` samples long. A trailing remainder
        shorter than one segment is dropped. If the file cannot be loaded,
        an error line is printed and an empty list is returned.

    Raises:
        ValueError: If ``sr * segment_duration`` is smaller than one sample.
    """
    # Slicing and range() need an integer; int() allows fractional durations.
    samples_per_segment = int(sr * segment_duration)
    if samples_per_segment < 1:
        raise ValueError(
            "segment_duration * sr must amount to at least one sample"
        )

    try:
        signal, _ = librosa.load(file, sr=sr)
    except Exception as e:
        # Best effort: report the unreadable file and skip it instead of
        # aborting the whole pass over the dataset.
        print("Erreur sur:", file, e)
        return []

    # Only complete segments are kept, so count how many fit entirely.
    full_segments = len(signal) // samples_per_segment
    return [
        signal[k * samples_per_segment:(k + 1) * samples_per_segment]
        for k in range(full_segments)
    ]


### Appliquer la découpe à train

In [4]:
X_train_segments, y_train_segments = [], []

# Walk files and labels in lockstep; every segment inherits its file's label.
for file, label in zip(X_train, y_train):
    segs = split_audio(file)
    X_train_segments += segs
    y_train_segments += [label] * len(segs)

print("Train segments:", len(X_train_segments))


  signal, _ = librosa.load(file, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Erreur sur: ../Data/genres_original/jazz/jazz.00054.wav 
Train segments: 6977


### Appliquer la découpe à test

In [5]:
X_test_segments, y_test_segments = [], []

# Walk files and labels in lockstep; every segment inherits its file's label.
for file, label in zip(X_test, y_test):
    segs = split_audio(file)
    X_test_segments += segs
    y_test_segments += [label] * len(segs)

print("Test segments:", len(X_test_segments))


Test segments: 1495


### Appliquer la découpe à val

In [6]:
X_val_segments, y_val_segments = [], []

# Walk files and labels in lockstep; every segment inherits its file's label.
for file, label in zip(X_val, y_val):
    segs = split_audio(file)
    X_val_segments += segs
    y_val_segments += [label] * len(segs)

print("Val segments:", len(X_val_segments))


Val segments: 1509


### Convertir un segment en spectrogramme log-mel

In [7]:
import numpy as np
import librosa

def segment_to_logmel(segment, sr=22050, n_mels=128, n_fft=1024, hop_length=512,
                      n_frames=128):
    """Convert an audio segment into a min-max normalised log-mel spectrogram.

    Args:
        segment: Audio samples, passed as ``y`` to
            ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``segment``.
        n_mels: Number of mel bands.
        n_fft: FFT window size.
        hop_length: Hop between successive frames, in samples.
        n_frames: Width the result is padded or trimmed to along axis 1.
            Defaults to 128, the value that was previously hard-coded.

    Returns:
        The log-mel spectrogram rescaled to the [0, 1] range, with axis 1
        adjusted to ``n_frames`` by ``librosa.util.fix_length``. A segment
        whose log-mel spectrogram is constant (for instance pure silence)
        yields all zeros instead of NaN.
    """
    # Mel-scaled power spectrogram.
    mel_spec = librosa.feature.melspectrogram(
        y=segment,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels
    )

    # Decibel scale relative to the segment's own peak.
    log_mel = librosa.power_to_db(mel_spec, ref=np.max)

    # Min-max normalisation. A constant spectrogram has a zero range; dividing
    # by it would fill the feature map with NaN, so fall back to zeros.
    lowest = log_mel.min()
    value_range = log_mel.max() - lowest
    if value_range > 0:
        log_mel_norm = (log_mel - lowest) / value_range
    else:
        log_mel_norm = np.zeros_like(log_mel)

    # Pad or trim axis 1 so every segment gives the same width.
    log_mel_norm = librosa.util.fix_length(log_mel_norm, size=n_frames, axis=1)

    return log_mel_norm


### Convertir train, val et test en spectrogrammes log-mel

In [8]:
# Turn every raw audio segment of each split into its log-mel feature map.
X_train_mel = list(map(segment_to_logmel, X_train_segments))
X_val_mel = list(map(segment_to_logmel, X_val_segments))
X_test_mel = list(map(segment_to_logmel, X_test_segments))


### Convertir en tableaux NumPy

In [9]:
# Stack the per-segment feature maps of each split into one NumPy array.
X_train_mel, X_val_mel, X_test_mel = (
    np.array(features) for features in (X_train_mel, X_val_mel, X_test_mel)
)

# Same conversion for the per-segment label lists.
y_train_segments, y_val_segments, y_test_segments = (
    np.array(targets)
    for targets in (y_train_segments, y_val_segments, y_test_segments)
)


[[[0.6522759  0.68333435 0.6592578  ... 0.8412137  0.8027954  0.74052453]
  [0.7117088  0.70386237 0.67119807 ... 0.9242443  0.91053003 0.8599804 ]
  [0.77321136 0.788961   0.67103887 ... 0.92467755 0.8816436  0.8647879 ]
  ...
  [0.2694404  0.27743533 0.34883147 ... 0.30937254 0.30073985 0.31034997]
  [0.267318   0.3154547  0.35683668 ... 0.3237297  0.29547837 0.32847595]
  [0.28054652 0.34697285 0.36976275 ... 0.3378744  0.29761022 0.3493141 ]]

 [[0.689574   0.6273447  0.7044112  ... 0.514724   0.5665124  0.7375984 ]
  [0.79096967 0.7956825  0.85980785 ... 0.70432305 0.7413368  0.8563047 ]
  [0.811707   0.84493744 0.88232523 ... 0.81141275 0.78041434 0.83390504]
  ...
  [0.18884464 0.24049427 0.22129907 ... 0.29020214 0.20701759 0.23663019]
  [0.16575457 0.24549459 0.22913764 ... 0.2901885  0.24573296 0.25550425]
  [0.13995406 0.282635   0.22397512 ... 0.28945732 0.23636882 0.2711687 ]]

 [[0.5825044  0.67911786 0.6737902  ... 0.44005218 0.61448485 0.7965949 ]
  [0.6391564  0.741308