### Lister tous les fichiers audio avec leurs labels

In [1]:
import os 
import glob

# Root folder that holds one sub-directory per genre.
DATA_ROOT = "../Data/genres_original"

# Keep only directories: a stray file in the root (e.g. .DS_Store) would
# otherwise be treated as a genre and shift every label index after it.
genres = sorted(
    entry
    for entry in os.listdir(DATA_ROOT)
    if os.path.isdir(os.path.join(DATA_ROOT, entry))
)

filepaths = []
labels = []

for idx, genre in enumerate(genres):
    # glob returns matches in arbitrary (filesystem) order; sorting makes the
    # later seeded train/val/test split reproducible across machines.
    files = sorted(glob.glob(f"{DATA_ROOT}/{genre}/*.wav"))
    filepaths.extend(files)
    # Every file of this genre gets the genre's index as its integer label.
    labels.extend([idx] * len(files))

### Découper en train / val / test

In [2]:
from sklearn.model_selection import train_test_split


# First cut: hold out 15 % of the files as the test set, stratified by label.
X_temp, X_test, y_temp, y_test = train_test_split(
    filepaths,
    labels,
    test_size=0.15,
    random_state=42,
    stratify=labels,
)

# Second cut: take 0.1765 of the remaining files as the validation set,
# again stratified; what is left becomes the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.1765,
    random_state=42,
    stratify=y_temp,
)

# Report the number of files in each split.
for split_files, split_name in ((X_train, "train"), (X_val, "val"), (X_test, "test")):
    print(len(split_files), split_name)


699 train
151 val
150 test


### Découper les fichiers en segments

In [3]:
import librosa
import audioread  # force fallback

def split_audio(file, segment_duration=3, sr=22050):
    """Load an audio file and cut it into equal-length, non-overlapping segments.

    Args:
        file: Path of the audio file, passed to ``librosa.load``.
        segment_duration: Length of one segment in seconds. May be fractional;
            the segment length is rounded to a whole number of samples.
        sr: Sampling rate requested from ``librosa.load``.

    Returns:
        A list of sample slices, each exactly ``round(sr * segment_duration)``
        samples long. Trailing samples that do not fill a whole segment are
        dropped. An empty list is returned when the file cannot be loaded.

    Raises:
        ValueError: If the segment length works out to less than one sample.
    """
    # Slicing and range() need an integer; a float duration such as 1.5 would
    # otherwise raise a TypeError.
    samples_per_segment = int(round(sr * segment_duration))
    if samples_per_segment <= 0:
        raise ValueError(
            f"segment_duration={segment_duration!r} and sr={sr!r} give an empty segment"
        )

    try:
        signal, _ = librosa.load(file, sr=sr)
    except Exception as e:
        # Best effort: report and skip unreadable files instead of aborting the
        # whole run. repr() keeps the exception type visible even when the
        # exception message itself is empty.
        print("Erreur sur:", file, repr(e))
        return []

    # Stop at the last start offset that still leaves room for a full segment,
    # so no incomplete tail is ever produced.
    last_start = len(signal) - samples_per_segment
    return [
        signal[start:start + samples_per_segment]
        for start in range(0, last_start + 1, samples_per_segment)
    ]


### Appliquer la découpe à train

In [4]:
# Cut every training file into segments; each segment inherits its file's label.
X_train_segments, y_train_segments = [], []

for path, label in zip(X_train, y_train):
    chunks = split_audio(path)
    X_train_segments += chunks
    y_train_segments += [label] * len(chunks)

print("Train segments:", len(X_train_segments))


  signal, _ = librosa.load(file, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Erreur sur: ../Data/genres_original/jazz/jazz.00054.wav 
Train segments: 6977


### Appliquer la découpe à test

In [5]:
# Cut every test file into segments; each segment inherits its file's label.
X_test_segments, y_test_segments = [], []

for path, label in zip(X_test, y_test):
    chunks = split_audio(path)
    X_test_segments += chunks
    y_test_segments += [label] * len(chunks)

print("Test segments:", len(X_test_segments))


Test segments: 1495


### Appliquer la découpe à val

In [6]:
# Cut every validation file into segments; each segment inherits its file's label.
X_val_segments, y_val_segments = [], []

for path, label in zip(X_val, y_val):
    chunks = split_audio(path)
    X_val_segments += chunks
    y_val_segments += [label] * len(chunks)

print("Val segments:", len(X_val_segments))


Val segments: 1509


### Convertir un segment en spectrogramme log-mel

In [7]:
import numpy as np
import librosa

def segment_to_logmel(segment, sr=22050, n_mels=128, n_fft=1024, hop_length=512, n_frames=128):
    """Turn one audio segment into a min-max normalised log-mel spectrogram.

    Args:
        segment: 1-D array of audio samples.
        sr: Sampling rate of ``segment``.
        n_mels: Number of mel bands.
        n_fft: FFT window size.
        hop_length: Hop between successive frames, in samples.
        n_frames: Width the spectrogram is padded or trimmed to along axis 1.

    Returns:
        A 2-D array with ``n_frames`` columns whose values lie in [0, 1]
        before padding. A constant (e.g. silent) segment yields all zeros.
    """
    # Mel-scaled power spectrogram
    mel_spec = librosa.feature.melspectrogram(
        y=segment,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels
    )

    # Convert power to decibels, relative to the loudest bin of this segment
    log_mel = librosa.power_to_db(mel_spec, ref=np.max)

    # Min-max normalisation. A constant spectrogram has a zero value range;
    # dividing by it would fill the features with NaN, so divide by 1 instead
    # (which leaves an all-zero result).
    lowest = log_mel.min()
    value_range = log_mel.max() - lowest
    log_mel_norm = (log_mel - lowest) / (value_range if value_range > 0 else 1.0)

    # Pad or trim the time axis to the requested number of frames
    log_mel_norm = librosa.util.fix_length(log_mel_norm, size=n_frames, axis=1)

    return log_mel_norm


### Convertir train, val et test en spectrogrammes log-mel

In [8]:
# Convert every audio segment of each split into its log-mel spectrogram.
X_train_mel = list(map(segment_to_logmel, X_train_segments))
X_val_mel = list(map(segment_to_logmel, X_val_segments))
X_test_mel = list(map(segment_to_logmel, X_test_segments))


KeyboardInterrupt: 

### Convertir en tableaux NumPy

In [None]:
# Turn each list of spectrograms into a single NumPy array. The names are
# rebound one at a time, so each list can be released before the next
# conversion starts.
X_train_mel = np.array(X_train_mel)
X_val_mel   = np.array(X_val_mel)
X_test_mel  = np.array(X_test_mel)

# Same conversion for the per-segment label lists.
y_train_segments = np.array(y_train_segments)
y_val_segments   = np.array(y_val_segments)
y_test_segments  = np.array(y_test_segments)


### Créer un Dataset PyTorch avec les log-mels

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class GTZANDataset(Dataset):
    """In-memory dataset pairing log-mel spectrograms with integer labels.

    Args:
        X: Array-like of log-mel spectrograms, one entry per segment.
        y: Array-like of integer class labels, one per entry of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise only surface as an IndexError
        # somewhere in the middle of an epoch.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = torch.tensor(X, dtype=torch.float32)  # features as float32 tensor
        self.y = torch.tensor(y, dtype=torch.long)     # labels as int64 tensor

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(spectrogram, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]


### Créer les Datasets et DataLoaders

In [None]:
batch_size = 32

# Wrap each split's spectrograms and labels in a dataset.
train_dataset, val_dataset, test_dataset = (
    GTZANDataset(features, targets)
    for features, targets in (
        (X_train_mel, y_train_segments),
        (X_val_mel, y_val_segments),
        (X_test_mel, y_test_segments),
    )
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)


### Importer le modèle et le charger

In [None]:
import sys
import os

# Path to the folder containing ast_models.py
AST_MODELS_DIR = "../../../ast/src/models"
# Guarded so that re-running this cell does not add duplicate sys.path entries.
if AST_MODELS_DIR not in sys.path:
    sys.path.append(AST_MODELS_DIR)

from ast_models import ASTModel

num_classes = 10  # for GTZAN

# NOTE(review): the log-mels built earlier are laid out as (mel bins, frames),
# while the upstream AST forward pass is believed to expect
# (batch, time frames, frequency bins). Both sizes are 128 here so the shapes
# line up either way - confirm whether a transpose is needed.
model = ASTModel(
    label_dim=num_classes,      # classification head sized for 10 classes
    input_fdim=128,             # number of mel bins
    input_tdim=128,             # number of time frames in the log-mels
    imagenet_pretrain=False,    # no ImageNet pre-training
    audioset_pretrain=False     # no AudioSet pre-training
)

---------------AST Model Summary---------------
ImageNet pretraining: False, AudioSet pretraining: False
frequncey stride=10, time stride=10
number of patches=144


### Définir loss et optimizer

In [None]:
import torch
import torch.nn as nn

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model = model.to(device)

# Cross-entropy loss, optimised with Adam at a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)


### Boucle d’entraînement simplifiée

In [None]:
num_epochs = 1

for epoch in range(num_epochs):
    model.train()  # switch the model to training mode
    running_loss = 0
    for features, targets in train_loader:
        features = features.to(device)
        targets = targets.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Mean of the per-batch losses over this epoch.
    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.4f}")


Epoch 1/10, Loss: 2.1887
Epoch 2/10, Loss: 1.7320
Epoch 3/10, Loss: 1.5603
Epoch 4/10, Loss: 1.3306


KeyboardInterrupt: 