# Préparation

In [None]:
!pip install panns_inference

Collecting panns_inference
  Downloading panns_inference-0.1.1-py3-none-any.whl.metadata (2.4 kB)
Collecting torchlibrosa (from panns_inference)
  Downloading torchlibrosa-0.1.0-py3-none-any.whl.metadata (3.5 kB)
Downloading panns_inference-0.1.1-py3-none-any.whl (8.3 kB)
Downloading torchlibrosa-0.1.0-py3-none-any.whl (11 kB)
Installing collected packages: torchlibrosa, panns_inference
Successfully installed panns_inference-0.1.1 torchlibrosa-0.1.0


In [None]:
import os
import pandas as pd
import numpy as np
import torchaudio
from google.colab import files
from panns_inference import AudioTagging, SoundEventDetection
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn
from torchsummary import summary

In [None]:
# Load the CNN14 audio-tagging model from panns_inference.
# With checkpoint_path=None the library picks its own checkpoint; the recorded
# output shows it resolving to /root/panns_data/Cnn14_mAP=0.431.pth.
audio_tagging = AudioTagging(checkpoint_path=None)

Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth


  checkpoint = torch.load(checkpoint_path, map_location=self.device)


GPU number: 1


In [None]:
# Print how many entries the current working directory contains
# (presumably a check that the audio files were uploaded — TODO confirm).
print(len(os.listdir()))

2353


In [None]:
input_file = 'annotations_clean_and_noised.xlsx'

# Sample rate expected by the pretrained PANNs CNN14 checkpoint; audio recorded
# at any other rate is resampled before inference.
PANNS_SAMPLE_RATE = 32000

# Load the annotation spreadsheet; each row names an audio file in "filename"
df = pd.read_excel(input_file)

# Object column that will receive one embedding per row (None until filled)
df["embeddings"] = None

# Paths whose embedding could not be computed, reported after the loop
failed_files = []

# Compute the embedding of every annotated audio file
for index, row in df.iterrows():
    # Built outside the try block so the path is available to the error report
    wav_path = '/content/' + str(row['filename'])

    try:
        # Decode the audio file into a float tensor plus its native sample rate
        waveform, sample_rate = torchaudio.load(wav_path)

        # Bring the signal to the rate the model was trained on
        if sample_rate != PANNS_SAMPLE_RATE:
            waveform = torchaudio.functional.resample(
                waveform, sample_rate, PANNS_SAMPLE_RATE
            )

        # inference() returns (clipwise_output, embedding); only the embedding is kept
        _, embedding = audio_tagging.inference(waveform)

        # Store the result in the DataFrame
        df.at[index, "embeddings"] = embedding
    except Exception as e:
        # Best effort: keep processing the other files, but say which file
        # failed and why instead of printing an anonymous message.
        failed_files.append(wav_path)
        print(f"Erreur ({wav_path}): {type(e).__name__}: {e}")

# Rows listed here still hold None in "embeddings" and must be handled
# before the column is converted to a tensor.
if failed_files:
    print(f"{len(failed_files)} fichier(s) sans embedding sur {len(df)}")

In [None]:
# Train / validation split
def split_data(df):
    """Partition the rows of ``df`` into training and validation subsets by fold.

    Args:
        df: DataFrame with a "fold" column.

    Returns:
        A ``(train_df, valid_df)`` tuple. ``train_df`` holds the rows whose
        fold is one of 1-6, ``valid_df`` the rows whose fold equals 7. Rows
        with any other fold value appear in neither subset.
    """
    fold_ids = df["fold"]
    training_folds = [1, 2, 3, 4, 5, 6]
    validation_fold = 7

    is_training_row = fold_ids.isin(training_folds)
    is_validation_row = fold_ids == validation_fold

    return df[is_training_row], df[is_validation_row]

# Split the annotated frame into its training and validation parts
train_df, valid_df = split_data(df)

In [None]:
# Dataset exposing precomputed embeddings and labels to a DataLoader
class CustomDataset(Dataset):
    """In-memory dataset of (embedding, label) pairs.

    Args:
        data: DataFrame with an "embeddings" column (one equally shaped
            array-like per row) and a "target" column of integer class ids.

    Raises:
        ValueError: if any row has no embedding (None/NaN), which would
            otherwise surface as an opaque tensor-conversion error.
    """

    def __init__(self, data):
        missing = data["embeddings"].isna()
        if missing.any():
            raise ValueError(
                f"{int(missing.sum())} row(s) have no embedding; "
                "drop or recompute them before building the dataset"
            )

        # Assemble one contiguous float32 array first: handing torch.tensor a
        # list of ndarrays is the slow path PyTorch warns about.
        stacked = np.asarray(data["embeddings"].tolist(), dtype=np.float32)
        self.features = torch.from_numpy(stacked)
        self.labels = torch.tensor(data["target"].tolist(), dtype=torch.long)

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

# Build one dataset per split
train_dataset = CustomDataset(train_df)
valid_dataset = CustomDataset(valid_df)

  self.features = torch.tensor(data["embeddings"].tolist(), dtype=torch.float32)


In [None]:
# Build the DataLoaders
batch_size = 32

# Only the training loader shuffles; the validation loader keeps dataset order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# Modèle

In [None]:
# Input  : a vector of size 2048 (one embedding)
# Hidden : three fully connected layers of 128 units by default, each followed by ReLU
# Output : one score (logit) per class

class MLP(nn.Module):
    """Three-hidden-layer perceptron classifying fixed-size embeddings.

    Args:
        input_size: length of the input vector.
        num_classes: number of output scores.
        hidden_size: width shared by the three hidden layers. Defaults to
            128, the value that was previously hard-coded.
    """

    def __init__(self, input_size = 2048, num_classes = 5, hidden_size = 128):
        super().__init__()
        # Attribute names are kept as-is so previously saved state_dicts still load
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, hidden_size)

        # A single stateless ReLU module is reused after every hidden layer
        self.relu = nn.ReLU()

        self.out = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw class scores for ``x``; no softmax is applied."""
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.out(x)
        return x

# Use the GPU when one is available, otherwise fall back to the CPU so the
# cell does not crash on a CPU-only runtime
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model initialisation
model = MLP(input_size=2048, num_classes= 5).to(device)

# Architecture summary; torchsummary defaults to "cuda", so it is told
# explicitly which device the model lives on
summary(model, input_size=(2048,), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 128]         262,272
              ReLU-2                  [-1, 128]               0
            Linear-3                  [-1, 128]          16,512
              ReLU-4                  [-1, 128]               0
            Linear-5                  [-1, 128]          16,512
              ReLU-6                  [-1, 128]               0
            Linear-7                    [-1, 5]             645
Total params: 295,941
Trainable params: 295,941
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.01
Params size (MB): 1.13
Estimated Total Size (MB): 1.14
----------------------------------------------------------------


# Entraînement

In [None]:
class EarlyStopping:
    """Track validation loss and flag when training should stop.

    Call the instance once per epoch with that epoch's validation loss.
    ``early_stop`` becomes True once ``patience`` consecutive calls have
    failed to produce a loss strictly lower than the best one seen.

    Attributes:
        patience: number of consecutive non-improving calls tolerated.
        best_loss: lowest loss seen so far (starts at +inf).
        counter: consecutive non-improving calls since the last improvement.
        early_stop: True once the patience budget is exhausted.
    """

    def __init__(self, patience=5):
        self.early_stop = False
        self.counter = 0
        self.best_loss = float('inf')
        self.patience = patience

    def __call__(self, valid_loss):
        """Record one validation loss and update the stopping state."""
        improved = valid_loss < self.best_loss
        if improved:
            # New best: remember it and start counting again from zero
            self.best_loss, self.counter = valid_loss, 0
            return

        # No strict improvement: use up one unit of patience
        self.counter += 1
        self.early_stop = self.early_stop or self.counter >= self.patience

In [None]:
# Create the early-stopping tracker: it flags a stop after 5 consecutive
# validation losses that do not beat the best one seen
early_stopping = EarlyStopping(patience=5)

In [None]:
# Training
epochs = 100
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# Run on whichever device the model already lives on instead of assuming CUDA
model_device = next(model.parameters()).device

# Copy of the weights taken at the lowest validation loss of this run, so the
# model can be rolled back to it once training ends
best_state = None

for epoch in range(epochs):

    # Training pass
    model.train()
    total_train_loss = 0
    correct_train = 0
    total_train = 0

    for features, labels in train_loader:
        # Move each batch to the device once and reuse the moved tensors
        features = features.to(model_device)
        labels = labels.to(model_device)

        optimizer.zero_grad()
        # squeeze(1) removes dimension 1 of the model output when it has size 1
        # (presumably the stored embeddings are (1, 2048) per sample — TODO confirm)
        outputs = model(features).squeeze(1)

        # Loss computation and parameter update
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Update the training metrics
        total_train_loss += loss.item()
        predictions = torch.argmax(outputs, dim=1)
        correct_train += (predictions == labels).sum().item()
        total_train += labels.size(0)

    # Loss is the mean of the per-batch losses; accuracy is per sample
    train_loss = total_train_loss / len(train_loader)
    train_accuracy = correct_train / total_train

    # Validation pass
    model.eval()
    total_valid_loss = 0
    correct_valid = 0
    total_valid = 0

    with torch.no_grad():
        for features, labels in valid_loader:
            features = features.to(model_device)
            labels = labels.to(model_device)

            outputs = model(features).squeeze(1)

            # Loss computation
            loss = criterion(outputs, labels)
            total_valid_loss += loss.item()

            # Update the validation metrics
            predictions = torch.argmax(outputs, dim=1)
            correct_valid += (predictions == labels).sum().item()
            total_valid += labels.size(0)

    valid_loss = total_valid_loss / len(valid_loader)
    valid_accuracy = correct_valid / total_valid

    # Metrics for the current epoch
    print(f"Epoch {epoch + 1}:")
    print(f"Train Loss: {train_loss}, Train Accuracy: {train_accuracy}")
    print(f"Valid Loss: {valid_loss}, Valid Accuracy: {valid_accuracy}")

    # Snapshot the weights when this epoch sets a new best validation loss.
    # The comparison must run before early_stopping() updates best_loss.
    if valid_loss < early_stopping.best_loss:
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    # Early stopping
    early_stopping(valid_loss)
    if early_stopping.early_stop:
        print("Early stopping stopped the training!")
        break

# Early stopping only halts training; without this rollback the model would
# keep the weights of the last, non-improving epoch.
if best_state is not None:
    model.load_state_dict(best_state)

Epoch 1:
Train Loss: 1.2592209978029132, Train Accuracy: 0.47860304968027545
Valid Loss: 1.2022551417350769, Valid Accuracy: 0.5331230283911672
Epoch 2:
Train Loss: 1.1905184015631676, Train Accuracy: 0.49090014756517464
Valid Loss: 1.1789111971855164, Valid Accuracy: 0.5394321766561514
Epoch 3:
Train Loss: 1.135809998959303, Train Accuracy: 0.5012297097884899
Valid Loss: 1.1316990077495575, Valid Accuracy: 0.5425867507886435
Epoch 4:
Train Loss: 1.0913202753290534, Train Accuracy: 0.5189375307427447
Valid Loss: 1.1435083210468293, Valid Accuracy: 0.5488958990536278
Epoch 5:
Train Loss: 1.0679459497332573, Train Accuracy: 0.5302508607968519
Valid Loss: 1.1055566728115083, Valid Accuracy: 0.5772870662460567
Epoch 6:
Train Loss: 1.0554489009082317, Train Accuracy: 0.5081160846040335
Valid Loss: 1.0553266167640687, Valid Accuracy: 0.5772870662460567
Epoch 7:
Train Loss: 1.0454010488465428, Train Accuracy: 0.5282833251352681
Valid Loss: 1.0601918756961823, Valid Accuracy: 0.574132492113564

# Sauvegarder le modèle entraîné

In [None]:
# Write only the model's parameters (its state_dict) to a file in the working directory
torch.save(model.state_dict(), "MLP_on_CNN14_clean_and_noised.pth")

In [None]:
# Download the saved weights file through the Colab files helper
from google.colab import files
files.download('MLP_on_CNN14_clean_and_noised.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>