In [None]:

# Install pyannote.audio into the runtime. The version is not pinned: the
# recorded output of this cell shows pip resolving it to 4.0.4.
!pip install pyannote.audio

Collecting pyannote.audio
  Downloading pyannote_audio-4.0.4-py3-none-any.whl.metadata (13 kB)
Collecting asteroid-filterbanks>=0.4.0 (from pyannote.audio)
  Downloading asteroid_filterbanks-0.4.0-py3-none-any.whl.metadata (3.3 kB)
Collecting lightning>=2.4 (from pyannote.audio)
  Downloading lightning-2.6.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting opentelemetry-exporter-otlp>=1.34.0 (from pyannote.audio)
  Downloading opentelemetry_exporter_otlp-1.39.1-py3-none-any.whl.metadata (2.4 kB)
Collecting pyannote-core>=6.0.1 (from pyannote.audio)
  Downloading pyannote_core-6.0.1-py3-none-any.whl.metadata (1.9 kB)
Collecting pyannote-database>=6.1.1 (from pyannote.audio)
  Downloading pyannote_database-6.1.1-py3-none-any.whl.metadata (30 kB)
Collecting pyannote-metrics>=4.0.0 (from pyannote.audio)
  Downloading pyannote_metrics-4.0.0-py3-none-any.whl.metadata (2.2 kB)
C

In [None]:
# Interactive Hugging Face authentication: notebook_login() renders a login
# widget in the cell output where an access token can be entered.
from huggingface_hub import notebook_login
notebook_login()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import csv
import getpass
import os

import huggingface_hub
import torch
from pyannote.audio import Pipeline
from pyannote.core import Annotation, Segment
from pyannote.metrics.diarization import DiarizationErrorRate

# --- CONFIGURATION ---
# Root folder of the evaluation corpus (Colab Google Drive path). The three
# paths below resolve to exactly the same strings as before.
DATA_ROOT = "/content/drive/MyDrive/GroundTruth_clean_multispeaker"
AUDIO_FOLDER = f"{DATA_ROOT}/audioMono"
RTTM_FOLDER = f"{DATA_ROOT}/rttm"
CSV_OUTPUT_PATH = f"{DATA_ROOT}/resultats_pyannotediarization_250ms.csv"

# SECURITY: never store the access token in the notebook. It is read from the
# HF_TOKEN environment variable and, when that is unset or empty, requested
# through a hidden prompt so that it never ends up in the saved file.
# The token that was previously committed here must be considered leaked and
# should be revoked at https://huggingface.co/settings/tokens.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face token: ")

# Hugging Face authentication
huggingface_hub.login(token=HF_TOKEN)

# Evaluation parameters, forwarded to DiarizationErrorRate.
# NOTE(review): pyannote.metrics documents `collar` as the total duration
# removed around each reference boundary (half before, half after), so 0.25
# would mean +/-125 ms rather than +/-250 ms - confirm which one the "250ms"
# in the CSV file name is meant to describe.
COLLAR = 0.25
SKIP_OVERLAP = False

# --- FONCTION CHARGEMENT RTTM ---
def load_rttm(file_path):
    """Build a pyannote ``Annotation`` from the SPEAKER records of an RTTM file.

    A line is used only when it has at least 8 whitespace-separated fields and
    its first field is ``SPEAKER``; every other line is ignored. Within a
    record, field 4 is read as the start time, field 5 as the duration and
    field 8 as the speaker label.

    Each record is stored on its own track (the line index). Assigning with
    ``annotation[segment] = label`` would put every turn on the same default
    track, so two speakers talking over exactly the same segment would
    overwrite one another and the overlapped turn would be lost.

    Args:
        file_path: Path of the RTTM file to read.

    Returns:
        The ``Annotation`` holding one (segment, track) entry per SPEAKER
        record; empty when the file contains no usable record.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a start time or duration field is not a number.
    """
    annotation = Annotation()
    with open(file_path, 'r') as f:
        for track, line in enumerate(f):
            parts = line.split()
            if len(parts) < 8 or parts[0] != 'SPEAKER':
                continue
            start = float(parts[3])
            duration = float(parts[4])
            label = parts[7]
            # The line index is unique per record, which keeps simultaneous
            # turns sharing the same boundaries as distinct entries.
            annotation[Segment(start, start + duration), track] = label
    return annotation

# --- MODEL LOADING ---
print("Chargement du modèle Pyannote...")

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Instantiate the pretrained diarization pipeline and move it to that device.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", token=HF_TOKEN)
pipeline.to(device)
print(f"Succès ! Modèle chargé sur {device}")

# --- INITIALISATION ---
# Per-file result rows, filled by the processing loop.
results_data = []
# A single metric object is shared by every file of the run.
der_metric = DiarizationErrorRate(skip_overlap=SKIP_OVERLAP, collar=COLLAR)

# --- PROCESSING LOOP ---
# Every .wav file of AUDIO_FOLDER is evaluated, in alphabetical order, against
# the RTTM file of the same base name found in RTTM_FOLDER.
audio_files = sorted(f for f in os.listdir(AUDIO_FOLDER) if f.endswith('.wav'))

print(f"\nDébut de l'évaluation sur {len(audio_files)} fichiers...\n")
print(f"{'FICHIER':<30} | {'DER (%)':<10} | {'VRAI':<5} | {'PRED':<5} | {'DIFF':<5}")
print("-" * 80)

for filename in audio_files:
    file_id = os.path.splitext(filename)[0]
    audio_path = os.path.join(AUDIO_FOLDER, filename)
    rttm_path = os.path.join(RTTM_FOLDER, file_id + ".rttm")

    # Files without ground truth cannot be scored.
    if not os.path.exists(rttm_path):
        print(f"⚠️ RTTM manquant pour {filename}")
        continue

    try:
        # A. Ground truth
        reference = load_rttm(rttm_path)

        # B. Hypothesis (diarization)
        result = pipeline(audio_path)

        # The pipeline result is either an object exposing the annotation as
        # `.speaker_diarization` (handled first; the original author attributes
        # this layout to pyannote 4.0) or directly an Annotation.
        if hasattr(result, "speaker_diarization"):
            hypothesis = result.speaker_diarization
        elif isinstance(result, Annotation):
            hypothesis = result
        else:
            print(f"⚠️ Format de résultat inconnu pour {filename}")
            continue

        # C. DER computation
        # When no speaker at all is detected, the file is reported and skipped.
        # NOTE(review): such files are therefore left out of both the per-file
        # results and the aggregated DER - confirm this is intended.
        timeline = hypothesis.get_timeline()
        if not timeline:
            print(f"{filename:<30} | VIDE       | {len(reference.labels()):<5} | 0     | -{len(reference.labels())}")
            continue

        # FIX: the evaluated region must run up to the last annotated instant.
        # Timeline.duration() is the summed duration of the speech segments,
        # not an end time, so using it as the upper bound shortened the scored
        # region by the amount of silence and dropped the end of the file.
        # The bound is now the latest end time of reference and hypothesis.
        evaluated_end = max(
            reference.get_timeline().extent().end,
            timeline.extent().end,
        )
        uem = Segment(0, evaluated_end)
        der_val = der_metric(reference, hypothesis, uem=uem) * 100

        # D. Speaker counts (negative diff = fewer speakers predicted than real)
        true_speakers = len(reference.labels())
        pred_speakers = len(hypothesis.labels())
        diff = pred_speakers - true_speakers

        # E. Storage
        results_data.append({
            "filename": filename,
            "der": der_val,
            "true_speakers": true_speakers,
            "pred_speakers": pred_speakers,
            "diff": diff
        })

        print(f"{filename:<30} | {der_val:.2f}%      | {true_speakers:<5} | {pred_speakers:<5} | {diff:+d}")

    except Exception as e:
        # Best effort: a failure on one file is reported and the run continues.
        print(f"❌ Erreur sur {filename}: {e}")

# --- FINAL STATISTICS ---
if not results_data:
    print("Aucun fichier traité avec succès.")
else:
    # 1. Per-file extremes and the first file reaching each of them
    all_ders = [entry['der'] for entry in results_data]
    min_der, max_der = min(all_ders), max(all_ders)

    best_file = next(entry['filename'] for entry in results_data if entry['der'] == min_der)
    worst_file = next(entry['filename'] for entry in results_data if entry['der'] == max_der)

    # 2. Global metrics
    # abs() of the metric object is used as the DER aggregated over every
    # file scored in the loop (reported below as a weighted average).
    global_der = abs(der_metric) * 100

    # Speaker-count accuracy: share of files with the exact number of speakers.
    diffs = [entry['diff'] for entry in results_data]
    correct_counts = diffs.count(0)
    accuracy_count = (correct_counts / len(results_data)) * 100
    over_estimates = sum(d > 0 for d in diffs)
    under_estimates = sum(d < 0 for d in diffs)

    separator = "=" * 40
    print("\n" + separator)
    print(" RÉSULTATS GLOBAUX ")
    print(separator)
    print(f"DER Global (Moyenne Pondérée) : {global_der:.2f}%")
    print(f"DER Minimum (Meilleur fichier): {min_der:.2f}% ({best_file})")
    print(f"DER Maximum (Pire fichier)    : {max_der:.2f}% ({worst_file})")
    print("-" * 40)
    print(f"Précision Nb Locuteurs        : {accuracy_count:.2f}%")
    print(f"Sur-estimations               : {over_estimates}")
    print(f"Sous-estimations              : {under_estimates}")

    # --- CSV EXPORT ---
    # One row per scored file; the DER is written with four decimals.
    print(f"\nSauvegarde des résultats dans : {CSV_OUTPUT_PATH}")
    with open(CSV_OUTPUT_PATH, mode='w', newline='') as csv_file:
        fieldnames = ['Filename', 'DER', 'True_Speakers', 'Pred_Speakers', 'Diff']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(
            {
                'Filename': data['filename'],
                'DER': f"{data['der']:.4f}",
                'True_Speakers': data['true_speakers'],
                'Pred_Speakers': data['pred_speakers'],
                'Diff': data['diff'],
            }
            for data in results_data
        )
    print("✅ Fichier CSV généré avec succès.")

Chargement du modèle Pyannote...
Succès ! Modèle chargé sur cuda

Début de l'évaluation sur 30 fichiers...

FICHIER                        | DER (%)    | VRAI  | PRED  | DIFF 
--------------------------------------------------------------------------------


  return torch._C._get_cublas_allow_tf32()
It can be re-enabled by calling
   >>> import torch
   >>> torch.backends.cuda.matmul.allow_tf32 = True
   >>> torch.backends.cudnn.allow_tf32 = True
See https://github.com/pyannote/pyannote-audio/issues/1370 for more details.

  std = sequences.std(dim=-1, correction=1)


clean_multispk_000.wav         | 34.87%      | 4     | 2     | -2
clean_multispk_001.wav         | 24.10%      | 3     | 3     | +0
clean_multispk_002.wav         | 68.07%      | 3     | 1     | -2
clean_multispk_003.wav         | 65.97%      | 2     | 2     | +0
clean_multispk_004.wav         | 29.61%      | 2     | 2     | +0
clean_multispk_005.wav         | 32.73%      | 4     | 4     | +0
clean_multispk_006.wav         | 37.24%      | 3     | 2     | -1
clean_multispk_007.wav         | 20.55%      | 2     | 2     | +0
clean_multispk_008.wav         | 28.89%      | 3     | 3     | +0
clean_multispk_009.wav         | 34.54%      | 3     | 4     | +1
clean_multispk_010.wav         | 9.71%      | 3     | 3     | +0
clean_multispk_011.wav         | 62.38%      | 3     | 1     | -2
clean_multispk_012.wav         | 17.71%      | 3     | 4     | +1
clean_multispk_013.wav         | 59.71%      | 2     | 1     | -1
clean_multispk_014.wav         | 36.78%      | 4     | 2     | -2
clean_multi

Found existing installation: huggingface-hub 0.23.2
Uninstalling huggingface-hub-0.23.2:
  Successfully uninstalled huggingface-hub-0.23.2
Collecting huggingface_hub==0.19.4
  Downloading huggingface_hub-0.19.4-py3-none-any.whl.metadata (14 kB)
Downloading huggingface_hub-0.19.4-py3-none-any.whl (311 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.7/311.7 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface_hub
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 5.2.2 requires huggingface-hub>=0.20.0, but you have huggingface-hub 0.19.4 which is incompatible.
diffusers 0.36.0 requires huggingface-hub<2.0,>=0.34.0, but you have huggingface-hub 0.19.4 which is incompatible.
accelerate 1.12.0 requires huggingface_hub>=0.21.0, but you have huggingface-hub 0.19.4 which is incompa

In [None]:
# Install a trio of versions that are compatible with one another.
# NOTE(review): this cell comes after the evaluation cell and pins
# pyannote.audio to 3.1.1, whereas the evaluation cell calls
# Pipeline.from_pretrained(..., token=...) - confirm that keyword is accepted
# by 3.1.1, and move the installs to the top so a fresh "Run All" works.
!pip install "huggingface_hub==0.23.2" "transformers==4.41.2" "pyannote.audio==3.1.1"

Collecting huggingface_hub==0.23.2
  Using cached huggingface_hub-0.23.2-py3-none-any.whl.metadata (12 kB)
Collecting transformers==4.41.2
  Downloading transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.20,>=0.19 (from transformers==4.41.2)
  Downloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Using cached huggingface_hub-0.23.2-py3-none-any.whl (401 kB)
Downloading transformers-4.41.2-py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m82.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m95.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: hugging