In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## First try on a Playlist of conferences

## Utilization of Multiple Dictionaries

In this analysis, several dictionaries are employed to organize and process the audio data effectively:

- **`Diarizations{}`**: 
  - Associates each `file_name` with its corresponding diarization, which is an `Annotation` object.

- **`unique_speakers{}`**: 
  - For each `file_name`, associates a set of unique speakers identified in the audio file.

- **`durations_conferences{}`**: 
  - Maps each `file_name` to a `Duration{}` dictionary. 
  - For each speaker in the audio file, `Duration{}` associates their total spoken duration.
  - **Purpose**: This is particularly useful for determining the principal speaker in each conference.

- **`longest_segments_conferences{}`**: 
  - For each `file_name`, associates a `longest_segments{}` dictionary.
  - For each speaker in the audio file, `longest_segments{}` associates a `[duration, segment]` pair: the duration of that speaker's longest single segment and the segment itself.
  - **Purpose**: This is useful for extracting a reasonable subsegment for every speaker to predict their gender.


In [2]:
!pip install pyannote.audio

Collecting pyannote.audio
  Obtaining dependency information for pyannote.audio from https://files.pythonhosted.org/packages/f5/11/611c32f7b7894ba588ade502525d0130f3e731d15f925e9f2a1ae66c8680/pyannote.audio-3.1.1-py2.py3-none-any.whl.metadata
  Downloading pyannote.audio-3.1.1-py2.py3-none-any.whl.metadata (9.3 kB)
Collecting asteroid-filterbanks>=0.4 (from pyannote.audio)
  Downloading asteroid_filterbanks-0.4.0-py3-none-any.whl (29 kB)
Collecting einops>=0.6.0 (from pyannote.audio)
  Obtaining dependency information for einops>=0.6.0 from https://files.pythonhosted.org/packages/29/0b/2d1c0ebfd092e25935b86509a9a817159212d82aa43d7fb07eca4eeff2c2/einops-0.7.0-py3-none-any.whl.metadata
  Downloading einops-0.7.0-py3-none-any.whl.metadata (13 kB)
Collecting lightning>=2.0.1 (from pyannote.audio)
  Obtaining dependency information for lightning>=2.0.1 from https://files.pythonhosted.org/packages/8c/a1/b2a6c33675510bc3e1ca6d010b244ac0dd9c81fc1723a37e7491aa586041/lightning-2.1.3-py3

In [3]:
from pyannote.audio import Pipeline



In [4]:
from pydub import AudioSegment

def convert_mp3_to_wav(mp3_path, wav_path):
    """
    Decode an MP3 file and write it back out as a WAV file.

    :param mp3_path: Path of the source MP3 file.
    :param wav_path: Path of the WAV file to create.
    """
    # Decode the MP3 with pydub and re-encode it as WAV in one chained call
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")

In [5]:
from tqdm import tqdm

In [6]:
import os


import torch

# The Hugging Face access token is read from the environment so that it is
# never stored in the notebook itself.
# NOTE(review): the token that used to be hardcoded here has been published
# together with the notebook and should be revoked.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "Set the HF_TOKEN environment variable to a Hugging Face access token "
        "that has been granted access to pyannote/speaker-diarization-3.1."
    )

pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=hf_token)

# Send the pipeline to the GPU when one is available, otherwise stay on the CPU
pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))


# Path to the directory containing audio files
audio_folder = '/kaggle/input/playlist'

# Dictionary storing the diarization result (an Annotation) of every MP3 file
Diarizations = {}

# Keep only the MP3 files so the progress bar counts real work items
mp3_files = [entry for entry in os.listdir(audio_folder) if entry.endswith('.mp3')]

for file_name in tqdm(mp3_files, desc="Processing audio files"):
    mp3_path = os.path.join(audio_folder, file_name)
    # The WAV copy is written to the current working directory: same name, "wav" extension
    wav_path = file_name[:-3] + "wav"
    convert_mp3_to_wav(mp3_path, wav_path)
    # Results are keyed by the original MP3 file name
    Diarizations[file_name] = pipeline(wav_path)
        


config.yaml:   0%|          | 0.00/469 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/5.91M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/399 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/221 [00:00<?, ?B/s]

Processing audio files: 100%|██████████| 13/13 [27:27<00:00, 126.73s/it]


In [7]:
# Show the file name -> diarization result mapping filled in by the diarization loop
print(Diarizations)

{'Cryo-EM Workshop - Ayelet Heimowitz .mp3': <pyannote.core.annotation.Annotation object at 0x7c455d5c0040>, 'Cryo-EM Workshop - Jose Maria Carazo.mp3': <pyannote.core.annotation.Annotation object at 0x7c455d5c1c30>, 'Flatiron Wide Algorithms and Mathematics - Erik Thiede .mp3': <pyannote.core.annotation.Annotation object at 0x7c4548bd7f40>, 'Flatiron Wide Algorithms and Mathematics - Miles Stoudenmire .mp3': <pyannote.core.annotation.Annotation object at 0x7c45487ab460>, 'Flatiron Wide Algorithms and Mathematics - Ashley Villar .mp3': <pyannote.core.annotation.Annotation object at 0x7c4548b4c0d0>, 'Cryo-EM Workshop - Bridget Carragher .mp3': <pyannote.core.annotation.Annotation object at 0x7c4548869e70>, 'Cryo-EM Workshop - Joakim Anden.mp3': <pyannote.core.annotation.Annotation object at 0x7c454886b9d0>, 'Flatiron Wide Algorithms and Mathematics - Shirley Ho.mp3': <pyannote.core.annotation.Annotation object at 0x7c4548a05390>, 'Cryo-EM Workshop - Erik Lindahl .mp3': <pyannote.core.an

In [8]:
# Map every diarized file to the set of speaker labels found in it.
# Iterating over Diarizations (instead of listing the folder again) guarantees
# that every key exists, even when the folder also holds non-MP3 entries that
# the diarization loop skipped.
unique_speakers = {}
for file_name, diarization in Diarizations.items():
    # itertracks(yield_label=True) yields (segment, track, label) triples
    unique_speakers[file_name] = {
        label for _, _, label in diarization.itertracks(yield_label=True)
    }

In [9]:
# Total speaking duration for each speaker -> can be used to identify the main speaker
# Longest single segment of each speaker -> used as the excerpt for gender recognition
# Issue: what if the maximum speaking duration is <5 seconds? -> consider merging segments
# In parallel, the gender of the speaker can be detected
durations_conferences = {}
longest_segments_conferences = {}

# Iterate over the files that were actually diarized, so no lookup can fail on
# a folder entry that the diarization loop skipped.
for file_name, diarization in tqdm(Diarizations.items(), desc="Saving durations,and longest segments for each audio file"):
    unique_speaker = unique_speakers[file_name]

    # Seed one entry per known speaker: total duration 0, longest segment [0, None]
    Duration = {speaker: 0 for speaker in unique_speaker}
    Longest_segment = {speaker: [0, None] for speaker in unique_speaker}

    # A single pass over the tracks updates both statistics for the speaker of each segment
    for segment, _, speaker in diarization.itertracks(yield_label=True):
        duration = segment.duration
        Duration[speaker] += duration
        # Strict comparison: on ties the earliest segment is kept
        if duration > Longest_segment[speaker][0]:
            Longest_segment[speaker] = [duration, segment]

    durations_conferences[file_name] = Duration
    longest_segments_conferences[file_name] = Longest_segment


Saving durations,and longest segments for each audio file: 100%|██████████| 13/13 [00:00<00:00, 387.51it/s]


In [10]:
# Inspect one conference: speaker label -> [duration of longest segment, that segment]
print(longest_segments_conferences["Cryo-EM Workshop - Joakim Anden.mp3"])

{'SPEAKER_01': [0.3904923599320682, <Segment(1375.41, 1375.8)>], 'SPEAKER_00': [150.0, <Segment(1342.25, 1492.25)>]}


In [11]:
#Extract a sub-audio segment from a larger audio file given specific start and end times

def extract_subsegment(source_path, start_time, end_time, output_path):
    """
    Cut a time window out of an MP3 file and save it as a new MP3 file.

    :param source_path: Path of the source MP3 file.
    :param start_time: Start of the window, in milliseconds.
    :param end_time: End of the window, in milliseconds.
    :param output_path: Path of the MP3 file to write.
    """
    # Slice the decoded audio by the [start_time, end_time) window and export it
    window = slice(start_time, end_time)
    AudioSegment.from_mp3(source_path)[window].export(output_path, format="mp3")


In [12]:
# Input directory holding the MP3 files (same value as assigned in the diarization cell)
audio_folder="/kaggle/input/playlist"

In [13]:
# A speaker's longest segment must be strictly longer than this (in seconds)
# for an excerpt to be exported.
MIN_SEGMENT_SECONDS = 3

# file name -> {speaker label -> path of the exported excerpt}
SegmentsPerFile = {}

# Iterate over the conferences for which segments were computed, so no lookup
# can fail on a folder entry that the earlier steps skipped.
for file_name, Longest_segment in tqdm(longest_segments_conferences.items(), desc="Saving audios for each speaker in each audio file"):
    print(Longest_segment)
    source_path = os.path.join(audio_folder, file_name)

    SegmentsPerSpeaker = {}

    for speaker, (longest_duration, segment) in Longest_segment.items():
        if longest_duration > MIN_SEGMENT_SECONDS:
            print(speaker, file_name, longest_duration, segment.start, segment.end)
            # Excerpt name: conference file name without ".mp3", then the speaker label
            output_path = file_name[:-4] + speaker + '.mp3'
            # Segment bounds are multiplied by 1000 because extract_subsegment takes milliseconds
            extract_subsegment(source_path, segment.start * 1000, segment.end * 1000, output_path)
            SegmentsPerSpeaker[speaker] = output_path

    SegmentsPerFile[file_name] = SegmentsPerSpeaker

Saving audios for each speaker in each audio file:   0%|          | 0/13 [00:00<?, ?it/s]

{'SPEAKER_00': [24.65195246179968, <Segment(117.275, 141.927)>]}
SPEAKER_00 Cryo-EM Workshop - Ayelet Heimowitz .mp3 24.65195246179968 117.27504244482174 141.92699490662142


Saving audios for each speaker in each audio file:   8%|▊         | 1/13 [00:03<00:43,  3.64s/it]

{'SPEAKER_01': [0.37351443123952777, <Segment(1051.2, 1051.57)>], 'SPEAKER_00': [182.44482173174902, <Segment(888.26, 1070.7)>]}
SPEAKER_00 Cryo-EM Workshop - Jose Maria Carazo.mp3 182.44482173174902 888.2597623089982 1070.7045840407472


Saving audios for each speaker in each audio file:  15%|█▌        | 2/13 [00:12<01:16,  6.92s/it]

{'SPEAKER_01': [7.062818336162763, <Segment(1443.95, 1451.01)>], 'SPEAKER_00': [9.575551782682396, <Segment(1518.67, 1528.24)>]}
SPEAKER_01 Flatiron Wide Algorithms and Mathematics - Erik Thiede .mp3 7.062818336162763 1443.947368421053 1451.0101867572157
SPEAKER_00 Flatiron Wide Algorithms and Mathematics - Erik Thiede .mp3 9.575551782682396 1518.6672325976233 1528.2427843803057


Saving audios for each speaker in each audio file:  23%|██▎       | 3/13 [00:20<01:13,  7.40s/it]

{'SPEAKER_03': [12.071307300509488, <Segment(1646.41, 1658.48)>], 'SPEAKER_05': [5.95925297113763, <Segment(3189.07, 3195.03)>], 'SPEAKER_04': [11.341256366723428, <Segment(2363.46, 2374.8)>], 'SPEAKER_00': [9.949066213921924, <Segment(2313.95, 2323.9)>], 'SPEAKER_02': [66.62139219015282, <Segment(37.0543, 103.676)>], 'SPEAKER_01': [15.449915110356415, <Segment(2229.62, 2245.07)>]}
SPEAKER_03 Flatiron Wide Algorithms and Mathematics - Miles Stoudenmire .mp3 12.071307300509488 1646.409168081494 1658.4804753820035
SPEAKER_05 Flatiron Wide Algorithms and Mathematics - Miles Stoudenmire .mp3 5.95925297113763 3189.074702886248 3195.0339558573855
SPEAKER_04 Flatiron Wide Algorithms and Mathematics - Miles Stoudenmire .mp3 11.341256366723428 2363.4550084889643 2374.7962648556877
SPEAKER_00 Flatiron Wide Algorithms and Mathematics - Miles Stoudenmire .mp3 9.949066213921924 2313.9473684210525 2323.8964346349744
SPEAKER_02 Flatiron Wide Algorithms and Mathematics - Miles Stoudenmire .mp3 66.6213

Saving audios for each speaker in each audio file:  31%|███       | 4/13 [01:14<03:49, 25.54s/it]

{'SPEAKER_00': [19.11714770797971, <Segment(910.823, 929.941)>]}
SPEAKER_00 Flatiron Wide Algorithms and Mathematics - Ashley Villar .mp3 19.11714770797971 910.8234295415959 929.9405772495757


Saving audios for each speaker in each audio file:  38%|███▊      | 5/13 [01:17<02:20, 17.59s/it]

{'SPEAKER_00': [36.010186757215706, <Segment(1015.08, 1051.1)>]}
SPEAKER_00 Cryo-EM Workshop - Bridget Carragher .mp3 36.010186757215706 1015.0848896434636 1051.0950764006793


Saving audios for each speaker in each audio file:  46%|████▌     | 6/13 [01:23<01:35, 13.68s/it]

{'SPEAKER_01': [0.3904923599320682, <Segment(1375.41, 1375.8)>], 'SPEAKER_00': [150.0, <Segment(1342.25, 1492.25)>]}
SPEAKER_00 Cryo-EM Workshop - Joakim Anden.mp3 150.0 1342.249575551783 1492.249575551783


Saving audios for each speaker in each audio file:  54%|█████▍    | 7/13 [01:30<01:07, 11.32s/it]

{'SPEAKER_03': [16.332767402376902, <Segment(194.491, 210.823)>], 'SPEAKER_02': [5.127334465194963, <Segment(1153.96, 1159.09)>], 'SPEAKER_01': [5.008488964346725, <Segment(1136.38, 1141.38)>], 'SPEAKER_00': [6.19694397283547, <Segment(1106.31, 1112.5)>]}
SPEAKER_03 Flatiron Wide Algorithms and Mathematics - Shirley Ho.mp3 16.332767402376902 194.49066213921904 210.82342954159594
SPEAKER_02 Flatiron Wide Algorithms and Mathematics - Shirley Ho.mp3 5.127334465194963 1153.9643463497455 1159.0916808149404
SPEAKER_01 Flatiron Wide Algorithms and Mathematics - Shirley Ho.mp3 5.008488964346725 1136.3752122241085 1141.3837011884552
SPEAKER_00 Flatiron Wide Algorithms and Mathematics - Shirley Ho.mp3 6.19694397283547 1106.307300509338 1112.5042444821734


Saving audios for each speaker in each audio file:  62%|██████▏   | 8/13 [01:44<01:01, 12.25s/it]

{'SPEAKER_01': [0.9507640067911609, <Segment(136.664, 137.615)>], 'SPEAKER_00': [96.74023769100131, <Segment(1675.36, 1772.1)>]}
SPEAKER_00 Cryo-EM Workshop - Erik Lindahl .mp3 96.74023769100131 1675.356536502547 1772.0967741935483


Saving audios for each speaker in each audio file:  69%|██████▉   | 9/13 [01:50<00:41, 10.37s/it]

{'SPEAKER_01': [0.3565365025465326, <Segment(1133.66, 1134.02)>], 'SPEAKER_00': [125.33106960950772, <Segment(747.886, 873.217)>]}
SPEAKER_00 Cryo-EM Workshop - Joachim Frank.mp3 125.33106960950772 747.8862478777589 873.2173174872667


Saving audios for each speaker in each audio file:  77%|███████▋  | 10/13 [01:58<00:28,  9.61s/it]

{'SPEAKER_01': [8.896434634974412, <Segment(1603.78, 1612.67)>], 'SPEAKER_00': [71.59592529711378, <Segment(66.0017, 137.598)>]}
SPEAKER_01 Flatiron Wide Algorithms and Mathematics - Risi Kondor.mp3 8.896434634974412 1603.7775891341257 1612.6740237691001
SPEAKER_00 Flatiron Wide Algorithms and Mathematics - Risi Kondor.mp3 71.59592529711378 66.00169779286927 137.59762308998305


Saving audios for each speaker in each audio file:  85%|████████▍ | 11/13 [02:15<00:23, 11.72s/it]

{'SPEAKER_03': [10.186757215619764, <Segment(2191.67, 2201.86)>], 'SPEAKER_02': [1.7657045840405772, <Segment(2020.47, 2022.23)>], 'SPEAKER_01': [65.87436332767402, <Segment(2687.02, 2752.89)>], 'SPEAKER_00': [13.80305602716453, <Segment(2783.57, 2797.38)>]}
SPEAKER_03 Flatiron Wide Algorithms and Mathematics - Bob Carpenter.mp3 10.186757215619764 2191.6723259762307 2201.8590831918505
SPEAKER_01 Flatiron Wide Algorithms and Mathematics - Bob Carpenter.mp3 65.87436332767402 2687.0203735144314 2752.8947368421054
SPEAKER_00 Flatiron Wide Algorithms and Mathematics - Bob Carpenter.mp3 13.80305602716453 2783.5738539898134 2797.376910016978


Saving audios for each speaker in each audio file:  92%|█████████▏| 12/13 [02:43<00:16, 16.74s/it]

{'SPEAKER_01': [0.5772495755518321, <Segment(635.679, 636.256)>], 'SPEAKER_00': [65.26315789473665, <Segment(1563.95, 1629.21)>]}
SPEAKER_00 Cryo-EM Workshop - Roy Lederman.mp3 65.26315789473665 1563.947368421053 1629.2105263157896


Saving audios for each speaker in each audio file: 100%|██████████| 13/13 [02:49<00:00, 13.02s/it]


In [14]:
import numpy as np 
import scipy
import pandas as pd 
import csv
import os
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
import os
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from tqdm import tqdm
from tensorflow.keras.models import load_model

import glob
import shutil
from pydub import AudioSegment
import librosa


In [15]:
def extract_feature(file_name, **kwargs):
    """
    Extract a 1-D feature vector from the MP3 file `file_name`.

    The audio is decoded with pydub, down-mixed to mono and peak-normalized.
    Every requested feature is averaged over time and appended to the result
    in this fixed order: mfcc, chroma, mel, contrast, tonnetz.

        Features supported (enable with `<name>=True`):
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)

    :param file_name: Path of the MP3 file to analyse.
    :return: 1-D NumPy array with the concatenated features (empty when no
        feature is requested).
    :raises ValueError: If the file contains no audio samples.
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")

    # Load the audio file with pydub
    audio = AudioSegment.from_mp3(file_name)
    sample_rate = audio.frame_rate

    # Convert to float32 right away: np.abs on signed-integer samples overflows
    # at the most negative value (e.g. -32768 for int16), which would corrupt
    # the peak used for normalization below.
    audio_data = np.array(audio.get_array_of_samples()).astype(np.float32)

    if audio_data.size == 0:
        raise ValueError(f"{file_name} contains no audio samples")

    if audio.channels > 1:
        # Samples are reshaped to one row per frame, then the channels are averaged to mono
        audio_data = audio_data.reshape((-1, audio.channels)).mean(axis=1)

    # Peak-normalize; a fully silent signal is left as zeros instead of
    # dividing by zero and turning every feature into NaN.
    peak = np.max(np.abs(audio_data))
    if peak > 0:
        audio_data = audio_data / peak

    # Feature extraction
    result = np.array([])
    if chroma or contrast or tonnetz:
        # Magnitude spectrogram used by the chroma and contrast features
        stft = np.abs(librosa.stft(audio_data))

    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13).T, axis=0)
        result = np.hstack((result, mfccs))

    if chroma:
        chroma_feature = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma_feature))

    if mel:
        mel_feature = np.mean(librosa.feature.melspectrogram(y=audio_data, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel_feature))

    if contrast:
        contrast_feature = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast_feature))

    if tonnetz:
        tonnetz_feature = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(audio_data), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz_feature))

    return result

In [16]:
# Gender Prediction

def preprocess_audio(speaker, audio_file):
    """
    Compute the model input features for one speaker's exported excerpt.

    The excerpt path is "/kaggle/working/" followed by `audio_file` without
    its last four characters, the speaker label and ".mp3". Only the mel
    feature is requested from `extract_feature`.

    :param speaker: Speaker label appended to the excerpt file name.
    :param audio_file: Name of the conference file the excerpt was cut from.
    :return: Whatever `extract_feature` returns for that excerpt.
    """
    stem = audio_file[:-4]
    excerpt_path = "".join(["/kaggle/working/", stem, speaker, ".mp3"])
    return extract_feature(excerpt_path, mel=True)

def predict_gender(speaker, audio_file, model):
    """
    Predict the gender of one speaker from their exported excerpt.

    :param speaker: Speaker label of the excerpt to classify.
    :param audio_file: Name of the conference file the excerpt was cut from.
    :param model: Object whose `predict` method is called on the feature batch.
    :return: Tuple `(label, prediction)` where `label` is "Female" when the
        prediction is <= 0.5 and "Male" otherwise, and `prediction` is the
        first element of the model output.
    """
    # The features are reshaped to a single-row batch before calling the model
    batch = np.reshape(preprocess_audio(speaker, audio_file), (1, -1))
    prediction = model.predict(batch)[0]
    # Threshold the prediction at 0.5
    label = "Female" if prediction <= 0.5 else "Male"
    return label, prediction

# Load the model
model = load_model("/kaggle/input/genderrec/model.h5")

# ANSI escape codes used to print the conference name in bold
start = "\033[1m"
end = "\033[0;0m"

# Iterate over the conferences for which durations were computed, so no lookup
# can fail on a folder entry that the earlier steps skipped.
for file_name, speakers_duration in durations_conferences.items():
    print("Processing the conference : " + start + file_name + end)
    unique_speaker = unique_speakers[file_name]

    if not speakers_duration:
        # max() below would raise on an empty mapping
        print("No speaker was detected in this conference.")
        continue

    # Principal speaker is the one who talked the most
    principal_speaker = max(speakers_duration, key=speakers_duration.get)

    try:
        gender = predict_gender(principal_speaker, file_name, model)
        print(f"The predicted gender for the principle speaker of this conference is: {gender}")
    except FileNotFoundError:
        # No excerpt file exists for this speaker
        print("No audio excerpt is available for the principal speaker.")

    print(f"this speaker was interrupted by :{len(unique_speaker)-1}  persons" )
    for speaker in unique_speaker:
        if speaker == principal_speaker:
            continue
        # Predict the gender of every other speaker
        try:
            gender = predict_gender(speaker, file_name, model)
            print(speaker + f" The predicted gender for the interruptor is: {gender}")
        except FileNotFoundError:
            # Excerpts are only exported for speakers whose longest segment exceeds 3 seconds
            print("L'interruption était trop courte.")
    


Processing the conference : [1mCryo-EM Workshop - Ayelet Heimowitz .mp3[0;0m
The predicted gender for the principle speaker of this conference is: ('Female', array([0.02563079], dtype=float32))
this speaker was interrupted by :0  persons
Processing the conference : [1mCryo-EM Workshop - Jose Maria Carazo.mp3[0;0m
The predicted gender for the principle speaker of this conference is: ('Male', array([0.875756], dtype=float32))
this speaker was interrupted by :1  persons
L'interruption était trop courte.
Processing the conference : [1mFlatiron Wide Algorithms and Mathematics - Erik Thiede .mp3[0;0m
The predicted gender for the principle speaker of this conference is: ('Female', array([0.10810881], dtype=float32))
this speaker was interrupted by :1  persons
SPEAKER_01 The predicted gender for the interruptor is: ('Male', array([0.92547244], dtype=float32))
Processing the conference : [1mFlatiron Wide Algorithms and Mathematics - Miles Stoudenmire .mp3[0;0m
The predicted gender for t

In [17]:
# Print the speaker timeline (one line per speech turn) of a single conference
print(Diarizations['Cryo-EM Workshop - Jose Maria Carazo.mp3'])

[ 00:00:08.191 -->  00:00:57.631] A SPEAKER_00
[ 00:00:58.191 -->  00:02:06.748] B SPEAKER_00
[ 00:02:07.292 -->  00:02:18.853] C SPEAKER_00
[ 00:02:18.938 -->  00:03:11.553] D SPEAKER_00
[ 00:03:13.081 -->  00:04:31.078] E SPEAKER_00
[ 00:04:31.298 -->  00:04:49.567] F SPEAKER_00
[ 00:04:50.059 -->  00:04:55.000] G SPEAKER_00
[ 00:04:55.696 -->  00:05:25.865] H SPEAKER_00
[ 00:05:26.867 -->  00:06:38.395] I SPEAKER_00
[ 00:06:38.752 -->  00:06:56.952] J SPEAKER_00
[ 00:06:57.444 -->  00:06:59.516] K SPEAKER_00
[ 00:07:00.331 -->  00:07:31.485] L SPEAKER_00
[ 00:07:31.723 -->  00:07:32.470] M SPEAKER_00
[ 00:07:32.589 -->  00:07:47.003] N SPEAKER_00
[ 00:07:48.174 -->  00:07:51.808] O SPEAKER_00
[ 00:07:52.351 -->  00:07:55.543] P SPEAKER_00
[ 00:07:56.069 -->  00:08:05.424] Q SPEAKER_00
[ 00:08:05.831 -->  00:08:12.385] R SPEAKER_00
[ 00:08:12.860 -->  00:08:57.954] S SPEAKER_00
[ 00:08:59.482 -->  00:09:25.679] T SPEAKER_00
[ 00:09:25.916 -->  00:10:03.777] U SPEAKER_00
[ 00:10:04.91