## The whole process of diarization, speaker selection and gender prediction applied to a playlist

In [2]:
#%pip install -qq pyannote.audio==2.1.1

2024-03-06 16:51:36.733660: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-06 16:51:36.733685: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-06 16:51:36.734255: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-06 16:51:36.737984: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-06 16:51:37.823051: I external/local_xla/xla/

In [1]:
import os

# Select the "torch" backend through the KERAS_BACKEND environment variable.
os.environ.update({"KERAS_BACKEND": "torch"})

In [None]:
# Folder containing the audio files, one name_of_conference.mp3 per conference
audio_folder = "Audios"
# Path to the model.h5 file
model_path = "model.h5"
# NOTE: model.h5 and useful_functions.py must be in the same directory as this code

In [3]:
import numpy as np 
import pandas as pd 
import os
from tqdm import tqdm
import torch
from pyannote.audio import Model
import json

import scipy
import csv
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split

from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from tensorflow.keras.models import load_model

import glob
import shutil
from pydub import AudioSegment
import librosa
from useful_functions import resample, extract_features, preprocess_audio, predict_gender


  torchaudio.set_audio_backend("soundfile")


# Diarization

## Utilization of Multiple Dictionaries

In this analysis, several dictionaries are employed to organize and process the audio data effectively:

- **`Diarizations{}`**: 
  - Associates each `file_name` with its corresponding diarization, which is an `Annotation` object.

- **`unique_speakers{}`**: 
  - For each `file_name`, associates a set of unique speakers identified in the audio file.

- **`durations_conferences{}`**: 
  - Maps each `file_name` to a `Duration{}` dictionary. 
  - For each speaker in the audio file, `Duration{}` associates their total spoken duration.
  - **Purpose**: This is particularly useful for determining the principal speaker in each conference.

- **`longest_segments_conferences{}`**: 
  - For each `file_name`, associates a `longest_segments{}` dictionary.
  - For each speaker in the audio file, `longest_segments{}` associates the duration of the longest spoken segment and the corresponding segment.
  - **Purpose**: This is useful for extracting a reasonable subsegment for every speaker to predict their gender.


## Auxiliary functions

In [6]:
from pydub import AudioSegment
import subprocess
        
def convert_mp3_to_wav(mp3_path, wav_path):
    """Convert an MP3 file to a 16 kHz, 16-bit PCM WAV file using ffmpeg.

    The conversion is skipped when ``wav_path`` already exists.

    Args:
        mp3_path: Path of the MP3 file to read.
        wav_path: Path of the WAV file to create.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
    """
    if os.path.exists(wav_path):
        return

    cmd = ['ffmpeg', '-i', mp3_path, '-acodec', 'pcm_s16le', '-ar', '16000', wav_path]
    try:
        # Redirecting stdout and stderr to DEVNULL to clean up the output
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except BaseException:
        # A failed or interrupted run (including a kernel interrupt) can leave a
        # truncated WAV behind. Because of the existence check above, that file
        # would be reused as if it were complete on the next call, so remove it.
        if os.path.exists(wav_path):
            os.remove(wav_path)
        raise
        
        
def delete_file(file_path):
    """Remove ``file_path`` from disk; do nothing when the path does not exist."""
    if not os.path.exists(file_path):
        return
    os.remove(file_path)
                  

def extract_subsegment(source_path, start_time, end_time, output_path):
    """Cut a slice out of an MP3 file and write it to a new MP3 file.

    Args:
        source_path: Path of the MP3 file to read.
        start_time: Start of the slice, used as the lower bound of the
            ``AudioSegment`` slice (pydub slices are indexed in milliseconds).
        end_time: End of the slice, used as the upper bound of the slice.
        output_path: Path of the MP3 file to write.
    """
    clip = AudioSegment.from_mp3(source_path)[start_time:end_time]
    clip.export(output_path, format="mp3")

### Diarization and extraction of different speakers in the audio files

### Saving excerpts for each speaker in each audio file

In [7]:
#%pip install transformers==4.27.4

In [9]:
import whisperx
from pyannote.audio import Pipeline

# Credentials must never be stored in the notebook. The Hugging Face token is
# read from the HF_TOKEN environment variable; when it is unset or empty,
# `True` asks the hub client to use the token cached by `huggingface-cli login`.
# NOTE: any token that was ever committed in this notebook must be revoked.
hf_token = os.environ.get("HF_TOKEN") or True
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.0", use_auth_token=hf_token)

# Send the pipeline to the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline.to(device)


# Dictionaries storing the diarization results, keyed by audio file name
Diarizations = {}
unique_speakers = {}
durations_conferences = {}
longest_segments_conferences = {}


with torch.no_grad():
    # Iterate over the MP3 files in the directory with a progress bar
    for file in tqdm(os.listdir(audio_folder), desc="Processing audio files"):
        file_name = os.path.basename(file)
        # Only MP3 files are diarized; anything else in the folder is skipped
        if not file.endswith('.mp3'):
            continue

        mp3_path = os.path.join(audio_folder, file)
        wav_path = mp3_path[:-3] + "wav"
        audio_file = wav_path
        try:
            convert_mp3_to_wav(mp3_path, wav_path)
            audio = whisperx.load_audio(audio_file)
            audio_data = {
                'waveform': torch.from_numpy(audio[None, :]),
                'sample_rate': whisperx.audio.SAMPLE_RATE
            }
            diarization = pipeline(audio_data)
        finally:
            # The waveform is already in memory, so the temporary WAV can go.
            # Doing this in `finally` keeps a failed or interrupted run from
            # leaving stray WAV files inside audio_folder.
            delete_file(wav_path)

        print("Diarization of", file_name, "done.")
        Diarizations[file_name] = diarization

        # One pass over the tracks collects, for every speaker, the total
        # spoken duration and the longest single segment.
        Duration = {}
        Longest_segment = {}
        for segment, _, speaker in diarization.itertracks(yield_label=True):
            duration = segment.duration
            Duration[speaker] = Duration.get(speaker, 0) + duration
            # Entry layout: [longest_duration, longest_segment]
            longest = Longest_segment.setdefault(speaker, [0, None])
            if duration > longest[0]:
                longest[0] = duration
                longest[1] = segment

        # Unique speakers are exactly the labels seen while scanning the tracks
        unique_speaker = set(Duration)
        unique_speakers[file_name] = unique_speaker
        durations_conferences[file_name] = Duration
        longest_segments_conferences[file_name] = Longest_segment

Processing audio files:   0%|          | 0/53 [03:23<?, ?it/s]


In [9]:
# A speaker gets an excerpt only if their longest segment is strictly longer
# than this many seconds.
MIN_SEGMENT_SECONDS = 3

# Maps file_name -> {speaker: path of the excerpt saved for that speaker}
SegmentsPerFile = {}
# Iterate over the files that were actually diarized rather than over
# os.listdir(audio_folder): the folder may contain other files (non-MP3s,
# leftover WAVs) that have no entry in the result dictionaries.
for file_name in tqdm(longest_segments_conferences, desc="Saving audios for each speaker in each audio file"):
    unique_speaker = unique_speakers[file_name]
    Longest_segment = longest_segments_conferences[file_name]
    print(Longest_segment)

    # Resolve the source relative to the configured folder, not a
    # machine-specific absolute path.
    source_path = os.path.join(audio_folder, file_name)
    # Decoded lazily and at most once per file, instead of once per speaker.
    source_audio = None

    SegmentsPerSpeaker = {}

    for speaker in unique_speaker:
        longest_duration, segment = Longest_segment[speaker]
        if longest_duration > MIN_SEGMENT_SECONDS:
            print(speaker, file_name, longest_duration, segment.start, segment.end)
            # Segment bounds are in seconds; AudioSegment slices use milliseconds
            start_time = segment.start * 1000
            end_time = segment.end * 1000
            excerpt_path = file_name[:-4] + speaker + '.mp3'
            if source_audio is None:
                source_audio = AudioSegment.from_mp3(source_path)
            source_audio[start_time:end_time].export(excerpt_path, format="mp3")
            SegmentsPerSpeaker[speaker] = excerpt_path

    SegmentsPerFile[file_name] = SegmentsPerSpeaker

Saving audios for each speaker in each audio file:   0%|          | 0/52 [00:00<?, ?it/s]

{'SPEAKER_01': [27.724957555178207, <Segment(3465.68, 3493.4)>], 'SPEAKER_02': [25.127334465194963, <Segment(3511.77, 3536.9)>], 'SPEAKER_03': [12.003395585738417, <Segment(3758.01, 3770.01)>], 'SPEAKER_00': [27.504244482172908, <Segment(3936.73, 3964.24)>], 'SPEAKER_04': [11.884550084889725, <Segment(3836.6, 3848.48)>]}
SPEAKER_01 Alfred Nobel and the Nobel Prizes  KITP Colloquium by Lars Brink.mp3 27.724957555178207 3465.679117147708 3493.404074702886
SPEAKER_02 Alfred Nobel and the Nobel Prizes  KITP Colloquium by Lars Brink.mp3 25.127334465194963 3511.7741935483873 3536.9015280135823
SPEAKER_03 Alfred Nobel and the Nobel Prizes  KITP Colloquium by Lars Brink.mp3 12.003395585738417 3758.005093378608 3770.0084889643463
SPEAKER_00 Alfred Nobel and the Nobel Prizes  KITP Colloquium by Lars Brink.mp3 27.504244482172908 3936.7317487266555 3964.2359932088284
SPEAKER_04 Alfred Nobel and the Nobel Prizes  KITP Colloquium by Lars Brink.mp3 11.884550084889725 3836.595925297114 3848.4804753820

Saving audios for each speaker in each audio file:   2%|▏         | 1/52 [00:16<14:16, 16.80s/it]

{'SPEAKER_01': [21.494057724957656, <Segment(4013.69, 4035.19)>], 'SPEAKER_02': [15.67062818336126, <Segment(1180.26, 1195.93)>], 'SPEAKER_05': [42.00339558573842, <Segment(956.817, 998.82)>], 'SPEAKER_06': [1.2733446519523568, <Segment(1732.45, 1733.73)>], 'SPEAKER_03': [14.702886247877814, <Segment(3922.2, 3936.9)>], 'SPEAKER_00': [18.86247877758933, <Segment(2647.72, 2666.58)>], 'SPEAKER_04': [13.039049235994753, <Segment(4164, 4177.04)>]}
SPEAKER_01 Adventures in Galaxy Gas Physics  KITP Colloquium by Mike McCourt.mp3 21.494057724957656 4013.692699490662 4035.1867572156198
SPEAKER_02 Adventures in Galaxy Gas Physics  KITP Colloquium by Mike McCourt.mp3 15.67062818336126 1180.263157894737 1195.9337860780984
SPEAKER_05 Adventures in Galaxy Gas Physics  KITP Colloquium by Mike McCourt.mp3 42.00339558573842 956.8166383701189 998.8200339558573
SPEAKER_03 Adventures in Galaxy Gas Physics  KITP Colloquium by Mike McCourt.mp3 14.702886247877814 3922.1986417657045 3936.9015280135823
SPEAKER

Saving audios for each speaker in each audio file:   4%|▍         | 2/52 [00:37<16:05, 19.32s/it]

{'SPEAKER_01': [2.750424448217018, <Segment(4629.24, 4631.99)>], 'SPEAKER_10': [14.244482173175129, <Segment(2531.99, 2546.24)>], 'SPEAKER_11': [4.6859083191848185, <Segment(3843.73, 3848.41)>], 'SPEAKER_02': [4.108658743633441, <Segment(3799.86, 3803.96)>], 'SPEAKER_09': [7.979626485568588, <Segment(4446.9, 4454.88)>], 'SPEAKER_12': [1.850594227504189, <Segment(1516.75, 1518.6)>], 'SPEAKER_05': [15.891341256367014, <Segment(4677.11, 4693)>], 'SPEAKER_13': [9.083191850593721, <Segment(4317.24, 4326.32)>], 'SPEAKER_06': [5.636672325976178, <Segment(4576.19, 4581.83)>], 'SPEAKER_03': [8.200339558573887, <Segment(3993.9, 4002.1)>], 'SPEAKER_07': [4.668930390491369, <Segment(4196.95, 4201.62)>], 'SPEAKER_08': [33.53140916808161, <Segment(3471.42, 3504.95)>], 'SPEAKER_00': [4.855687606112042, <Segment(3712.96, 3717.82)>], 'SPEAKER_04': [11.222410865874366, <Segment(125.798, 137.02)>]}
SPEAKER_10 A World Powered Predominantly by Solar and Wind Energy  KITP Colloquium by Walter Kohn.mp3 14.24

Saving audios for each speaker in each audio file:   6%|▌         | 3/52 [01:25<26:25, 32.35s/it]

{'SPEAKER_01': [72.73344651952462, <Segment(180.195, 252.929)>], 'SPEAKER_02': [11.918505942274805, <Segment(3714.2, 3726.12)>], 'SPEAKER_05': [7.724957555178207, <Segment(1286.24, 1293.96)>], 'SPEAKER_06': [12.58064516129032, <Segment(35.2886, 47.8693)>], 'SPEAKER_03': [1.7996604414265676, <Segment(1097, 1098.8)>], 'SPEAKER_07': [13.37860780984738, <Segment(1160.33, 1173.71)>], 'SPEAKER_08': [14.75382003395589, <Segment(1503.83, 1518.58)>], 'SPEAKER_00': [8.471986417657263, <Segment(3572.64, 3581.11)>], 'SPEAKER_04': [5.517826825127486, <Segment(3766.07, 3771.59)>]}
SPEAKER_01 A data-centric view on reliable generalization： From ImageNet to LAION-5B [cognZ12dpAM].mp3 72.73344651952462 180.19524617996606 252.92869269949068
SPEAKER_02 A data-centric view on reliable generalization： From ImageNet to LAION-5B [cognZ12dpAM].mp3 11.918505942274805 3714.2020373514433 3726.120543293718
SPEAKER_05 A data-centric view on reliable generalization： From ImageNet to LAION-5B [cognZ12dpAM].mp3 7.724

Saving audios for each speaker in each audio file:   8%|▊         | 4/52 [01:53<24:33, 30.70s/it]

{'SPEAKER_19': [4.261460101867669, <Segment(2258.55, 2262.81)>], 'SPEAKER_17': [6.604414261460079, <Segment(3235.9, 3242.5)>], 'SPEAKER_15': [6.791171477079388, <Segment(3790.65, 3797.44)>], 'SPEAKER_06': [7.04584040747045, <Segment(582.895, 589.941)>], 'SPEAKER_03': [6.417657045840315, <Segment(1254.03, 1260.45)>], 'SPEAKER_14': [19.966044142614464, <Segment(3824.35, 3844.32)>], 'SPEAKER_20': [9.694397283531089, <Segment(1093.76, 1103.46)>], 'SPEAKER_10': [8.030560271646664, <Segment(3632.69, 3640.72)>], 'SPEAKER_02': [3.5653650254669174, <Segment(635.866, 639.431)>], 'SPEAKER_00': [5.568760611205562, <Segment(3680.28, 3685.85)>], 'SPEAKER_04': [8.115449915110275, <Segment(3270.18, 3278.29)>], 'SPEAKER_23': [7.300509337860603, <Segment(1884.73, 1892.03)>], 'SPEAKER_05': [20.101867572156152, <Segment(2908.62, 2928.72)>], 'SPEAKER_09': [9.626485568760472, <Segment(1002.83, 1012.45)>], 'SPEAKER_08': [24.312393887945674, <Segment(0.00848896, 24.3209)>], 'SPEAKER_18': [12.17317487266564, <

Saving audios for each speaker in each audio file:  10%|▉         | 5/52 [03:17<38:56, 49.72s/it]

{'SPEAKER_01': [19.626485568760472, <Segment(3171.99, 3191.62)>], 'SPEAKER_10': [4.872665534804582, <Segment(2418.01, 2422.88)>], 'SPEAKER_11': [8.387096774193651, <Segment(2837, 2845.39)>], 'SPEAKER_02': [8.930390492359948, <Segment(2608.89, 2617.82)>], 'SPEAKER_05': [9.490662139218784, <Segment(2314.07, 2323.56)>], 'SPEAKER_07': [10.71307300509352, <Segment(3112.13, 3122.84)>], 'SPEAKER_03': [15.059422750424346, <Segment(3052.23, 3067.29)>], 'SPEAKER_06': [7.589134125636974, <Segment(2925.65, 2933.23)>], 'SPEAKER_09': [6.43463497453331, <Segment(3293.56, 3299.99)>], 'SPEAKER_08': [1.6298811544988894, <Segment(1308.04, 1309.67)>], 'SPEAKER_00': [13.395585738539898, <Segment(0.00848896, 13.4041)>], 'SPEAKER_04': [9.439728353140708, <Segment(2737.46, 2746.9)>]}
SPEAKER_01 An Observation on Generalization [AKMuA_TVz3A].mp3 19.626485568760472 3171.994906621392 3191.6213921901526
SPEAKER_10 An Observation on Generalization [AKMuA_TVz3A].mp3 4.872665534804582 2418.005093378608 2422.87775891

Saving audios for each speaker in each audio file:  12%|█▏        | 6/52 [03:51<33:59, 44.33s/it]

{'SPEAKER_01': [66.65534804753815, <Segment(3349.79, 3416.44)>], 'SPEAKER_10': [11.765704584040577, <Segment(1734.9, 1746.66)>], 'SPEAKER_11': [6.078098471986323, <Segment(2258.19, 2264.27)>], 'SPEAKER_02': [9.558573853989856, <Segment(3278.65, 3288.21)>], 'SPEAKER_12': [10.203735144312304, <Segment(1062.98, 1073.18)>], 'SPEAKER_05': [11.188455008488745, <Segment(1216.14, 1227.33)>], 'SPEAKER_07': [7.809847198641819, <Segment(2815.48, 2823.29)>], 'SPEAKER_06': [8.624787775891491, <Segment(3426.88, 3435.51)>], 'SPEAKER_03': [15.653650254668928, <Segment(3.9983, 19.652)>], 'SPEAKER_09': [4.380305602716817, <Segment(1561.32, 1565.7)>], 'SPEAKER_08': [19.677419354839003, <Segment(3318.21, 3337.89)>], 'SPEAKER_00': [1.3921901528015042, <Segment(882.216, 883.608)>], 'SPEAKER_04': [2.6825127334464014, <Segment(3533.52, 3536.21)>]}
SPEAKER_01 Human-AI Interaction in the Age of Large Language Models [Yv7drI7cBsQ].mp3 66.65534804753815 3349.7877758913414 3416.4431239388796
SPEAKER_10 Human-AI In

Saving audios for each speaker in each audio file:  13%|█▎        | 7/52 [04:26<31:01, 41.37s/it]

{'SPEAKER_01': [8.726655348047643, <Segment(2278.4, 2287.12)>], 'SPEAKER_10': [9.371816638370092, <Segment(3415.59, 3424.97)>], 'SPEAKER_11': [11.765704584040577, <Segment(1776.19, 1787.95)>], 'SPEAKER_02': [4.142614601018977, <Segment(1310.84, 1314.98)>], 'SPEAKER_12': [44.87266553480458, <Segment(2042.96, 2087.84)>], 'SPEAKER_05': [11.035653650254517, <Segment(3264.3, 3275.34)>], 'SPEAKER_07': [13.073005093378924, <Segment(3361.86, 3374.93)>], 'SPEAKER_13': [23.191850594227617, <Segment(3027.48, 3050.67)>], 'SPEAKER_14': [2.2241086587437167, <Segment(1351.64, 1353.86)>], 'SPEAKER_06': [11.052631578947512, <Segment(2348.57, 2359.62)>], 'SPEAKER_03': [15.653650254668719, <Segment(3631.54, 3647.19)>], 'SPEAKER_09': [5.382003395585798, <Segment(665.781, 671.163)>], 'SPEAKER_08': [13.752122241086454, <Segment(3554.98, 3568.74)>], 'SPEAKER_00': [10.73005093378606, <Segment(3778.28, 3789.01)>], 'SPEAKER_04': [2.3089983022068736, <Segment(2544.98, 2547.29)>]}
SPEAKER_01 How to Use Self-Play 

Saving audios for each speaker in each audio file:  15%|█▌        | 8/52 [05:11<31:12, 42.57s/it]

{'SPEAKER_01': [15.517826825127335, <Segment(0.00848896, 15.5263)>], 'SPEAKER_02': [27.36842105263156, <Segment(340.093, 367.462)>], 'SPEAKER_05': [16.14601018675694, <Segment(2415.66, 2431.81)>], 'SPEAKER_06': [0.4414261460101443, <Segment(2939.21, 2939.65)>], 'SPEAKER_03': [19.083191850594176, <Segment(3352.39, 3371.47)>], 'SPEAKER_07': [4.465195246179974, <Segment(1430.82, 1435.29)>], 'SPEAKER_08': [2.6485568760613205, <Segment(1447.34, 1449.99)>], 'SPEAKER_00': [11.799660441426113, <Segment(3184.17, 3195.97)>], 'SPEAKER_04': [20.814940577249672, <Segment(3423.66, 3444.47)>]}
SPEAKER_01 Formalizing Explanations of Neural Network Behaviors [u0619QrWxQc].mp3 15.517826825127335 0.008488964346349746 15.526315789473685
SPEAKER_02 Formalizing Explanations of Neural Network Behaviors [u0619QrWxQc].mp3 27.36842105263156 340.0933786078099 367.46179966044144
SPEAKER_05 Formalizing Explanations of Neural Network Behaviors [u0619QrWxQc].mp3 16.14601018675694 2415.6621392190154 2431.808149405772

Saving audios for each speaker in each audio file:  17%|█▋        | 9/52 [05:34<25:59, 36.26s/it]

{'SPEAKER_01': [22.6825127334464, <Segment(3262.18, 3284.86)>], 'SPEAKER_10': [6.63837011884516, <Segment(2694.88, 2701.52)>], 'SPEAKER_11': [10.339558573853992, <Segment(2761.1, 2771.43)>], 'SPEAKER_02': [5.891341256367014, <Segment(3516.51, 3522.4)>], 'SPEAKER_05': [26.791171477079615, <Segment(526.545, 553.336)>], 'SPEAKER_12': [11.69779286926996, <Segment(2122.49, 2134.19)>], 'SPEAKER_07': [6.366723259762239, <Segment(3446.29, 3452.66)>], 'SPEAKER_13': [14.567062818336126, <Segment(572.385, 586.952)>], 'SPEAKER_14': [14.821731748726961, <Segment(3574.37, 3589.19)>], 'SPEAKER_03': [4.329371816638286, <Segment(602.81, 607.139)>], 'SPEAKER_06': [11.612903225806349, <Segment(3369.96, 3381.57)>], 'SPEAKER_09': [4.77079796264843, <Segment(2172.39, 2177.16)>], 'SPEAKER_08': [28.081494057724974, <Segment(41.6044, 69.6859)>], 'SPEAKER_00': [50.35653650254699, <Segment(3308.94, 3359.3)>], 'SPEAKER_04': [3.6332767402377613, <Segment(1031.04, 1034.68)>]}
SPEAKER_01 Build an Ecosystem, Not a Mo

Saving audios for each speaker in each audio file:  19%|█▉        | 10/52 [06:24<28:26, 40.63s/it]

{'SPEAKER_01': [5.9762308998301705, <Segment(3555.76, 3561.74)>], 'SPEAKER_02': [3.191850594227617, <Segment(3896.9, 3900.09)>], 'SPEAKER_05': [9.694397283531544, <Segment(2235.29, 2244.98)>], 'SPEAKER_06': [2.818336162988089, <Segment(1872.45, 1875.27)>], 'SPEAKER_03': [24.193548387096776, <Segment(0.00848896, 24.202)>], 'SPEAKER_07': [26.146010186757394, <Segment(3666.31, 3692.45)>], 'SPEAKER_08': [9.01528013582356, <Segment(1403.98, 1413)>], 'SPEAKER_00': [18.981324278438024, <Segment(3783.73, 3802.71)>], 'SPEAKER_04': [28.438030560271727, <Segment(681.587, 710.025)>]}
SPEAKER_01 Beyond Language： Scaling up Robot Ontogeny [kOfesqNz2xU].mp3 5.9762308998301705 3555.7640067911716 3561.7402376910018
SPEAKER_02 Beyond Language： Scaling up Robot Ontogeny [kOfesqNz2xU].mp3 3.191850594227617 3896.9015280135823 3900.09337860781
SPEAKER_05 Beyond Language： Scaling up Robot Ontogeny [kOfesqNz2xU].mp3 9.694397283531544 2235.288624787776 2244.9830220713075
SPEAKER_03 Beyond Language： Scaling up 

Saving audios for each speaker in each audio file:  21%|██        | 11/52 [06:52<25:11, 36.88s/it]

{'SPEAKER_01': [57.48726655348082, <Segment(1227.58, 1285.07)>], 'SPEAKER_10': [13.870967741935601, <Segment(3166.61, 3180.48)>], 'SPEAKER_11': [4.75382003395589, <Segment(2274.69, 2279.45)>], 'SPEAKER_02': [6.213921901528014, <Segment(5.44143, 11.6553)>], 'SPEAKER_12': [3.5993208828522256, <Segment(2445.19, 2448.79)>], 'SPEAKER_05': [13.429541595925457, <Segment(2203.83, 2217.26)>], 'SPEAKER_07': [7.317487266553599, <Segment(2024.22, 2031.54)>], 'SPEAKER_06': [9.083191850594176, <Segment(2455.85, 2464.93)>], 'SPEAKER_03': [9.320882852292016, <Segment(3296.99, 3306.31)>], 'SPEAKER_09': [6.859083191850459, <Segment(1923.68, 1930.53)>], 'SPEAKER_08': [14.99151103565373, <Segment(3394.81, 3409.8)>], 'SPEAKER_00': [9.473684210526244, <Segment(3568.77, 3578.24)>], 'SPEAKER_04': [2.275042444821736, <Segment(241.435, 243.71)>]}
SPEAKER_01 Are LLMs the Beginning or End of NLP？ [KVDKWrsP3es].mp3 57.48726655348082 1227.5806451612902 1285.067911714771
SPEAKER_10 Are LLMs the Beginning or End of N

Saving audios for each speaker in each audio file:  23%|██▎       | 12/52 [07:33<25:18, 37.96s/it]

{'SPEAKER_01': [6.400679117147774, <Segment(2238.01, 2244.41)>], 'SPEAKER_10': [47.623089983022055, <Segment(613.421, 661.044)>], 'SPEAKER_11': [11.256366723259816, <Segment(2806.32, 2817.58)>], 'SPEAKER_02': [1.4261460101867556, <Segment(18.837, 20.2632)>], 'SPEAKER_12': [5.891341256367014, <Segment(1391.42, 1397.31)>], 'SPEAKER_05': [5.24617996604411, <Segment(451.978, 457.224)>], 'SPEAKER_07': [9.72835314091708, <Segment(3647.43, 3657.16)>], 'SPEAKER_13': [9.066213921901635, <Segment(3242.18, 3251.25)>], 'SPEAKER_14': [5.365025466892803, <Segment(1674.1, 1679.47)>], 'SPEAKER_06': [40.59422750424449, <Segment(43.1154, 83.7097)>], 'SPEAKER_03': [3.9219015280132226, <Segment(2341.43, 2345.36)>], 'SPEAKER_09': [4.668930390492278, <Segment(2384.15, 2388.82)>], 'SPEAKER_08': [5.178268251273039, <Segment(2443.86, 2449.04)>], 'SPEAKER_00': [12.12224108658711, <Segment(3535.95, 3548.07)>], 'SPEAKER_04': [5.925297113752125, <Segment(11.2649, 17.1902)>]}
SPEAKER_01 Are Aligned Language Models 

Saving audios for each speaker in each audio file:  25%|██▌       | 13/52 [08:21<26:41, 41.07s/it]

{'SPEAKER_01': [7.962648556876047, <Segment(2051.1, 2059.06)>], 'SPEAKER_10': [5.076400679116887, <Segment(1884.93, 1890.01)>], 'SPEAKER_11': [13.157894736842081, <Segment(2574.32, 2587.48)>], 'SPEAKER_02': [4.99151103565373, <Segment(3701.03, 3706.02)>], 'SPEAKER_05': [5.48387096774195, <Segment(2543.73, 2549.21)>], 'SPEAKER_12': [8.353140916808115, <Segment(3707.14, 3715.49)>], 'SPEAKER_07': [9.354838709677551, <Segment(2195.51, 2204.86)>], 'SPEAKER_06': [19.643463497453013, <Segment(1494.1, 1513.74)>], 'SPEAKER_03': [2.4957555178266375, <Segment(1151.93, 1154.42)>], 'SPEAKER_09': [6.926994906621076, <Segment(2072.66, 2079.58)>], 'SPEAKER_08': [10.356536502546533, <Segment(3119.99, 3130.35)>], 'SPEAKER_00': [11.494057724957557, <Segment(15.4754, 26.9694)>], 'SPEAKER_04': [4.9235993208826585, <Segment(2637.41, 2642.33)>]}
SPEAKER_01 In-Context Learning： A Case Study of Simple Function Classes [DiJsg93zQDc].mp3 7.962648556876047 2051.0950764006793 2059.0577249575554
SPEAKER_10 In-Conte

Saving audios for each speaker in each audio file:  27%|██▋       | 14/52 [09:02<26:01, 41.10s/it]

{'SPEAKER_01': [9.456706281833704, <Segment(1751.55, 1761.01)>], 'SPEAKER_02': [49.066213921901635, <Segment(3309.5, 3358.57)>], 'SPEAKER_05': [10.50933786078076, <Segment(3569.31, 3579.82)>], 'SPEAKER_06': [0.9677419354839003, <Segment(2359.67, 2360.64)>], 'SPEAKER_03': [8.455008488964268, <Segment(792.861, 801.316)>], 'SPEAKER_00': [40.28862478777589, <Segment(215.441, 255.73)>], 'SPEAKER_04': [12.003395585738417, <Segment(2541.37, 2553.37)>]}
SPEAKER_01 Integrating Language into Intelligent Architectures [Uskm9a26V6U].mp3 9.456706281833704 1751.553480475382 1761.0101867572157
SPEAKER_02 Integrating Language into Intelligent Architectures [Uskm9a26V6U].mp3 49.066213921901635 3309.4991511035655 3358.565365025467
SPEAKER_05 Integrating Language into Intelligent Architectures [Uskm9a26V6U].mp3 10.50933786078076 3569.3123938879457 3579.8217317487265
SPEAKER_03 Integrating Language into Intelligent Architectures [Uskm9a26V6U].mp3 8.455008488964268 792.8607809847199 801.3157894736842
SPEAK

Saving audios for each speaker in each audio file:  29%|██▉       | 15/52 [09:22<21:22, 34.66s/it]

{'SPEAKER_01': [21.154499151103664, <Segment(2507.68, 2528.84)>], 'SPEAKER_10': [7.045840407470223, <Segment(2749.47, 2756.51)>], 'SPEAKER_11': [10.441426146010144, <Segment(1662.06, 1672.5)>], 'SPEAKER_02': [4.63497453310697, <Segment(657.462, 662.097)>], 'SPEAKER_05': [1.850594227504189, <Segment(3061.77, 3063.62)>], 'SPEAKER_12': [15.432937181663874, <Segment(2394, 2409.43)>], 'SPEAKER_07': [5.636672325976178, <Segment(2982.78, 2988.41)>], 'SPEAKER_13': [22.937181663837237, <Segment(2271.86, 2294.8)>], 'SPEAKER_03': [6.842105263157919, <Segment(1576.1, 1582.95)>], 'SPEAKER_06': [12.546689303904714, <Segment(1093.66, 1106.21)>], 'SPEAKER_09': [18.760611205432838, <Segment(438.718, 457.479)>], 'SPEAKER_08': [6.9779286926996065, <Segment(2624.24, 2631.21)>], 'SPEAKER_00': [26.655348047538155, <Segment(2813.2, 2839.86)>], 'SPEAKER_04': [10.865874363327748, <Segment(1289.3, 1300.16)>]}
SPEAKER_01 Interpretability via Symbolic Distillation [XHBJJ2N-kUc].mp3 21.154499151103664 2507.6825127

Saving audios for each speaker in each audio file:  31%|███       | 16/52 [09:58<21:02, 35.06s/it]

{'SPEAKER_01': [81.47707979626486, <Segment(159.958, 241.435)>], 'SPEAKER_10': [4.414261460101869, <Segment(4.59253, 9.00679)>], 'SPEAKER_11': [8.234295415959423, <Segment(1090.79, 1099.02)>], 'SPEAKER_02': [16.774193548387302, <Segment(1355.54, 1372.32)>], 'SPEAKER_12': [8.930390492359493, <Segment(1272.93, 1281.86)>], 'SPEAKER_05': [6.893039049235995, <Segment(3474.17, 3481.06)>], 'SPEAKER_07': [20.050933786078076, <Segment(1067.65, 1087.7)>], 'SPEAKER_13': [9.337860780984556, <Segment(1795.85, 1805.19)>], 'SPEAKER_06': [14.923599320883113, <Segment(3553.25, 3568.17)>], 'SPEAKER_03': [10.916808149405824, <Segment(3381.59, 3392.5)>], 'SPEAKER_09': [12.156196943972837, <Segment(19.0238, 31.18)>], 'SPEAKER_08': [5.942275042444635, <Segment(1892.2, 1898.14)>], 'SPEAKER_00': [6.061120543293782, <Segment(836.8, 842.861)>], 'SPEAKER_04': [5.840407470288483, <Segment(1505.37, 1511.21)>]}
SPEAKER_01 Language Models as Statisticians, and as Adapted Organisms [_oHvhJhjkx0].mp3 81.47707979626486

Saving audios for each speaker in each audio file:  33%|███▎      | 17/52 [10:44<22:22, 38.36s/it]

{'SPEAKER_01': [9.06621392190118, <Segment(3339.58, 3348.65)>], 'SPEAKER_10': [8.166383701188352, <Segment(2043.06, 2051.23)>], 'SPEAKER_11': [7.521222410865903, <Segment(1980.55, 1988.07)>], 'SPEAKER_02': [16.82512733446515, <Segment(657.937, 674.762)>], 'SPEAKER_09': [5.7724957555174115, <Segment(1914.63, 1920.4)>], 'SPEAKER_05': [0.7470288624786008, <Segment(1694.02, 1694.76)>], 'SPEAKER_12': [9.04923599320864, <Segment(2010.47, 2019.52)>], 'SPEAKER_13': [10.9677419354839, <Segment(1683.05, 1694.02)>], 'SPEAKER_03': [8.896434634974867, <Segment(2650.11, 2659.01)>], 'SPEAKER_06': [19.98302207130746, <Segment(3417.56, 3437.55)>], 'SPEAKER_07': [96.65534804753815, <Segment(850.823, 947.479)>], 'SPEAKER_08': [11.850594227504189, <Segment(3205.56, 3217.41)>], 'SPEAKER_00': [4.380305602716817, <Segment(2178.62, 2183)>], 'SPEAKER_04': [9.134125636672252, <Segment(1850.31, 1859.45)>]}
SPEAKER_01 Meaning in the age of large language models [lA19zXgObKA].mp3 9.06621392190118 3339.584040747029

Saving audios for each speaker in each audio file:  35%|███▍      | 18/52 [11:26<22:20, 39.42s/it]

{'SPEAKER_01': [23.8709677419356, <Segment(4070.79, 4094.66)>], 'SPEAKER_10': [47.87775891341255, <Segment(384.559, 432.436)>], 'SPEAKER_11': [3.08998302207101, <Segment(2794.15, 2797.24)>], 'SPEAKER_02': [7.334465195246139, <Segment(1977.67, 1985)>], 'SPEAKER_15': [8.641765704584031, <Segment(3993.54, 4002.18)>], 'SPEAKER_09': [36.46859083191839, <Segment(2886.19, 2922.66)>], 'SPEAKER_05': [7.283531409168063, <Segment(574.202, 581.486)>], 'SPEAKER_12': [5.059422750424346, <Segment(4148.14, 4153.2)>], 'SPEAKER_07': [8.455008488964268, <Segment(2715.48, 2723.93)>], 'SPEAKER_13': [23.93887945670628, <Segment(33.9983, 57.9372)>], 'SPEAKER_03': [4.006791171477289, <Segment(1640.2, 1644.2)>], 'SPEAKER_06': [9.422750424448168, <Segment(2841.81, 2851.23)>], 'SPEAKER_14': [20.916808149405824, <Segment(3560.87, 3581.79)>], 'SPEAKER_08': [1.8675721561969567, <Segment(76.2224, 78.09)>], 'SPEAKER_00': [5.059422750424346, <Segment(1525.92, 1530.98)>], 'SPEAKER_04': [17.521222410865903, <Segment(383

Saving audios for each speaker in each audio file:  37%|███▋      | 19/52 [12:23<24:40, 44.85s/it]

{'SPEAKER_01': [8.879456706281871, <Segment(797.75, 806.63)>], 'SPEAKER_10': [7.045840407470223, <Segment(2087.92, 2094.97)>], 'SPEAKER_11': [8.166383701188806, <Segment(2534.97, 2543.13)>], 'SPEAKER_02': [6.808149405772383, <Segment(1775.46, 1782.27)>], 'SPEAKER_05': [8.862478777588876, <Segment(1666.09, 1674.95)>], 'SPEAKER_12': [3.5483870967741495, <Segment(1629.58, 1633.13)>], 'SPEAKER_07': [24.651952461799283, <Segment(1346.87, 1371.52)>], 'SPEAKER_13': [15.348047538200717, <Segment(2826.04, 2841.38)>], 'SPEAKER_06': [34.51612903225805, <Segment(358.294, 392.81)>], 'SPEAKER_03': [5.636672325976232, <Segment(5.11885, 10.7555)>], 'SPEAKER_09': [7.657045840407136, <Segment(1601.28, 1608.94)>], 'SPEAKER_08': [7.062818336163218, <Segment(1233.01, 1240.08)>], 'SPEAKER_00': [5.891341256367014, <Segment(1976.61, 1982.5)>], 'SPEAKER_04': [1.290322580645352, <Segment(3660.59, 3661.88)>]}
SPEAKER_01 On Localization in Language Models [J-CTR0xHr98].mp3 8.879456706281871 797.7504244482172 806.

Saving audios for each speaker in each audio file:  38%|███▊      | 20/52 [13:07<23:45, 44.53s/it]

{'SPEAKER_01': [41.69779286926996, <Segment(123.387, 165.085)>], 'SPEAKER_02': [26.672325976230695, <Segment(2138.58, 2165.25)>], 'SPEAKER_05': [97.31748726655348, <Segment(387.801, 485.119)>], 'SPEAKER_06': [23.80305602716453, <Segment(1931.93, 1955.73)>], 'SPEAKER_03': [64.6179966044142, <Segment(2642.86, 2707.48)>], 'SPEAKER_07': [30.169779286926996, <Segment(617.003, 647.173)>], 'SPEAKER_00': [15.280135823429646, <Segment(2370.99, 2386.27)>], 'SPEAKER_04': [49.779286926995155, <Segment(1003.79, 1053.57)>]}
SPEAKER_01 Panel Discussion [OEeNjlBYSrk].mp3 41.69779286926996 123.38709677419354 165.0848896434635
SPEAKER_02 Panel Discussion [OEeNjlBYSrk].mp3 26.672325976230695 2138.5823429541597 2165.2546689303904
SPEAKER_05 Panel Discussion [OEeNjlBYSrk].mp3 97.31748726655348 387.80135823429544 485.1188455008489
SPEAKER_06 Panel Discussion [OEeNjlBYSrk].mp3 23.80305602716453 1931.9269949066215 1955.730050933786
SPEAKER_03 Panel Discussion [OEeNjlBYSrk].mp3 64.6179966044142 2642.8607809847

Saving audios for each speaker in each audio file:  40%|████      | 21/52 [13:31<19:46, 38.27s/it]

{'SPEAKER_01': [10.339558573853992, <Segment(1269.8, 1280.14)>], 'SPEAKER_10': [23.59932088285268, <Segment(2209.97, 2233.57)>], 'SPEAKER_11': [10.679117147707984, <Segment(2450.03, 2460.7)>], 'SPEAKER_02': [8.879456706281871, <Segment(2603.59, 2612.47)>], 'SPEAKER_05': [11.426146010186812, <Segment(809.856, 821.282)>], 'SPEAKER_07': [4.923599320883113, <Segment(2844, 2848.92)>], 'SPEAKER_06': [30.203735144312418, <Segment(378.939, 409.143)>], 'SPEAKER_03': [21.680814940577193, <Segment(437.241, 458.922)>], 'SPEAKER_09': [3.616298811545221, <Segment(2503.85, 2507.46)>], 'SPEAKER_08': [3.2597623089982335, <Segment(1814.03, 1817.29)>], 'SPEAKER_00': [29.354838709677324, <Segment(541.214, 570.569)>], 'SPEAKER_04': [54.65195246179965, <Segment(140.518, 195.17)>]}
SPEAKER_01 Panel Discussion [unotid_qTbw].mp3 10.339558573853992 1269.804753820034 1280.144312393888
SPEAKER_10 Panel Discussion [unotid_qTbw].mp3 23.59932088285268 2209.9745331069607 2233.5738539898134
SPEAKER_11 Panel Discussion

Saving audios for each speaker in each audio file:  42%|████▏     | 22/52 [14:04<18:19, 36.64s/it]

{'SPEAKER_01': [29.864176570458397, <Segment(78.1919, 108.056)>], 'SPEAKER_02': [13.259762308998233, <Segment(1538.74, 1551.99)>], 'SPEAKER_05': [6.519524617996467, <Segment(3196.75, 3203.27)>], 'SPEAKER_03': [11.511035653650254, <Segment(26.6638, 38.1749)>], 'SPEAKER_06': [9.76230899830216, <Segment(2526.95, 2536.71)>], 'SPEAKER_07': [15.076400679117341, <Segment(3316.51, 3331.59)>], 'SPEAKER_08': [31.22241086587428, <Segment(2894.41, 2925.63)>], 'SPEAKER_00': [20.169779286927223, <Segment(2536.77, 2556.94)>], 'SPEAKER_04': [11.324278438030433, <Segment(3477.87, 3489.19)>]}
SPEAKER_01 Short Talks [9iSxF_RW6xk].mp3 29.864176570458397 78.1918505942275 108.0560271646859
SPEAKER_02 Short Talks [9iSxF_RW6xk].mp3 13.259762308998233 1538.735144312394 1551.9949066213921
SPEAKER_05 Short Talks [9iSxF_RW6xk].mp3 6.519524617996467 3196.748726655348 3203.2682512733445
SPEAKER_03 Short Talks [9iSxF_RW6xk].mp3 11.511035653650254 26.663837011884553 38.17487266553481
SPEAKER_06 Short Talks [9iSxF_RW6

Saving audios for each speaker in each audio file:  44%|████▍     | 23/52 [14:33<16:37, 34.40s/it]

{'SPEAKER_01': [2.9202037351442414, <Segment(1622.03, 1624.95)>], 'SPEAKER_10': [39.983022071307346, <Segment(264.066, 304.049)>], 'SPEAKER_11': [9.0322580645161, <Segment(1591.59, 1600.62)>], 'SPEAKER_02': [13.08998302207101, <Segment(2662.89, 2675.98)>], 'SPEAKER_05': [5.789473684210407, <Segment(3937.21, 3943)>], 'SPEAKER_12': [4.736842105263122, <Segment(756.205, 760.942)>], 'SPEAKER_07': [4.51612903225805, <Segment(3544.17, 3548.68)>], 'SPEAKER_13': [3.480475382003533, <Segment(3879.36, 3882.84)>], 'SPEAKER_14': [4.906621392190118, <Segment(2171.08, 2175.98)>], 'SPEAKER_03': [6.19694397283547, <Segment(2521.18, 2527.38)>], 'SPEAKER_06': [27.979626485569042, <Segment(3404.52, 3432.5)>], 'SPEAKER_09': [6.315789473684163, <Segment(2573.29, 2579.6)>], 'SPEAKER_08': [14.906621392190573, <Segment(3795.36, 3810.26)>], 'SPEAKER_00': [4.363327674023822, <Segment(3722.37, 3726.73)>], 'SPEAKER_04': [12.342954159592523, <Segment(64.3718, 76.7148)>]}
SPEAKER_10 Possible Impossibilities and Imp

Saving audios for each speaker in each audio file:  46%|████▌     | 24/52 [15:23<18:19, 39.28s/it]

{'SPEAKER_01': [4.889643463497578, <Segment(3938.84, 3943.73)>], 'SPEAKER_10': [4.75382003395589, <Segment(750.586, 755.34)>], 'SPEAKER_11': [4.261460101867669, <Segment(3605.92, 3610.18)>], 'SPEAKER_02': [3.056027164685929, <Segment(1826.99, 1830.04)>], 'SPEAKER_05': [5.585738539898557, <Segment(3817.44, 3823.03)>], 'SPEAKER_07': [3.1748726655346218, <Segment(1887.11, 1890.28)>], 'SPEAKER_06': [22.88624787775916, <Segment(3840.74, 3863.62)>], 'SPEAKER_03': [4.0747028862479056, <Segment(1091.89, 1095.97)>], 'SPEAKER_09': [5.314091680814727, <Segment(1219.87, 1225.19)>], 'SPEAKER_08': [10.780984719864136, <Segment(1875.22, 1886)>], 'SPEAKER_00': [28.556876061120875, <Segment(3872.88, 3901.43)>], 'SPEAKER_04': [9.728353140916624, <Segment(3709.02, 3718.75)>]}
SPEAKER_01 Pretraining Task Diversity and the Emergence of Non-Bayesian In-Context Learning for Regression [Gag7H4M-GdQ].mp3 4.889643463497578 3938.83701188455 3943.7266553480476
SPEAKER_10 Pretraining Task Diversity and the Emergen

Saving audios for each speaker in each audio file:  48%|████▊     | 25/52 [16:06<18:05, 40.21s/it]

{'SPEAKER_19': [4.5500848896435855, <Segment(2316.27, 2320.82)>], 'SPEAKER_17': [4.584040747029121, <Segment(3743.06, 3747.65)>], 'SPEAKER_15': [5.636672325976178, <Segment(981.74, 987.377)>], 'SPEAKER_06': [19.134125636672252, <Segment(3326.87, 3346)>], 'SPEAKER_03': [1.612903225806349, <Segment(3484.81, 3486.43)>], 'SPEAKER_14': [6.36672325976231, <Segment(47.2071, 53.5739)>], 'SPEAKER_20': [9.881154499151307, <Segment(1728.5, 1738.38)>], 'SPEAKER_10': [17.130730050933806, <Segment(115.662, 132.793)>], 'SPEAKER_02': [27.80984719864179, <Segment(148.905, 176.715)>], 'SPEAKER_00': [11.68081494057742, <Segment(3128.21, 3139.89)>], 'SPEAKER_04': [11.918505942274805, <Segment(2415.71, 2427.63)>], 'SPEAKER_05': [5.755517826825326, <Segment(2713.17, 2718.92)>], 'SPEAKER_09': [4.53310696095059, <Segment(1972.93, 1977.46)>], 'SPEAKER_08': [10.831918505942213, <Segment(2025.97, 2036.8)>], 'SPEAKER_18': [5.042444821731806, <Segment(2441.98, 2447.02)>], 'SPEAKER_01': [10.441426146010144, <Segmen

Saving audios for each speaker in each audio file:  50%|█████     | 26/52 [17:15<21:10, 48.87s/it]

{'SPEAKER_01': [37.58913412563663, <Segment(430.518, 468.107)>], 'SPEAKER_10': [55.297113752122186, <Segment(1525.61, 1580.91)>], 'SPEAKER_11': [6.977928692698697, <Segment(4410.45, 4417.43)>], 'SPEAKER_02': [23.480475382003533, <Segment(3890.06, 3913.54)>], 'SPEAKER_09': [14.872665534803673, <Segment(4359.41, 4374.29)>], 'SPEAKER_05': [3.123938879456773, <Segment(835.289, 838.413)>], 'SPEAKER_03': [8.285229202037044, <Segment(3256, 3264.29)>], 'SPEAKER_06': [12.920203735144696, <Segment(3132.25, 3145.17)>], 'SPEAKER_07': [22.835314091681084, <Segment(2317.05, 2339.89)>], 'SPEAKER_08': [13.174872665535077, <Segment(5198.24, 5211.42)>], 'SPEAKER_00': [31.96943972835288, <Segment(4886.31, 4918.28)>], 'SPEAKER_04': [7.945670628183052, <Segment(3316.14, 3324.08)>]}
SPEAKER_01 Short Talks [c_UqS54ZA5o].mp3 37.58913412563663 430.5178268251274 468.106960950764
SPEAKER_10 Short Talks [c_UqS54ZA5o].mp3 55.297113752122186 1525.6112054329374 1580.9083191850596
SPEAKER_11 Short Talks [c_UqS54ZA5o]

Saving audios for each speaker in each audio file:  52%|█████▏    | 27/52 [18:14<21:35, 51.82s/it]

{'SPEAKER_19': [7.724957555178207, <Segment(1152.4, 1160.13)>], 'SPEAKER_17': [9.677419354839003, <Segment(4159.28, 4168.96)>], 'SPEAKER_15': [23.378607809847182, <Segment(86.7317, 110.11)>], 'SPEAKER_06': [14.125636672325982, <Segment(4076.66, 4090.79)>], 'SPEAKER_03': [7.1137521222408395, <Segment(4211.32, 4218.43)>], 'SPEAKER_14': [1.1205432937181286, <Segment(612.402, 613.523)>], 'SPEAKER_20': [6.196943972835015, <Segment(3528.77, 3534.97)>], 'SPEAKER_10': [14.855687606112042, <Segment(3702.89, 3717.75)>], 'SPEAKER_02': [14.261460101867215, <Segment(4017.6, 4031.86)>], 'SPEAKER_00': [12.275042444821793, <Segment(3283.54, 3295.81)>], 'SPEAKER_04': [6.19694397283547, <Segment(2810.53, 2816.73)>], 'SPEAKER_05': [7.5042444821733625, <Segment(1245.34, 1252.84)>], 'SPEAKER_09': [23.08998302207135, <Segment(365.713, 388.803)>], 'SPEAKER_08': [3.022071307301303, <Segment(4256.36, 4259.38)>], 'SPEAKER_18': [39.66044142614601, <Segment(3974.25, 4013.91)>], 'SPEAKER_01': [8.200339558574342, <

Saving audios for each speaker in each audio file:  54%|█████▍    | 28/52 [19:34<24:11, 60.49s/it]

{'SPEAKER_01': [2.2750424448217927, <Segment(3652.32, 3654.59)>], 'SPEAKER_10': [6.4516129032258505, <Segment(1074.22, 1080.67)>], 'SPEAKER_11': [7.945670628183052, <Segment(3713.76, 3721.71)>], 'SPEAKER_02': [16.89303904923599, <Segment(2.62309, 19.5161)>], 'SPEAKER_09': [11.578947368421268, <Segment(3600.3, 3611.88)>], 'SPEAKER_05': [7.5891341256365195, <Segment(3495.68, 3503.27)>], 'SPEAKER_03': [2.61460101867533, <Segment(2712.79, 2715.41)>], 'SPEAKER_06': [12.139219015280105, <Segment(1770.14, 1782.28)>], 'SPEAKER_07': [5.246179966044338, <Segment(783.166, 788.413)>], 'SPEAKER_08': [8.692699490662108, <Segment(512.419, 521.112)>], 'SPEAKER_00': [7.945670628183279, <Segment(841.52, 849.465)>], 'SPEAKER_04': [115.39898132427834, <Segment(1887.33, 2002.72)>]}
SPEAKER_10 Towards Reliable Use of Large Language Models： Better Detection, Consistency, and Instruction-Tuning [vuWbJlBePPA].mp3 6.4516129032258505 1074.2190152801359 1080.6706281833617
SPEAKER_11 Towards Reliable Use of Large 

Saving audios for each speaker in each audio file:  56%|█████▌    | 29/52 [20:09<20:16, 52.89s/it]

{'SPEAKER_01': [9.219015280135864, <Segment(4463.9, 4473.12)>], 'SPEAKER_10': [10.152801358234228, <Segment(2388.92, 2399.07)>], 'SPEAKER_02': [8.641765704584031, <Segment(4288.6, 4297.24)>], 'SPEAKER_05': [16.536502546689007, <Segment(4126.6, 4143.13)>], 'SPEAKER_07': [9.779286926994928, <Segment(318.226, 328.005)>], 'SPEAKER_06': [4.634974533106856, <Segment(302.419, 307.054)>], 'SPEAKER_03': [10.560271646858382, <Segment(4364.35, 4374.92)>], 'SPEAKER_09': [11.018675721561976, <Segment(1257.87, 1268.89)>], 'SPEAKER_08': [5.059422750424346, <Segment(4010.76, 4015.81)>], 'SPEAKER_00': [162.93718166383678, <Segment(2828.92, 2991.86)>], 'SPEAKER_04': [8.522920203735339, <Segment(3866.65, 3875.17)>]}
SPEAKER_01 Watermarking of Large Language Models [2Kx9jbSMZqA].mp3 9.219015280135864 4463.896434634975 4473.115449915111
SPEAKER_10 Watermarking of Large Language Models [2Kx9jbSMZqA].mp3 10.152801358234228 2388.9219015280137 2399.074702886248
SPEAKER_02 Watermarking of Large Language Models 

Saving audios for each speaker in each audio file:  58%|█████▊    | 30/52 [20:56<18:40, 50.91s/it]

{'SPEAKER_01': [10.390492359932068, <Segment(1255.8, 1266.19)>], 'SPEAKER_10': [4.27843803056021, <Segment(3917.22, 3921.5)>], 'SPEAKER_11': [4.7877758913414254, <Segment(1351.59, 1356.38)>], 'SPEAKER_02': [2.003395585738417, <Segment(2509.28, 2511.28)>], 'SPEAKER_09': [6.825127334465378, <Segment(2951.03, 2957.85)>], 'SPEAKER_05': [13.497453310696073, <Segment(859.924, 873.421)>], 'SPEAKER_06': [9.168081494057787, <Segment(1304.2, 1313.37)>], 'SPEAKER_03': [4.617996604414202, <Segment(1444.8, 1449.41)>], 'SPEAKER_07': [7.996604414261583, <Segment(2196.04, 2204.03)>], 'SPEAKER_08': [22.69949066213922, <Segment(0.00848896, 22.708)>], 'SPEAKER_00': [21.663837011884425, <Segment(1391.74, 1413.4)>], 'SPEAKER_04': [12.427843803056021, <Segment(1122.54, 1134.97)>]}
SPEAKER_01 Understanding the Origins and Taxonomy of Neural Scaling Laws [MUvFuZpxLU8].mp3 10.390492359932068 1255.7979626485571 1266.1884550084892
SPEAKER_10 Understanding the Origins and Taxonomy of Neural Scaling Laws [MUvFuZpx

Saving audios for each speaker in each audio file:  60%|█████▉    | 31/52 [21:35<16:35, 47.42s/it]

{'SPEAKER_01': [9.524617996604775, <Segment(2830.55, 2840.08)>], 'SPEAKER_02': [1.1714770797962046, <Segment(2908.14, 2909.31)>], 'SPEAKER_05': [13.174872665534622, <Segment(3039.63, 3052.81)>], 'SPEAKER_06': [6.146010186757394, <Segment(2923.49, 2929.63)>], 'SPEAKER_03': [7.623089983022055, <Segment(3009.47, 3017.09)>], 'SPEAKER_07': [14.108658743633441, <Segment(3189.02, 3203.13)>], 'SPEAKER_00': [10.747028862478714, <Segment(452.81, 463.557)>], 'SPEAKER_04': [6.349745331069698, <Segment(2745, 2751.35)>]}
SPEAKER_01 A Background Independent Algebra in Quantum Gravity - Edward Witten [wYXFWXVUjCU].mp3 9.524617996604775 2830.5517826825126 2840.0764006791173
SPEAKER_05 A Background Independent Algebra in Quantum Gravity - Edward Witten [wYXFWXVUjCU].mp3 13.174872665534622 3039.634974533107 3052.809847198642
SPEAKER_06 A Background Independent Algebra in Quantum Gravity - Edward Witten [wYXFWXVUjCU].mp3 6.146010186757394 2923.48896434635 2929.634974533107
SPEAKER_03 A Background Independ

Saving audios for each speaker in each audio file:  62%|██████▏   | 32/52 [21:56<13:10, 39.54s/it]

{'SPEAKER_01': [9.813242784380236, <Segment(2391.69, 2401.5)>], 'SPEAKER_00': [13.022071307300394, <Segment(707.496, 720.518)>], 'SPEAKER_03': [12.393887945670485, <Segment(2491.71, 2504.1)>], 'SPEAKER_02': [2.308998302207133, <Segment(15.3735, 17.6825)>]}
SPEAKER_01 AdS3⧸RMT2 Duality - Eric Perlmutter [FycJicC0-tU].mp3 9.813242784380236 2391.6893039049237 2401.502546689304
SPEAKER_00 AdS3⧸RMT2 Duality - Eric Perlmutter [FycJicC0-tU].mp3 13.022071307300394 707.4957555178269 720.5178268251273
SPEAKER_03 AdS3⧸RMT2 Duality - Eric Perlmutter [FycJicC0-tU].mp3 12.393887945670485 2491.7062818336162 2504.1001697792867


Saving audios for each speaker in each audio file:  63%|██████▎   | 33/52 [22:03<09:24, 29.73s/it]

{'SPEAKER_01': [24.872665534805037, <Segment(2443.79, 2468.67)>], 'SPEAKER_02': [11.528013582343192, <Segment(2509.72, 2521.25)>], 'SPEAKER_05': [10.407470288624609, <Segment(2216.99, 2227.39)>], 'SPEAKER_03': [3.769100169779289, <Segment(15.017, 18.7861)>], 'SPEAKER_06': [12.054329371816948, <Segment(2554.63, 2566.68)>], 'SPEAKER_07': [5.212224108659029, <Segment(2266.51, 2271.72)>], 'SPEAKER_00': [70.57724957555183, <Segment(381.944, 452.521)>], 'SPEAKER_04': [8.998302207131019, <Segment(2022.91, 2031.91)>]}
SPEAKER_01 Black Hole Interiors - Stefan Hollands [VwOawM1hEhU].mp3 24.872665534805037 2443.7945670628183 2468.6672325976233
SPEAKER_02 Black Hole Interiors - Stefan Hollands [VwOawM1hEhU].mp3 11.528013582343192 2509.7198641765704 2521.2478777589135
SPEAKER_05 Black Hole Interiors - Stefan Hollands [VwOawM1hEhU].mp3 10.407470288624609 2216.986417657046 2227.3938879456705
SPEAKER_03 Black Hole Interiors - Stefan Hollands [VwOawM1hEhU].mp3 3.769100169779289 15.0169779286927 18.7860

Saving audios for each speaker in each audio file:  65%|██████▌   | 34/52 [22:21<07:52, 26.24s/it]

{'SPEAKER_01': [6.485568760611386, <Segment(2119.86, 2126.34)>], 'SPEAKER_02': [4.889643463497123, <Segment(2595.22, 2600.11)>], 'SPEAKER_05': [7.419354838709751, <Segment(1993.39, 2000.81)>], 'SPEAKER_06': [10.237691001697385, <Segment(2450.08, 2460.31)>], 'SPEAKER_03': [5.348047538200717, <Segment(2065.12, 2070.47)>], 'SPEAKER_07': [1.0186757215619764, <Segment(2653.66, 2654.68)>], 'SPEAKER_00': [48.69269949066211, <Segment(1782.39, 1831.08)>], 'SPEAKER_04': [10.067911714771071, <Segment(2379.58, 2389.65)>]}
SPEAKER_01 Closed Cosmologies in Two Dimensional Gravity - Ying Zhao [-rZ0vg_6RRA].mp3 6.485568760611386 2119.855687606112 2126.3412563667234
SPEAKER_02 Closed Cosmologies in Two Dimensional Gravity - Ying Zhao [-rZ0vg_6RRA].mp3 4.889643463497123 2595.2207130730053 2600.1103565365024
SPEAKER_05 Closed Cosmologies in Two Dimensional Gravity - Ying Zhao [-rZ0vg_6RRA].mp3 7.419354838709751 1993.3870967741937 2000.8064516129034
SPEAKER_06 Closed Cosmologies in Two Dimensional Gravity

Saving audios for each speaker in each audio file:  67%|██████▋   | 35/52 [22:37<06:33, 23.12s/it]

{'SPEAKER_01': [11.052631578947512, <Segment(2530.79, 2541.84)>], 'SPEAKER_02': [14.04074702886237, <Segment(2074.59, 2088.63)>], 'SPEAKER_05': [5.789473684210407, <Segment(2374.9, 2380.69)>], 'SPEAKER_03': [3.582342954159685, <Segment(2322.93, 2326.51)>], 'SPEAKER_06': [10.424448217317604, <Segment(2346.31, 2356.73)>], 'SPEAKER_07': [8.947368421052488, <Segment(2011.28, 2020.23)>], 'SPEAKER_08': [7.843803056027355, <Segment(2188.62, 2196.46)>], 'SPEAKER_00': [47.164685908318916, <Segment(2209.94, 2257.11)>], 'SPEAKER_04': [2.7504244482173164, <Segment(17.9032, 20.6537)>]}
SPEAKER_01 Complexity of Learning and Creating Quantum Systems - Hsin-Yuan (Robert) Huang [fRmPVH9udCk].mp3 11.052631578947512 2530.7894736842104 2541.842105263158
SPEAKER_02 Complexity of Learning and Creating Quantum Systems - Hsin-Yuan (Robert) Huang [fRmPVH9udCk].mp3 14.04074702886237 2074.5925297113754 2088.6332767402378
SPEAKER_05 Complexity of Learning and Creating Quantum Systems - Hsin-Yuan (Robert) Huang [f

Saving audios for each speaker in each audio file:  69%|██████▉   | 36/52 [22:55<05:47, 21.73s/it]

{'SPEAKER_01': [3.378607809847381, <Segment(2207.14, 2210.52)>], 'SPEAKER_02': [10.577249575551377, <Segment(2131.83, 2142.4)>], 'SPEAKER_05': [11.748726655348037, <Segment(733.183, 744.932)>], 'SPEAKER_03': [6.010186757215706, <Segment(921.723, 927.733)>], 'SPEAKER_06': [10.747028862479056, <Segment(2478.12, 2488.87)>], 'SPEAKER_07': [6.67232597623115, <Segment(1515.78, 1522.45)>], 'SPEAKER_00': [28.40407470288619, <Segment(1884.49, 1912.89)>], 'SPEAKER_04': [1.035653650254666, <Segment(15.9677, 17.0034)>]}
SPEAKER_01 Cosmology from Random Entanglement - Stefano Antonini [U1qHGa65lF8].mp3 3.378607809847381 2207.13921901528 2210.5178268251275
SPEAKER_02 Cosmology from Random Entanglement - Stefano Antonini [U1qHGa65lF8].mp3 10.577249575551377 2131.8251273344654 2142.4023769100168
SPEAKER_05 Cosmology from Random Entanglement - Stefano Antonini [U1qHGa65lF8].mp3 11.748726655348037 733.1833616298811 744.9320882852292
SPEAKER_03 Cosmology from Random Entanglement - Stefano Antonini [U1qHG

Saving audios for each speaker in each audio file:  71%|███████   | 37/52 [23:12<05:00, 20.03s/it]

{'SPEAKER_01': [5.263157894736651, <Segment(2725.61, 2730.87)>], 'SPEAKER_02': [12.665534804753861, <Segment(2529.33, 2541.99)>], 'SPEAKER_05': [2.5127334465196327, <Segment(2033.4, 2035.92)>], 'SPEAKER_06': [0.7809847198641791, <Segment(16.6808, 17.4618)>], 'SPEAKER_03': [5.874363327674018, <Segment(2703.88, 2709.75)>], 'SPEAKER_07': [3.8200339558573884, <Segment(15.4924, 19.3124)>], 'SPEAKER_08': [21.035653650254517, <Segment(2584.12, 2605.15)>], 'SPEAKER_00': [48.930390492359834, <Segment(439.737, 488.667)>], 'SPEAKER_04': [17.028862478777683, <Segment(2413.12, 2430.14)>]}
SPEAKER_01 Gauging Spacetime Inversions - Daniel Harlow [kLppbg3Gg1k].mp3 5.263157894736651 2725.6112054329374 2730.874363327674
SPEAKER_02 Gauging Spacetime Inversions - Daniel Harlow [kLppbg3Gg1k].mp3 12.665534804753861 2529.3293718166383 2541.994906621392
SPEAKER_03 Gauging Spacetime Inversions - Daniel Harlow [kLppbg3Gg1k].mp3 5.874363327674018 2703.879456706282 2709.753820033956
SPEAKER_07 Gauging Spacetime I

Saving audios for each speaker in each audio file:  73%|███████▎  | 38/52 [23:29<04:27, 19.13s/it]

{'SPEAKER_01': [10.526315789473756, <Segment(1460.7, 1471.23)>], 'SPEAKER_02': [11.595925297113808, <Segment(1546.6, 1558.19)>], 'SPEAKER_05': [5.3140916808151815, <Segment(2157.84, 2163.15)>], 'SPEAKER_06': [16.859083191850914, <Segment(1035.93, 1052.79)>], 'SPEAKER_03': [15.449915110356415, <Segment(2224.03, 2239.48)>], 'SPEAKER_00': [8.30220713073004, <Segment(2375.1, 2383.4)>], 'SPEAKER_04': [5.212224108659029, <Segment(1060.59, 1065.8)>]}
SPEAKER_01 Flatiron Wide Algorithms and Mathematics - Marylou Gabrié (October 20, 2020) [sWZB7xJahgY].mp3 10.526315789473756 1460.7045840407472 1471.230899830221
SPEAKER_02 Flatiron Wide Algorithms and Mathematics - Marylou Gabrié (October 20, 2020) [sWZB7xJahgY].mp3 11.595925297113808 1546.5959252971138 1558.1918505942276
SPEAKER_05 Flatiron Wide Algorithms and Mathematics - Marylou Gabrié (October 20, 2020) [sWZB7xJahgY].mp3 5.3140916808151815 2157.8353140916806 2163.149405772496
SPEAKER_06 Flatiron Wide Algorithms and Mathematics - Marylou Gab

Saving audios for each speaker in each audio file:  75%|███████▌  | 39/52 [23:53<04:28, 20.62s/it]

{'SPEAKER_01': [6.825127334465378, <Segment(2257.39, 2264.22)>], 'SPEAKER_02': [9.100169779286716, <Segment(2357.9, 2367)>], 'SPEAKER_05': [7.7419354838712025, <Segment(2297.16, 2304.9)>], 'SPEAKER_03': [5.331069609507637, <Segment(62.4363, 67.7674)>], 'SPEAKER_06': [7.266553480475068, <Segment(2179.79, 2187.05)>], 'SPEAKER_00': [8.8115449915108, <Segment(1207.5, 1216.31)>], 'SPEAKER_04': [36.4176570458404, <Segment(180.331, 216.749)>]}
SPEAKER_01 Gravitational Algebras in (A)dS2 - David Kolchmeyer [b7313tKBe28].mp3 6.825127334465378 2257.3938879456705 2264.219015280136
SPEAKER_02 Gravitational Algebras in (A)dS2 - David Kolchmeyer [b7313tKBe28].mp3 9.100169779286716 2357.9032258064517 2367.0033955857384
SPEAKER_05 Gravitational Algebras in (A)dS2 - David Kolchmeyer [b7313tKBe28].mp3 7.7419354838712025 2297.1561969439726 2304.898132427844
SPEAKER_03 Gravitational Algebras in (A)dS2 - David Kolchmeyer [b7313tKBe28].mp3 5.331069609507637 62.43633276740238 67.76740237691001
SPEAKER_06 Gra

Saving audios for each speaker in each audio file:  77%|███████▋  | 40/52 [24:07<03:45, 18.80s/it]

{'SPEAKER_01': [32.512733446519405, <Segment(640.942, 673.455)>], 'SPEAKER_02': [5.246179966044139, <Segment(16.0017, 21.2479)>], 'SPEAKER_05': [4.974533106960735, <Segment(2463.39, 2468.36)>], 'SPEAKER_06': [13.446519524617997, <Segment(2484.1, 2497.55)>], 'SPEAKER_03': [4.567062818336126, <Segment(2531.06, 2535.63)>], 'SPEAKER_07': [5.365025466892803, <Segment(2316.31, 2321.67)>], 'SPEAKER_08': [1.222410865874508, <Segment(592.317, 593.54)>], 'SPEAKER_00': [8.522920203735339, <Segment(2597.33, 2605.85)>], 'SPEAKER_04': [6.315789473684163, <Segment(2418.29, 2424.61)>]}
SPEAKER_01 Holography on the Quantum Disk - Ahmed Almheiri [LVhz479gja8].mp3 32.512733446519405 640.9422750424449 673.4550084889643
SPEAKER_02 Holography on the Quantum Disk - Ahmed Almheiri [LVhz479gja8].mp3 5.246179966044139 16.001697792869273 21.24787775891341
SPEAKER_05 Holography on the Quantum Disk - Ahmed Almheiri [LVhz479gja8].mp3 4.974533106960735 2463.3870967741937 2468.3616298811544
SPEAKER_06 Holography on t

Saving audios for each speaker in each audio file:  79%|███████▉  | 41/52 [24:25<03:25, 18.65s/it]

{'SPEAKER_01': [38.21731748726643, <Segment(1459.79, 1498.01)>], 'SPEAKER_02': [13.191850594227617, <Segment(2255.1, 2268.29)>], 'SPEAKER_05': [14.651952461799738, <Segment(2436.73, 2451.38)>], 'SPEAKER_06': [8.930390492359948, <Segment(2557.09, 2566.02)>], 'SPEAKER_03': [6.112054329371858, <Segment(2370.99, 2377.11)>], 'SPEAKER_00': [4.668930390492278, <Segment(1575.85, 1580.52)>], 'SPEAKER_04': [12.258064516129252, <Segment(2616.04, 2628.29)>]}
SPEAKER_01 Horizon Phase Spaces in General Relativity - Eanna Flanagan [QZfoBtwe2f8].mp3 38.21731748726643 1459.7877758913414 1498.0050933786079
SPEAKER_02 Horizon Phase Spaces in General Relativity - Eanna Flanagan [QZfoBtwe2f8].mp3 13.191850594227617 2255.101867572156 2268.2937181663838
SPEAKER_05 Horizon Phase Spaces in General Relativity - Eanna Flanagan [QZfoBtwe2f8].mp3 14.651952461799738 2436.7317487266555 2451.3837011884552
SPEAKER_06 Horizon Phase Spaces in General Relativity - Eanna Flanagan [QZfoBtwe2f8].mp3 8.930390492359948 2557.0

Saving audios for each speaker in each audio file:  81%|████████  | 42/52 [24:42<03:00, 18.03s/it]

{'SPEAKER_01': [6.281833616298627, <Segment(2549.26, 2555.54)>], 'SPEAKER_02': [12.682512733446401, <Segment(2656.56, 2669.24)>], 'SPEAKER_03': [5.840407470288483, <Segment(2793.4, 2799.24)>], 'SPEAKER_00': [33.20882852292016, <Segment(259.194, 292.402)>], 'SPEAKER_04': [0.6451612903225818, <Segment(15.3565, 16.0017)>]}
SPEAKER_01 Islands Far Outside the Horizon - Geoff Penington [irmUX1RrHM0].mp3 6.281833616298627 2549.2614601018677 2555.5432937181663
SPEAKER_02 Islands Far Outside the Horizon - Geoff Penington [irmUX1RrHM0].mp3 12.682512733446401 2656.5619694397283 2669.2444821731747
SPEAKER_03 Islands Far Outside the Horizon - Geoff Penington [irmUX1RrHM0].mp3 5.840407470288483 2793.404074702886 2799.2444821731747
SPEAKER_00 Islands Far Outside the Horizon - Geoff Penington [irmUX1RrHM0].mp3 33.20882852292016 259.1935483870968 292.402376910017


Saving audios for each speaker in each audio file:  83%|████████▎ | 43/52 [24:52<02:20, 15.58s/it]

{'SPEAKER_01': [9.202037351442868, <Segment(2170.38, 2179.58)>], 'SPEAKER_02': [7.8777589134124355, <Segment(2675.32, 2683.2)>], 'SPEAKER_05': [9.117147707979711, <Segment(2220.21, 2229.33)>], 'SPEAKER_06': [63.00509337860785, <Segment(815.034, 878.039)>], 'SPEAKER_03': [4.821731748726506, <Segment(2415.68, 2420.5)>], 'SPEAKER_00': [10.662139219015444, <Segment(2350.59, 2361.25)>], 'SPEAKER_04': [3.3276740237693048, <Segment(2425.34, 2428.67)>]}
SPEAKER_01 Keeping Matter in the Loop in 3D Quantum Gravity - Alejandra Castro [mTSTyP3paiQ].mp3 9.202037351442868 2170.382003395586 2179.5840407470287
SPEAKER_02 Keeping Matter in the Loop in 3D Quantum Gravity - Alejandra Castro [mTSTyP3paiQ].mp3 7.8777589134124355 2675.3225806451615 2683.200339558574
SPEAKER_05 Keeping Matter in the Loop in 3D Quantum Gravity - Alejandra Castro [mTSTyP3paiQ].mp3 9.117147707979711 2220.2122241086586 2229.3293718166383
SPEAKER_06 Keeping Matter in the Loop in 3D Quantum Gravity - Alejandra Castro [mTSTyP3paiQ]

Saving audios for each speaker in each audio file:  85%|████████▍ | 44/52 [25:09<02:07, 15.92s/it]

{'SPEAKER_01': [10.628183361629908, <Segment(2431.54, 2442.16)>], 'SPEAKER_02': [8.8115449915108, <Segment(2531.49, 2540.3)>], 'SPEAKER_05': [83.106960950764, <Segment(2303.23, 2386.34)>], 'SPEAKER_03': [3.56536502546669, <Segment(2696.88, 2700.45)>], 'SPEAKER_00': [8.658743633276737, <Segment(15.2037, 23.8625)>], 'SPEAKER_04': [10.645161290322449, <Segment(2727.21, 2737.85)>]}
SPEAKER_01 New Non-Perturbative Results from a Random Matrix Model of N=2 JT Supergravity - Clifford V. Johnson [RQeVOsRUujc].mp3 10.628183361629908 2431.5365025466895 2442.1646859083194
SPEAKER_02 New Non-Perturbative Results from a Random Matrix Model of N=2 JT Supergravity - Clifford V. Johnson [RQeVOsRUujc].mp3 8.8115449915108 2531.4855687606114 2540.297113752122
SPEAKER_05 New Non-Perturbative Results from a Random Matrix Model of N=2 JT Supergravity - Clifford V. Johnson [RQeVOsRUujc].mp3 83.106960950764 2303.2342954159594 2386.3412563667234
SPEAKER_03 New Non-Perturbative Results from a Random Matrix Mode

Saving audios for each speaker in each audio file:  87%|████████▋ | 45/52 [25:24<01:49, 15.70s/it]

{'SPEAKER_01': [6.2139219015280105, <Segment(2399.45, 2405.66)>], 'SPEAKER_00': [28.760611205432724, <Segment(2452.11, 2480.87)>], 'SPEAKER_03': [7.928692699490512, <Segment(2173.73, 2181.66)>], 'SPEAKER_02': [1.6977928692699464, <Segment(15.3056, 17.0034)>]}
SPEAKER_01 Scramblon Loops - Shunyu Yao [xpk91HmzUPo].mp3 6.2139219015280105 2399.4482173174874 2405.6621392190154
SPEAKER_00 Scramblon Loops - Shunyu Yao [xpk91HmzUPo].mp3 28.760611205432724 2452.1137521222413 2480.874363327674
SPEAKER_03 Scramblon Loops - Shunyu Yao [xpk91HmzUPo].mp3 7.928692699490512 2173.7266553480476 2181.655348047538


Saving audios for each speaker in each audio file:  88%|████████▊ | 46/52 [25:30<01:17, 12.88s/it]

{'SPEAKER_01': [5.48387096774195, <Segment(2699.99, 2705.48)>], 'SPEAKER_10': [20.203735144312304, <Segment(2022.96, 2043.17)>], 'SPEAKER_11': [19.134125636672252, <Segment(3175.81, 3194.95)>], 'SPEAKER_02': [12.054329371816493, <Segment(1401.96, 1414.02)>], 'SPEAKER_09': [60.52631578947367, <Segment(66.2224, 126.749)>], 'SPEAKER_05': [1.6977928692699606, <Segment(1156.95, 1158.65)>], 'SPEAKER_12': [6.485568760611386, <Segment(1961.45, 1967.94)>], 'SPEAKER_13': [14.006791171477289, <Segment(1191.99, 1206)>], 'SPEAKER_06': [12.12224108658711, <Segment(3071.93, 3084.05)>], 'SPEAKER_03': [12.58064516129025, <Segment(3437.28, 3449.86)>], 'SPEAKER_07': [42.63157894736844, <Segment(199.465, 242.097)>], 'SPEAKER_08': [6.621392190153074, <Segment(2622.2, 2628.82)>], 'SPEAKER_00': [7.775891341256283, <Segment(3492.01, 3499.79)>], 'SPEAKER_04': [10.203735144312304, <Segment(1147.22, 1157.43)>]}
SPEAKER_01 Shirley Ho - Using Deep Learning as a Last Resort？! (September 30, 2022) [gWNqWT1VnUI].mp3 

Saving audios for each speaker in each audio file:  90%|█████████ | 47/52 [26:13<01:48, 21.78s/it]

{'SPEAKER_01': [8.607809847198496, <Segment(2475.39, 2484)>], 'SPEAKER_00': [75.94227504244486, <Segment(609.516, 685.458)>], 'SPEAKER_02': [2.1901528013586358, <Segment(1979.72, 1981.91)>]}
SPEAKER_01 Symplectic Analysis of Null Raychaudhuri - Luca Ciambelli [F--4r51Wnxs].mp3 8.607809847198496 2475.390492359932 2483.9983022071306
SPEAKER_00 Symplectic Analysis of Null Raychaudhuri - Luca Ciambelli [F--4r51Wnxs].mp3 75.94227504244486 609.516129032258 685.4584040747029


Saving audios for each speaker in each audio file:  92%|█████████▏| 48/52 [26:18<01:06, 16.72s/it]

{'SPEAKER_01': [11.137521222411124, <Segment(2523.61, 2534.75)>], 'SPEAKER_00': [63.599320882852226, <Segment(2316.15, 2379.75)>], 'SPEAKER_03': [4.397283531409165, <Segment(15.2547, 19.652)>], 'SPEAKER_02': [6.451612903225396, <Segment(2394.83, 2401.28)>]}
SPEAKER_01 The Non-Perturbative Hilbert Space of JT Gravity - Luca Iliesiu [K_K8ComNqTU].mp3 11.137521222411124 2523.6078098471985 2534.7453310696096
SPEAKER_00 The Non-Perturbative Hilbert Space of JT Gravity - Luca Iliesiu [K_K8ComNqTU].mp3 63.599320882852226 2316.1544991511037 2379.753820033956
SPEAKER_03 The Non-Perturbative Hilbert Space of JT Gravity - Luca Iliesiu [K_K8ComNqTU].mp3 4.397283531409165 15.254668930390494 19.65195246179966
SPEAKER_02 The Non-Perturbative Hilbert Space of JT Gravity - Luca Iliesiu [K_K8ComNqTU].mp3 6.451612903225396 2394.8302207130732 2401.2818336162986


Saving audios for each speaker in each audio file:  94%|█████████▍| 49/52 [26:27<00:43, 14.54s/it]

{'SPEAKER_01': [9.983022071307005, <Segment(2082.27, 2092.25)>], 'SPEAKER_02': [4.51612903225805, <Segment(2481.2, 2485.71)>], 'SPEAKER_05': [14.227504244482134, <Segment(1946.36, 1960.59)>], 'SPEAKER_07': [8.624787775891491, <Segment(2387.28, 2395.9)>], 'SPEAKER_06': [8.539898132427425, <Segment(2545.9, 2554.44)>], 'SPEAKER_03': [8.047538200339204, <Segment(2181.2, 2189.24)>], 'SPEAKER_09': [2.733446519524618, <Segment(15.2716, 18.0051)>], 'SPEAKER_08': [5.653650254668719, <Segment(2281.64, 2287.29)>], 'SPEAKER_00': [23.446519524617997, <Segment(1486.09, 1509.53)>], 'SPEAKER_04': [8.285229202037044, <Segment(2318.62, 2326.9)>]}
SPEAKER_01 Type I von Neumann Algebras from Gravitational Path Integrals：.... - Eugenia Colafranceschi [TSQzv7foGCI].mp3 9.983022071307005 2082.2665534804755 2092.2495755517825
SPEAKER_02 Type I von Neumann Algebras from Gravitational Path Integrals：.... - Eugenia Colafranceschi [TSQzv7foGCI].mp3 4.51612903225805 2481.1969439728355 2485.7130730050935
SPEAKER_05

Saving audios for each speaker in each audio file:  96%|█████████▌| 50/52 [26:48<00:32, 16.32s/it]

{'SPEAKER_01': [55.229202037351456, <Segment(399.584, 454.813)>], 'SPEAKER_02': [4.51612903225805, <Segment(2415.85, 2420.37)>], 'SPEAKER_05': [4.465195246179974, <Segment(2559.87, 2564.34)>], 'SPEAKER_03': [4.974533106961189, <Segment(2248.36, 2253.34)>], 'SPEAKER_06': [7.538200339558443, <Segment(2343.12, 2350.65)>], 'SPEAKER_07': [7.7928692699492785, <Segment(2467.73, 2475.53)>], 'SPEAKER_00': [3.0390492359933887, <Segment(2374.42, 2377.46)>], 'SPEAKER_04': [8.930390492359948, <Segment(2609.7, 2618.63)>]}
SPEAKER_01 Vacuum Asymptotic Codes - Thomas Faulkner [9rmPeSgkA0M].mp3 55.229202037351456 399.5840407470289 454.81324278438035
SPEAKER_02 Vacuum Asymptotic Codes - Thomas Faulkner [9rmPeSgkA0M].mp3 4.51612903225805 2415.848896434635 2420.3650254668933
SPEAKER_05 Vacuum Asymptotic Codes - Thomas Faulkner [9rmPeSgkA0M].mp3 4.465195246179974 2559.8726655348046 2564.3378607809846
SPEAKER_03 Vacuum Asymptotic Codes - Thomas Faulkner [9rmPeSgkA0M].mp3 4.974533106961189 2248.3616298811544

Saving audios for each speaker in each audio file:  98%|█████████▊| 51/52 [27:06<00:16, 16.92s/it]

{'SPEAKER_01': [50.441426146010144, <Segment(2167.8, 2218.24)>], 'SPEAKER_00': [7.572156196943979, <Segment(2158.99, 2166.56)>], 'SPEAKER_03': [8.115449915110275, <Segment(2103.46, 2111.57)>], 'SPEAKER_02': [4.3972835314089025, <Segment(1828.8, 1833.2)>]}
SPEAKER_01 What Exactly Does Bekenstein Bound？ - Jinzhao Wang [K0ivzrqsGXE].mp3 50.441426146010144 2167.8013582342955 2218.2427843803057
SPEAKER_00 What Exactly Does Bekenstein Bound？ - Jinzhao Wang [K0ivzrqsGXE].mp3 7.572156196943979 2158.9898132427843 2166.5619694397283
SPEAKER_03 What Exactly Does Bekenstein Bound？ - Jinzhao Wang [K0ivzrqsGXE].mp3 8.115449915110275 2103.4550084889643 2111.5704584040745
SPEAKER_02 What Exactly Does Bekenstein Bound？ - Jinzhao Wang [K0ivzrqsGXE].mp3 4.3972835314089025 1828.803056027165 1833.200339558574


Saving audios for each speaker in each audio file: 100%|██████████| 52/52 [27:14<00:00, 31.43s/it]


# Gender prediction

In [10]:
# Load the saved Keras model from `model_path` (set in the configuration cell
# at the top of the notebook); it is passed to predict_gender() below.
model = load_model(model_path)

2024-03-06 07:29:59.581386: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-06 07:29:59.593841: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-06 07:29:59.593954: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [23]:
def _format_hms(total_seconds):
    """Render a whole number of seconds as an unpadded "H:M:S" string (3725 -> "1:2:5")."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}:{minutes}:{seconds}"


# Build one summary record per audio file: the principal speaker (with
# predicted gender) and every other detected speaker as an "interruptor".
conferences_info = []

for file in os.listdir(audio_folder):
    file_name = os.path.basename(file)
    unique_speaker = unique_speakers[file_name]
    longest_segment = longest_segments_conferences[file_name]
    speakers_duration = durations_conferences[file_name]

    # Principal speaker is the one who talked the most
    principal_speaker = max(speakers_duration, key=speakers_duration.get)

    # Predict gender for principal speaker
    principal_speaker_gender = predict_gender(principal_speaker, file_name, model)

    # Collect data for each conference
    conference_data = {
        "conference": file_name,
        "principal_speaker": principal_speaker,
        "principal_speaker_gender": principal_speaker_gender,
        "number_of_interruptions": len(unique_speaker) - 1,
        "interruptors": []
    }

    # Process interruptors
    for speaker in unique_speaker:
        if speaker == principal_speaker:
            continue

        # Read the segment bounds BEFORE calling predict_gender: the error
        # branch below reports the interruption length, and previously it
        # read start_time/end_time that were either undefined (NameError on
        # the first failure) or left over from the previous speaker.
        # NOTE(review): assumes longest_segment has an entry for every
        # speaker in unique_speaker -- confirm against the diarization cell.
        segment = longest_segment[speaker][1]
        start_time = int(segment.start)
        end_time = int(segment.end)

        try:
            gender = predict_gender(speaker, file_name, model)
        except FileNotFoundError:
            # presumably no audio clip was saved for this speaker because the
            # segment was too short -- verify against the saving step
            conference_data["interruptors"].append({
                "speaker": speaker,
                "length of interruption": end_time - start_time,
                "error": "Interruption was too short"
            })
        else:
            conference_data["interruptors"].append({
                "speaker": speaker,
                "gender": gender,
                "length of interruption": end_time - start_time,
                "start_time": _format_hms(start_time),
                "end_time": _format_hms(end_time)
            })

    conferences_info.append(conference_data)





In [24]:
# Display the per-conference summaries collected in the previous cell.
conferences_info

[{'conference': 'Alfred Nobel and the Nobel Prizes  KITP Colloquium by Lars Brink.mp3',
  'principal_speaker': 'SPEAKER_02',
  'principal_speaker_gender': ('Male', array([0.8652825], dtype=float32)),
  'number_of_interruptions': 4,
  'interruptors': [{'speaker': 'SPEAKER_01',
    'gender': ('Male', array([0.9029277], dtype=float32)),
    'length of interruption': 28,
    'start_time': '0:57:45',
    'end_time': '0:58:13'},
   {'speaker': 'SPEAKER_03',
    'gender': ('Female', array([0.2345483], dtype=float32)),
    'length of interruption': 12,
    'start_time': '1:2:38',
    'end_time': '1:2:50'},
   {'speaker': 'SPEAKER_00',
    'gender': ('Male', array([0.5190715], dtype=float32)),
    'length of interruption': 28,
    'start_time': '1:5:36',
    'end_time': '1:6:4'},
   {'speaker': 'SPEAKER_04',
    'gender': ('Female', array([0.4122541], dtype=float32)),
    'length of interruption': 12,
    'start_time': '1:3:56',
    'end_time': '1:4:8'}]},
 {'conference': 'Adventures in Galaxy 