### Aim 
Running all 4 codebases on 1 of the recordings from Box, using dask
### Recording
34_KS_Recording4-LR_2165-2465_CT
### Codebases
Codebase 1: yinruiqing https://github.com/yinruiqing/pyannote-whisper/ <br />
Codebase 2: speechbox https://github.com/huggingface/speechbox/ <br />
Codebase 3: ashraf https://github.com/MahmoudAshraf97/whisper-diarization <br />
Codebase 4: whisperx https://github.com/m-bain/whisperX <br />

In [1]:
import pandas as pd
from pydub import AudioSegment
import os
import dask
import socket
from dask_jobqueue import SLURMCluster
from  distributed import Client
import subprocess
!module load openmind/ffmpeg/20160310 

In [9]:
# Load the WAV file
audio = AudioSegment.from_wav("rawAudioFiles/34_KS_Recording4-LR_2165-2465_CT.wav")
directory_path = os.path.join("chunks_"+"34_KS_Recording4-LR_2165-2465_CT")
os.makedirs(directory_path, exist_ok=True)
# Length of one minute in milliseconds
one_minute = 60 * 1000

# Split and save each 1-minute segment
for i in range(5):
    start_time = i * one_minute
    end_time = (i + 1) * one_minute if i < 4 else len(audio)
    segment = audio[start_time:end_time]
    name_of_file = f"minute_{i + 1}.wav"
    path_to_file = os.path.join(directory_path, name_of_file)
    segment.export(path_to_file, format="wav")

print("Audio split into 1-minute segments successfully!")


Audio split into 1-minute segments successfully!


In [55]:
cluster = SLURMCluster(cores=8,
                       processes=2,
                       memory="16GB",
                       account="cpl",
                       walltime="01:00:00",
                       queue="normal",
                       job_script_prologue  =[
                        'source /etc/profile.d/modules.sh' ,
                        'module load openmind8/anaconda/3-2023.09-0',
                        'module load openmind8/cuda/12.1',                 
                        'module load openmind8/cudnn/8.8.1-cuda12'  ,
                        'module load openmind/gcc/11.1.0',
                        'module load openmind/ffmpeg/20160310' , 
                        'source ~/.bashrc',
                        'conda activate whisperx'
                                              ],
                       job_extra=['--gres=gpu:QUADRORTX6000:1'] 
                       )
cluster.scale(5)
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41739 instead


In [2]:
!squeue -u arjunp

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
          37461657    normal     bash   arjunp  R       6:21      1 node091


In [71]:
!scancel 37451543 37451544 37451545

In [3]:
def run_cb1_script(audiopath):
     
    command = ['python', 'yinruiqing_trial.py', audiopath]  
    
    result = subprocess.run(command, capture_output=True, text=True)
     
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    return result

def run_cb2_script(audiopath):
     
    command = ['python', 'speechbox_trial.py', audiopath]  
    
    result = subprocess.run(command, capture_output=True, text=True)
     
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    return result

def run_cb3_script(audiopath):
     
    command = ['python', '/om/user/arjunp/ashraf_repo/whisper-diarization/diarize.py','-a',audiopath] 
    srtpath = audiopath[:-4] +".srt"
    command2 = ['python', '/om/user/arjunp/process_ashraf_output.py', srtpath]  
    result = subprocess.run(command, capture_output=True, text=True)
    result2 = subprocess.run(command2, capture_output=True, text=True)
     
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    return result

def run_whisperx(audiopath):
     
    command = ['python', 'whisperx_demo.py',audiopath]  
    
    result = subprocess.run(command, capture_output=True, text=True)
     
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    return result



In [58]:
result_list = []
for filename in os.listdir('chunks_34_KS_Recording4-LR_2165-2465_CT'):
    audiopath = os.path.join('chunks_34_KS_Recording4-LR_2165-2465_CT', filename)
    future  = client.submit(run_whisperx,audiopath)  
    result  = future.result()
    result_list.append(result)

In [31]:
print(result.stderr)

Traceback (most recent call last):
  File "/weka/scratch/weka/cpl/arjunp/whisperx_demo.py", line 1, in <module>
    import whisperx
  File "/home/arjunp/.conda/envs/whisperx/lib/python3.10/site-packages/whisperx/__init__.py", line 1, in <module>
    from .transcribe import load_model
  File "/home/arjunp/.conda/envs/whisperx/lib/python3.10/site-packages/whisperx/transcribe.py", line 10, in <module>
    from .asr import load_model
  File "/home/arjunp/.conda/envs/whisperx/lib/python3.10/site-packages/whisperx/asr.py", line 13, in <module>
    from .vad import load_vad_model, merge_chunks
  File "/home/arjunp/.conda/envs/whisperx/lib/python3.10/site-packages/whisperx/vad.py", line 9, in <module>
    from pyannote.audio import Model
  File "/home/arjunp/.conda/envs/whisperx/lib/python3.10/site-packages/pyannote/audio/__init__.py", line 29, in <module>
    from .core.inference import Inference
  File "/home/arjunp/.conda/envs/whisperx/lib/python3.10/site-packages/pyannote/audio/core/infere

In [27]:
def codebase1Run():
    cluster = SLURMCluster(cores=4,
                       processes=2,
                       memory="16GB",
                       account="cpl",
                       walltime="01:00:00",
                       queue="normal",
                       job_script_prologue  =[
                        'source /etc/profile.d/modules.sh' ,
                        'module load openmind8/anaconda/3-2023.09-0',
                        'module load openmind/gcc/11.1.0',
                        'module load openmind/ffmpeg/20160310' , 
                        'source ~/.bashrc',
                        'export MKL_THREADING_LAYER=GNU',
                        'conda activate torch_gpu'
                                              ],
                       job_extra=['--gres=gpu:QUADRORTX6000:1'] 
                       )
    cluster.scale(5)
    client = Client(cluster)
    result_list = []
    for filename in os.listdir('chunks_34_KS_Recording4-LR_2165-2465_CT'):
        audiopath = os.path.join('chunks_34_KS_Recording4-LR_2165-2465_CT', filename)
        future  = client.submit(run_cb1_script,audiopath)  
        result  = future.result()
        result_list.append(result)
    cluster.close()
    client.close()
    return result_list

In [5]:
def codebase2Run():
    cluster = SLURMCluster(cores=4,
                       processes=2,
                       memory="16GB",
                       account="cpl",
                       walltime="01:00:00",
                       queue="normal",
                       job_script_prologue  =[
                        'source /etc/profile.d/modules.sh' ,
                        'module load openmind8/anaconda/3-2023.09-0',
                        'module load openmind/gcc/11.1.0',
                        'module load openmind/ffmpeg/20160310' , 
                        'source ~/.bashrc',
                        'export MKL_THREADING_LAYER=GNU',
                        'conda activate torch_gpu'
                                              ],
                       job_extra=['--gres=gpu:QUADRORTX6000:1'] 
                       )
    cluster.scale(5)
    client = Client(cluster)
    result_list = []
    for filename in os.listdir('chunks_34_KS_Recording4-LR_2165-2465_CT'):
        audiopath = os.path.join('chunks_34_KS_Recording4-LR_2165-2465_CT', filename)
        future  = client.submit(run_cb2_script,audiopath)  
        result  = future.result()
        result_list.append(result)
    return result_list

In [25]:
def codebase3Run():
    cluster = SLURMCluster(cores=4,
                       processes=2,
                       memory="16GB",
                       account="cpl",
                       walltime="01:00:00",
                       queue="normal",
                       job_script_prologue  =[
                        'source /etc/profile.d/modules.sh' ,
                        'module load openmind8/anaconda/3-2023.09-0',
                        'module load openmind/gcc/11.1.0',
                        'module load openmind/ffmpeg/20160310' , 'source ~/.bashrc',
                        'conda activate codebase3',
                        'cd /om/user/arjunp/ashraf_repo/whisper-diarization'
                                              ],
                       job_extra=['--gres=gpu:QUADRORTX6000:1'] 
                       )
    cluster.scale(5)
    client = Client(cluster)
    result_list = []
    main_directory = os.path.join('/om/user/arjunp','chunks_34_KS_Recording4-LR_2165-2465_CT')
    for filename in os.listdir(main_directory):
        audiopath = os.path.join(main_directory, filename)
        future  = client.submit(run_cb3_script,audiopath)  
        result  = future.result()
        result_list.append(result)
    client.close()
    return result_list

In [7]:
def codebase4Run():
    cluster = SLURMCluster(cores=4,
                       processes=2,
                       memory="16GB",
                       account="cpl",
                       walltime="01:00:00",
                       queue="normal",
                       job_script_prologue  =[
                        'source /etc/profile.d/modules.sh' ,
                        'module load openmind8/anaconda/3-2023.09-0',
                        'module load openmind8/cuda/12.1',                 
                        'module load openmind8/cudnn/8.8.1-cuda12'  ,
                        'module load openmind/gcc/11.1.0',
                        'module load openmind/ffmpeg/20160310' , 
                        'source ~/.bashrc',
                        'conda activate whisperx'
                                              ],
                       job_extra=['--gres=gpu:QUADRORTX6000:1'] 
                       )
    cluster.scale(5)
    client= Client(cluster)
    
    result_list = []
    for filename in os.listdir('chunks_34_KS_Recording4-LR_2165-2465_CT'):
        audiopath = os.path.join('chunks_34_KS_Recording4-LR_2165-2465_CT', filename)
        future  = client.submit(run_whisperx,audiopath)  
        result  = future.result()
        result_list.append(result)
    return result_list

In [28]:
cb1res = codebase1Run()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 35445 instead


In [32]:
cb1res[0].stderr

'Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.3.3. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/arjunp/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`\n'

In [None]:
cb2res = codebase2Run()
cb3res = codebase3Run()
cb4res = codebase4Run()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 33037 instead


In [8]:
cb2res = codebase2Run()



In [26]:
cb3res = codebase3Run()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 35195 instead


In [16]:
print(cb2res[4].stderr)

Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.3.3. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/arjunp/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`

Map:   0%|          | 0/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00,  1.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00,  1.13 examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 70.01 examples/s]
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was

In [23]:
print(cb3res[0].stderr)

Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in /weka/scratch/weka/cpl/arjunp/ashraf_repo/whisper-diarization/temp_outputs/htdemucs
Separating track /om/user/arjunp/chunks_34_KS_Recording4-LR_2165-2465_CT/minute_2.wav
GPU Name: Quadro RTX 6000
CUDA Compute Capability: (7, 5)
Your GPU supports efficient float16 computation.
Model was trained with pyannote.audio 0.0.1, yours is 3.1.1. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.1.2+cu121. Bad things might happen unless you revert torch to 1.x.
[NeMo I 2024-07-28 22:45:26 msdd_models:1092] Loading pretrained diar_msdd_telephonic model from NGC
[NeMo I 2024-07-28 22:45:26 cloud:58] Found existing object /home/arjunp/.cache/torch/NeMo/NeMo_1.20.0/diar_msdd_telephonic/3c3697a0a46f945574fa407149975a13/diar_msdd_telephonic.nemo.
[NeMo I 2024-07-28 22:45:26 cloud:64] Re-using file from: /home/arjunp/

In [24]:
!squeue -u arjunp

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
          37461680    normal dask-wor   arjunp  R      48:59      1 node078
          37461681    normal dask-wor   arjunp  R      48:59      1 node078
          37461682    normal dask-wor   arjunp  R      48:59      1 node078
          37461932    normal dask-wor   arjunp  R       8:49      1 node078
          37461933    normal dask-wor   arjunp  R       8:49      1 node079
          37461934    normal dask-wor   arjunp  R       8:49      1 node079
          37461811    normal dask-wor   arjunp  R      15:57      1 node091
          37461812    normal dask-wor   arjunp  R      15:57      1 node091
          37461813    normal dask-wor   arjunp  R      15:57      1 node091
          37461657    normal     bash   arjunp  R    1:01:24      1 node091
