### Imports

In [None]:
import whisper
import pandas as pd
import torch
import json
import os
import time
import datetime
import gc
import py3nvml.py3nvml as nvml
import logging
from threading import Thread

### Variables (adjustable)

In [None]:
# Which GPU to use when the machine has more than one
device_index = 0
# Whisper model to benchmark
# options: https://github.com/beeldengeluid/dane-whisper-asr-worker?tab=readme-ov-file#model-options
model_version = "large-v3"
# Compute type: True for float16, False for float32
fp16 = True
# Absolute path to the folder holding the audio that should be transcribed
audios_path = "/opt/app-root/src/nbest/bn-nl/"
# Single file, for experimenting/testing purposes
audio_file = "/opt/app-root/src/nbest/bn-nl/nbest-eval-2008-bn-nl-012.wav"
# Absolute path to the folder where transcriptions should be saved; it is
# derived from the compute type and the model version chosen above
out_path = "".join([
    "/opt/app-root/src/results/openai/",
    "float16/" if fp16 else "float32/",
    model_version,
    "/unlabelled/",
])
# GPU measurement parameter: how often to measure GPU usage (in s)
interval = 0.5

### Threading helper class (to measure GPU usage)

In [None]:
class MyThread(Thread):
    """Thread that calls ``func(*params)`` and remembers what it returned.

    A plain ``Thread`` discards the return value of its target; this subclass
    stores it in ``result`` so it can be read once the thread has finished.
    """

    def __init__(self, func, params):
        """Store the callable and the positional arguments to call it with."""
        super().__init__()
        self.func, self.params = func, params
        # Stays None until run() has completed
        self.result = None

    def run(self):
        """Call the stored callable with the stored arguments and keep its result."""
        target, arguments = self.func, self.params
        self.result = target(*arguments)

    def get_result(self):
        """Return the callable's return value (None if run() has not completed)."""
        return self.result

### ffmpeg setup

In [None]:
# # Uncomment the 2 lines below if you want to download a static FFmpeg build
# !curl https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz -o ffmpeg.tar.xz \
#  && tar -xf ffmpeg.tar.xz && rm ffmpeg.tar.xz

# Add the build to PATH
# `!find` is an IPython shell escape: the paths printed by `find` (searching
# below the current working directory) are captured in `ffmdir`.
ffmdir = !find . -iname ffmpeg-*-static
# Read the current value of the PATH environment variable.
path = %env PATH
# Append the first match to PATH.
# NOTE(review): ffmdir[0] raises IndexError when `find` printed nothing, i.e.
# when no static build has been unpacked below the working directory.
# NOTE(review): the match is relative to "." - presumably it only resolves
# while the working directory stays the same; confirm.
path = path + ':' + ffmdir[0]
# Write the extended value back to the PATH environment variable.
%env PATH $path
print('')
# Show which ffmpeg binary the shell now resolves.
!which ffmpeg
print('Done!')

### Dataset benchmarking

In [None]:
### SETTING UP THE LOGGER
# The log file is written into out_path; the folder has to exist before
# logging.basicConfig opens the file, because parent folders are not created
# automatically.
os.makedirs(out_path, exist_ok=True)
logging.basicConfig(filename=out_path + "log.txt",
                    format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
                    level=logging.INFO,
                    force=True)
logger = logging.getLogger(__name__)
# force=True only resets the handlers of the root logger. When this cell is
# run again, the named logger would otherwise get one more console handler per
# run and print every message several times, so drop the old ones first.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
consoleHandler = logging.StreamHandler()
logger.addHandler(consoleHandler)

# The per-file statistics are collected in these lists and later saved as a
# table (one row per file, plus one row for loading the model)
file_names = []
gpu_mem = []
gpu_power = []
times = []

### LOADING THE MODEL
logger.info("================================START OF EVALUATION================================")
# Put the model on the GPU selected by device_index so that the model runs on
# the same device that is measured below. Without CUDA, None keeps whisper's
# own default device choice.
# NOTE(review): CUDA and NVML may enumerate GPUs in a different order unless
# CUDA_DEVICE_ORDER=PCI_BUS_ID is set - confirm on multi-GPU machines.
model_device = f"cuda:{device_index}" if torch.cuda.is_available() else None
start = time.time()
model = whisper.load_model(model_version, device=model_device)
load_time = time.time() - start
logger.info(f"Time to load the model: {load_time} s")
logger.info("================================")

# First row of the statistics: model loading (no GPU usage is sampled for it)
file_names.append("Loading the model")
times.append(load_time)
gpu_mem.append(0)
gpu_power.append(0)

logger.info(
    "Measuring maximum GPU memory usage on GPU device."
    " Make sure to not have additional processes running on the same GPU."
)
# Initialization for measuring GPU usage
nvml.nvmlInit()
handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
gpu_name = nvml.nvmlDeviceGetName(handle)
# Total memory is reported in bytes; >> 20 converts it to MiB
gpu_memory_limit = nvml.nvmlDeviceGetMemoryInfo(handle).total >> 20
# Power limit is reported in milliwatts; convert it to watts
gpu_power_limit = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0

# Helpers for the transcription loop below
from threading import Event  # local import: only this cell needs it


def _get_gpu_info(device_handle, usage, stop_event):
    """Sample GPU memory and power usage until ``stop_event`` is set.

    One sample is taken immediately, then one every ``interval`` seconds.
    Waiting on the event instead of sleeping lets the sampler stop as soon as
    the transcription is done.

    Args:
        device_handle: NVML handle of the GPU to sample.
        usage: dict with the lists "gpu_memory_usage" (MiB) and
            "gpu_power_usage" (W) that the samples are appended to.
        stop_event: threading.Event; sampling ends once it is set.

    Returns:
        The ``usage`` dict that was passed in.
    """
    while True:
        usage["gpu_memory_usage"].append(
            nvml.nvmlDeviceGetMemoryInfo(device_handle).used >> 20
        )
        usage["gpu_power_usage"].append(
            nvml.nvmlDeviceGetPowerUsage(device_handle) / 1000
        )
        # wait() returns True as soon as the event is set, False on timeout
        if stop_event.wait(interval):
            break

    return usage


def _format_segment(segment):
    """Return the fields of one transcribed segment that are saved to JSON."""
    return {
        "id": segment["id"],
        "seek": segment["seek"],
        "start": segment["start"],
        "end": segment["end"],
        "text": segment["text"].strip(),
        "tokens": segment["tokens"],
        "temperature": segment["temperature"],
        "avg_logprob": segment["avg_logprob"],
        "compression_ratio": segment["compression_ratio"],
        "no_speech_prob": segment["no_speech_prob"],
        "words": [
            {
                # There's an issue where the text output contains a whitespace at the front of the text
                "text": word["word"].strip(),
                "start": word["start"],
                "end": word["end"],
                "confidence": word["probability"],
            }
            for word in segment["words"]
        ],
    }


# Go through files to transcribe (sorted, so every run uses the same order)
for file in sorted(os.listdir(audios_path)):
    audio_path = os.path.join(audios_path, file)
    if not os.path.isfile(audio_path):
        # Sub-folders cannot be transcribed
        continue
    # File name without its extension, whatever the extension's length
    file_stem = os.path.splitext(file)[0]

    gpu_usage = {"gpu_memory_usage": [], "gpu_power_usage": []}
    stop_event = Event()
    thread = MyThread(_get_gpu_info, params=(handle, gpu_usage, stop_event))
    thread.start()

    # Measuring time spent transcribing this file
    file_start = time.time()
    try:
        result = model.transcribe(audio=audio_path, word_timestamps=True, fp16=fp16)
        # Take the time before stopping the sampler, so that waiting for the
        # sampler thread is not counted as transcription time
        file_time = time.time() - file_start
    finally:
        # Stop measuring GPU usage for this file, also when transcribing
        # failed; otherwise the sampler thread would keep running forever
        stop_event.set()
        thread.join()

    logger.info("%s has been transcribed", file_stem)
    logger.info(f"Time to transcribe: {file_time} s")
    # default=0 covers the case where the sampler failed before its first sample
    max_memory_usage = max(gpu_usage["gpu_memory_usage"], default=0)
    max_power_usage = max(gpu_usage["gpu_power_usage"], default=0)
    logger.info(
        "Maximum GPU memory usage: %dMiB / %dMiB (%.2f%%)",
        max_memory_usage,
        gpu_memory_limit,
        (max_memory_usage / gpu_memory_limit) * 100,
    )
    logger.info(
        "Maximum GPU power usage: %dW / %dW (%.2f%%)",
        max_power_usage,
        gpu_power_limit,
        (max_power_usage / gpu_power_limit) * 100,
    )

    file_names.append(file_stem)
    times.append(file_time)
    gpu_mem.append(max_memory_usage)
    gpu_power.append(max_power_usage)

    # Formatting the transcription
    segments_to_add = [_format_segment(segment) for segment in result["segments"]]
    result = {"segments": segments_to_add}

    # Saving results to JSON file
    with open(os.path.join(out_path, file_stem + ".json"), "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    logger.info("================================")

# Total wall-clock time since the model started loading
logger.info(
    "Total time spent evaluating: %s",
    datetime.timedelta(seconds=time.time() - start),
)
logger.info("================================END OF EVALUATION================================")

# Flush and close the log handlers, then release the NVML session
logging.shutdown()
nvml.nvmlShutdown()

# Free the model: collect garbage first so that memory held by reference
# cycles is released as well, and only then hand the cached GPU memory back
# to the driver.
del model
gc.collect()
torch.cuda.empty_cache()

In [None]:
# One row per entry in file_names (the model-loading step plus every
# transcribed file), one column per collected statistic
df = pd.DataFrame(
    data={
        "Time": times,
        "Max GPU mem usage (MiB)": gpu_mem,
        "Max GPU power usage (W)": gpu_power,
    },
    index=file_names,
)
# Save the table next to the transcriptions
df.to_csv(path_or_buf=out_path + "info.csv")