In [None]:
# Export the model with optimum-cli
!optimum-cli export onnx --model openai/whisper-tiny whisper-tiny-with-past/ --task automatic-speech-recognition-with-past --opset 13


# Export and save the model to ONNX

In [1]:
import os
import time
import shutil
from evaluate import load
from datasets import load_dataset
from transformers import WhisperForConditionalGeneration, WhisperProcessor, AutoConfig
from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
from transformers import PretrainedConfig
import librosa
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Export model in ONNX
def export_onnx(model_id, save_dir):
    """Export a speech seq2seq model to ONNX and place the files in ``save_dir``.

    Args:
        model_id: Identifier handed to ``ORTModelForSpeechSeq2Seq.from_pretrained``
            with ``export=True`` (this notebook passes Hub names such as
            ``"openai/whisper-small"``).
        save_dir: Destination directory. It is created if missing; if it
            already exists, the exported files are written into it and
            overwrite files of the same name.

    Returns:
        None. A confirmation line is printed once the files are in place.
    """
    model = ORTModelForSpeechSeq2Seq.from_pretrained(model_id, export=True)
    # Directory the exporter wrote to (presumably a temporary location --
    # verify against the installed optimum version).
    export_dir = model.model_save_dir
    # Copy the directory *contents* instead of using shutil.move: when
    # save_dir already exists, shutil.move places the source directory inside
    # it, leaving the .onnx files one level deeper than callers expect.
    shutil.copytree(export_dir, save_dir, dirs_exist_ok=True)
    print("Model exported to onnx and saved at location ", save_dir)

In [12]:
# Export the whisper-small checkpoint to ONNX
export_onnx(
    model_id="openai/whisper-small",
    save_dir="/home/carol/mp/quantize/small-whisper",
)

Framework not specified. Using pt to export the model.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Using the export variant default. Available variants are:
    - default: The default ONNX variant.
Non-default generation parameters: {'max_length': 448, 'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257]}
Speci

Model exported to onnx and saved at location  /home/carol/mp/quantize/small-whisper


In [None]:
# Export the whisper-tiny checkpoint to ONNX
export_onnx(
    model_id="openai/whisper-tiny",
    save_dir="/home/carol/mp/quantize/tiny-whisper",
)





# Inference on ONNX models



In [13]:
def _dir_size_mb(path):
    """Return the combined size, in MiB, of every file found under ``path``."""
    total_bytes = 0
    for dirpath, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            total_bytes += os.path.getsize(os.path.join(dirpath, filename))
    return total_bytes / (1024 * 1024)


def run(model_name, audio, language="malayalam", task="translate",
        model_root="/home/carol/mp/quantize"):
    """Run an exported ONNX speech model on one audio file and print a report.

    The report contains the model name, the decoded text, the wall-clock time
    spent in ``generate`` plus ``decode``, and the on-disk size of the model
    directory in MiB.

    Args:
        model_name: Name of the model directory inside ``model_root``.
        audio: Path to the audio file; it is loaded as mono at 16 kHz.
        language: Language passed to ``processor.get_decoder_prompt_ids``.
        task: Task passed to ``processor.get_decoder_prompt_ids``.
        model_root: Directory that contains the model directories.

    Returns:
        None. All results are printed.
    """
    # Define the model path
    model_path = os.path.join(model_root, model_name)

    # Load the processor and config from the same directory as the ONNX files.
    # Passing the bare model_name only resolves when the working directory
    # happens to be model_root.
    processor = WhisperProcessor.from_pretrained(model_path)
    model_config = AutoConfig.from_pretrained(model_path)
    sessions = ORTModelForSpeechSeq2Seq.load_model(
        os.path.join(model_path, 'encoder_model.onnx'),
        os.path.join(model_path, 'decoder_model.onnx'),
        os.path.join(model_path, 'decoder_with_past_model.onnx'))
    # sessions is indexed as (encoder, decoder, decoder-with-past), following
    # the order of the paths given to load_model above.
    model = ORTModelForSpeechSeq2Seq(sessions[0], sessions[1], model_config, model_path, sessions[2])

    # Load the audio file
    audio_data, sample_rate = librosa.load(audio, sr=16000, mono=True)

    # Preprocess the audio
    input_features = processor(audio_data, sampling_rate=sample_rate, return_tensors="pt").input_features
    forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)

    # Time generation plus decoding; perf_counter is a monotonic,
    # high-resolution clock intended for measuring intervals.
    start_time = time.perf_counter()
    predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)[0]
    transcription = processor.decode(predicted_ids, skip_special_tokens=True)
    inference_time = time.perf_counter() - start_time

    # Model size on disk
    size = _dir_size_mb(model_path)

    print()
    print("Model name = ",model_name)
    print()
    print(transcription)
    print()
    print("Inference Time = ",inference_time)
    print("Model size = ",size)
    

In [14]:
# Run inference on sample.wav with the model in the "small-whisper" directory
run(model_name="small-whisper", audio="sample.wav")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.



Model name =  small-whisper

 Harvard List number one. The birch canoe slid on the smooth planks. Glue the sheet to the dark blue background. It's easy to tell the depth of a well. These days a chicken leg is a rare dish.

Inference Time =  7.355425596237183
Model size =  1763.2071237564087


In [16]:
# Run inference on sample.wav with the model in the "tiny-whisper" directory
run(model_name="tiny-whisper", audio="sample.wav")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.



Model name =  tiny-whisper

 Harvard List Number One The Birch can use lid on the smooth planks. Do the sheet to the dark blue background. It's easy to tell the depth of a well. These days a chicken leg is a rare dish.

Inference Time =  0.9543247222900391
Model size =  408.8020076751709


In [17]:
# Run inference on sample.wav with the model in the "small_quantized" directory
run(model_name="small_quantized", audio="sample.wav")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.



Model name =  small_quantized

 Harvard List Number One The Birch Canoe slid on the smooth planks. Glue the sheet to the dark blue background. It's easy to tell the depth of a well. These days a chicken leg is a rare dish.

Inference Time =  5.890881299972534
Model size =  457.8302402496338


In [18]:
# Run inference on sample.wav with the model in the "tiny_quantized" directory
run(model_name="tiny_quantized", audio="sample.wav")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.



Model name =  tiny_quantized

 Harvard List Number One The Birch Knows lid on the smooth planks.

Inference Time =  0.5709211826324463
Model size =  110.3315486907959
