# Whisper-tiny on IPU

This notebook demonstrates inference with Whisper-tiny on IPU using FP16.    
The present version of the IPU Whisper implementation runs the encoder and the decoder on IPU.

In [None]:
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 Graphcore Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Run inference on a 🤗 Whisper model """

from optimum.utils import logging
from dataclasses import dataclass, field
from pathlib import Path

import torch
from datasets import load_dataset, Dataset
from tqdm import tqdm

import numpy as np

import poptorch
from optimum.graphcore import IPUConfig, IPUTrainer,IPUTrainingArguments
from optimum.graphcore.modeling_utils import to_pipelined
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

In [None]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig

In [None]:
from dataclasses import dataclass
from typing import List

@dataclass
class IPUWhisperConf:
    """Bundle of the IPU-specific settings for one Whisper model size.

    Attributes:
        model_spec: Model identifier handed to ``from_pretrained`` when the
            processor and the model are instantiated.
        ipus_per_replica: Value forwarded to ``IPUConfig(ipus_per_replica=...)``.
        pod_type: Fallback pod type used when the ``GRAPHCORE_POD_TYPE``
            environment variable is not set.
    """

    model_spec: str
    ipus_per_replica: int
    pod_type: str

# Registry of the supported Whisper sizes, keyed by size name.
ipu_whisper = dict(
    tiny=IPUWhisperConf(
        model_spec="openai/whisper-tiny.en",
        ipus_per_replica=2,
        pod_type="pod4",
    ),
    # Larger sizes will become available in due course
)



In [None]:
# Maximum sequence length later passed to `generate`.
max_length = 448

# Pick the model size and look up its IPU settings in the registry.
model_size = "tiny"
iwc = ipu_whisper[model_size]

In [None]:
# Load the pretrained processor and the matching model from the checkpoint
# named in the selected IPU configuration.
processor = WhisperProcessor.from_pretrained(
    pretrained_model_name_or_path=iwc.model_spec,
)
model = WhisperForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path=iwc.model_spec,
)

In [None]:
# Load the dummy LibriSpeech dataset and pick one sample to transcribe.
test_idx = 4

ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

# Fetch the sample's audio entry once: each `ds[idx]["audio"]` access goes
# back to the dataset (and decodes the audio again), so reuse the result for
# both the waveform and its sampling rate.
sample_audio = ds[test_idx]["audio"]

# Turn the raw waveform into model input features and cast them to FP16,
# matching the half-precision model used for inference.
input_features = processor(
    sample_audio["array"],
    return_tensors="pt",
    sampling_rate=sample_audio["sampling_rate"],
).input_features.half()

In [None]:
import os
from pathlib import Path

# The GRAPHCORE_POD_TYPE environment variable, when set, takes precedence
# over the pod type recorded in the selected configuration.
pod_type = os.environ.get("GRAPHCORE_POD_TYPE", iwc.pod_type)
# Directory for cached compiled executables. Use os.path.join rather than
# string concatenation so the path is well-formed whether or not
# POPLAR_EXECUTABLE_CACHE_DIR ends with a path separator
# (e.g. "/my/cache" -> "/my/cache/whisper_inference", not "/my/cachewhisper_inference").
executable_cache_dir = os.path.join(
    os.getenv("POPLAR_EXECUTABLE_CACHE_DIR", "/tmp/whisper_exe_cache/"),
    "whisper_inference",
)

In [None]:
# os.environ["PVTI_OPTIONS"]=r'{"enable":"true", "directory":"/localdata/paolot/profiles/minimal"}'
# os.environ["POPLAR_ENGINE_OPTIONS"] = f'{{"autoReport.all":"true", "debug.allowOutOfMemory": "true", "autoReport.directory":"/localdata/paolot/profiles/no-pipeline-30"}}'

In [None]:
# IPU execution settings: where compiled executables are cached, how many
# IPUs each replica uses, and the matmul proportion setting.
ipu_config = IPUConfig(
    executable_cache_dir=executable_cache_dir,
    ipus_per_replica=iwc.ipus_per_replica,
    matmul_proportion=0.1,
)

In [None]:
# Hand the loaded model and the IPU settings to optimum-graphcore's
# `to_pipelined` helper; the result is the object used for all later steps.
pipelined_model = to_pipelined(model, ipu_config)

In [None]:
# Parallelize the model for generation, then cast it to half precision (FP16).
pipelined_model = pipelined_model.parallelize(for_generation=True)
pipelined_model = pipelined_model.half()

In [None]:
# Generate token ids for the prepared audio features.
sample_output = pipelined_model.generate(
    input_features,
    max_length=max_length,
    min_length=3,
)
# Decode the generated ids back to text, keeping special tokens, and take the
# first (only) entry of the batch.
transcription = processor.batch_decode(
    sample_output,
    skip_special_tokens=False,
)[0]
# Bare expression so the notebook displays the transcription.
transcription

In [None]:
%%time
# Run generation + decoding 100 times so the cell's `%%time` magic reports
# the total time for the whole batch of runs.
for _ in range(100):
    sample_output = pipelined_model.generate(
        input_features,
        max_length=max_length,
        min_length=3,
    )
    # Unlike the single-run cell, the full decoded batch (a list) is kept here.
    transcription = processor.batch_decode(
        sample_output,
        skip_special_tokens=False,
    )