# Whisper-tiny on IPU

This notebook demonstrates inference with Whisper-tiny on IPU using FP16.    
The present version of the IPU Whisper implementation runs the encoder and the decoder on IPU.

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to local source files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 Graphcore Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Run inference on a 🤗 Whisper model """

from optimum.utils import logging
from dataclasses import dataclass, field
from pathlib import Path

import torch
from datasets import load_dataset, Dataset
from tqdm import tqdm

import numpy as np

import poptorch
from optimum.graphcore import IPUConfig, IPUTrainer,IPUTrainingArguments
from optimum.graphcore.modeling_utils import to_pipelined
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig

In [4]:
from dataclasses import dataclass
from typing import List

@dataclass
class IPUWhisperConf:
    """IPU-related configuration for one Whisper model size.

    Attributes:
        model_spec: Checkpoint identifier handed to ``from_pretrained`` for
            both the processor and the model (e.g. ``'openai/whisper-tiny.en'``).
        layers_per_ipu: Number of layers placed on each IPU, one entry per IPU;
            forwarded unchanged to ``IPUConfig(layers_per_ipu=...)``.
        pod_type: Default pod type (e.g. ``"pod4"``), used when the
            ``GRAPHCORE_POD_TYPE`` environment variable is not set.
    """
    model_spec: str
    layers_per_ipu: List[int]
    pod_type: str

# Registry of supported Whisper sizes: size name -> IPU settings for that size.
ipu_whisper = {
    "tiny": IPUWhisperConf(
        model_spec="openai/whisper-tiny.en",
        layers_per_ipu=[8],
        pod_type="pod4",
    ),
    # Larger sizes will become available in due course
}


In [5]:
# Key into the `ipu_whisper` registry; only "tiny" is defined there at present.
model_size = "tiny"
# IPU settings (checkpoint name, layers per IPU, pod type) for the chosen size.
iwc = ipu_whisper[model_size]
# Upper bound on the generated sequence length, passed to `generate(max_length=...)` below.
# NOTE(review): 448 presumably matches Whisper's maximum decoder length — TODO confirm against the model config.
max_length = 448

In [6]:
# Instantiate processor and model from the pretrained checkpoint named by `iwc.model_spec`.
# The processor is used below both to turn raw audio into input features and to
# decode generated token ids back into text.
processor = WhisperProcessor.from_pretrained(iwc.model_spec)
model = WhisperForConditionalGeneration.from_pretrained(iwc.model_spec)

In [7]:
# Load the dummy LibriSpeech dataset and extract input features for one sample.
test_idx = 4

ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

# Look the sample up once: every `ds[test_idx]` access re-reads the row (and,
# for audio columns, typically re-decodes the sound file), so reuse the record
# for both the waveform and its sampling rate.
audio_sample = ds[test_idx]["audio"]

# Convert the waveform to model input features and cast them to FP16, the
# precision this notebook runs inference in.
input_features = processor(
    audio_sample["array"],
    return_tensors="pt",
    sampling_rate=audio_sample["sampling_rate"],
).input_features.half()

Found cached dataset librispeech_asr_dummy (/home/paolot/.cache/huggingface/datasets/hf-internal-testing___librispeech_asr_dummy/clean/2.1.0/d3bc4c2bc2078fcde3ad0f0f635862e4c0fef78ba94c4a34c4c250a097af240b)


In [8]:
import os
from pathlib import Path

# Pod type: taken from the GRAPHCORE_POD_TYPE environment variable when set,
# otherwise the default recorded for the selected model size.
# NOTE(review): `pod_type` is not referenced by any later cell visible here — confirm it is still needed.
pod_type = os.getenv("GRAPHCORE_POD_TYPE", iwc.pod_type)
# Directory for cached compiled executables, later passed to
# `IPUConfig(executable_cache_dir=...)`. The base directory comes from
# POPLAR_EXECUTABLE_CACHE_DIR when set. Join with os.path.join rather than
# string concatenation so a base path without a trailing slash (e.g. "/my/cache")
# yields "/my/cache/whisper_inference" instead of "/my/cachewhisper_inference".
executable_cache_dir = os.path.join(
    os.getenv("POPLAR_EXECUTABLE_CACHE_DIR", "/tmp/whisper_exe_cache/"),
    "whisper_inference",
)

In [9]:
# os.environ["PVTI_OPTIONS"]=r'{"enable":"true", "directory":"/localdata/paolot/profiles/minimal"}'
# os.environ["POPLAR_ENGINE_OPTIONS"] = f'{{"autoReport.all":"true", "debug.allowOutOfMemory": "true", "autoReport.directory":"/localdata/paolot/profiles/no-pipeline-30"}}'

In [10]:
# Build the IPU configuration: where compiled executables are cached, how the
# model's layers are distributed across IPUs, and the matmul proportion setting.
ipu_config = IPUConfig(
    executable_cache_dir=executable_cache_dir,
    layers_per_ipu=iwc.layers_per_ipu,
    matmul_proportion=0.1,
)

In [11]:
# Convert the Hugging Face model into its optimum.graphcore pipelined
# counterpart, configured with the IPU settings built above.
pipelined_model = to_pipelined(model, ipu_config)

In [12]:
# Apply the IPU parallelisation to the pipelined model, then cast it to FP16.
# NOTE(review): this cell rebinds `pipelined_model` from itself, so re-running it
# calls parallelize() on an already-parallelized model — confirm that is safe.
pipelined_model = pipelined_model.parallelize().half()

In [13]:
# Run generation once for the sample (the first call also triggers graph
# compilation, as shown by the progress bars in the output).
sample_output = pipelined_model.generate(
    input_features,
    max_length=max_length,
    min_length=3,
)
# Decode the generated token ids, keeping special tokens so the start/end
# markers remain visible, and display the first (only) transcription.
transcription = processor.batch_decode(
    sample_output,
    skip_special_tokens=False,
)[0]
transcription

Graph compilation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00]
Graph compilation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00]


"<|startoftranscript|><|notimestamps|> Linnell's pictures are a sort of up-guards-in-item paintings, and Mason's exquisite idles are as national as a jingo poem. Mr. Birkett Foster's landscapes smile at one much in the same way that Mr. Carker used to flash his teeth. And Mr. John Collier gives his sitter a cheerful slap in the back, before he says, like a shampooer and a Turkish bath, next man<|endoftext|>"

In [14]:
%%time
# Time 100 back-to-back generate + decode rounds on the same input; the reported
# wall time divided by 100 gives the per-sample latency.
# (`%%time` must remain the first line of the cell, so comments go below it.)
for _ in range(100):
    sample_output = pipelined_model.generate(input_features, max_length=max_length, min_length=3)
    # NOTE(review): unlike the previous cell, this keeps the whole decoded batch,
    # so `transcription` is rebound from a str to a list of str here.
    transcription = processor.batch_decode(sample_output, skip_special_tokens=False)

CPU times: user 2min 25s, sys: 428 ms, total: 2min 26s
Wall time: 26.6 s
