# Whisper-tiny on IPU

This notebook demonstrates inference with Whisper-tiny on IPU using FP16. Presently runs on branch whisper/poc.

In [8]:
# Copyright 2023 Graphcore Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

In [9]:
# Be sure you're running Poplar version 3.2
# If not, go back to the paperspace instance launch page, select 'advanced options', then select:
# graphcore/pytorch-jupyter:3.2.0-ubuntu-20.04-20230314 as the container name
!popc --version

POPLAR version 3.2.0 (1513789a51)
clang version 15.0.0 (bab932b4fc4cdb58bb009370384b2c41579bd9d9)


In [10]:
# Installs. Whisper features not yet on main branch.
%pip install git+https://github.com/graphcore/optimum-graphcore-fork.git "tokenizers<0.13" "transformers==4.25.1"
# %pip install git+https://github.com/graphcore/optimum-graphcore-fork.git@whisper/poc "tokenizers<0.13"
%pip install soundfile librosa

Collecting git+https://github.com/graphcore/optimum-graphcore-fork.git
  Cloning https://github.com/graphcore/optimum-graphcore-fork.git to /tmp/pip-req-build-je5k6k0t
  Running command git clone --filter=blob:none --quiet https://github.com/graphcore/optimum-graphcore-fork.git /tmp/pip-req-build-je5k6k0t
  Resolved https://github.com/graphcore/optimum-graphcore-fork.git to commit 769986b10fdfba4b4526057ea27c4d667edeb0b5
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting transformers==4.25.1
  Using cached transformers-4.25.1-py3-none-any.whl (5.8 MB)
Collecting huggingface-hub<1.0,>=0.10.0
  Using cached huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
Collecting regex!=2019.12.17
  Using cached regex-2023.3.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (771 kB)
Collecting requests
  Using cached requests-2.28.2-py3-none-any.whl (62 kB)
Collecting 

#### Imports

In [11]:
# Generic imports
import os
import torch
from datasets import load_dataset, Dataset

# IPU-specific imports
import poptorch
from optimum.graphcore import IPUConfig
from optimum.graphcore.modeling_utils import to_pipelined

# HF imports
# from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig
import transformers
transformers.__version__

'4.20.1'

In [None]:
# Removing automatic caching of executables
del os.environ['POPTORCH_CACHE_DIR']
del os.environ['POPLAR_EXECUTABLE_CACHE_DIR']
# os.environ["DECODER_POPLAR_ENGINE_OPTIONS"] = f'{{"autoReport.all":"true", "debug.allowOutOfMemory": "true", "autoReport.directory":"/tmp/profile1"}}'

#### Global data structures

In [None]:
from dataclasses import dataclass
from typing import List

@dataclass
class IPUWhisperConf:
    """A data class to collect IPU-related config parameters"""
    model_spec: str
    ipus_per_replica: int
    pod_type: str

ipu_whisper = {
    "tiny": IPUWhisperConf(model_spec='openai/whisper-tiny.en', ipus_per_replica=2, pod_type="pod4"),
    # Larger sizes will become available in due course
}

In [None]:
# global config
iwc = ipu_whisper["tiny"]
max_length = 448
pod_type = os.getenv("GRAPHCORE_POD_TYPE", iwc.pod_type)

In [None]:
# Instantiate processor and model
processor = WhisperProcessor.from_pretrained(iwc.model_spec)
model = WhisperForConditionalGeneration.from_pretrained(iwc.model_spec)

In [None]:
# load dummy dataset and read soundfiles
test_idx = 4

ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
input_features = processor(ds[test_idx]["audio"]["array"], 
                           return_tensors="pt",
                           sampling_rate=ds[test_idx]['audio']['sampling_rate']).input_features.half()

### Whisper Benchmarking: adjust parameters for different configurations

In [None]:
num_beams = 3
batch_size = 2
replication_factor = 1

import transformers
transformers.__version__

In [None]:
ipu_config = IPUConfig(executable_cache_dir=None, ipus_per_replica=iwc.ipus_per_replica, matmul_proportion=0.1, inference_replication_factor=replication_factor)

pipelined_model = to_pipelined(model, ipu_config).parallelize(for_generation=True).half()

sample_output = pipelined_model.generate(
    input_features.repeat(batch_size,1,1), 
    max_length=max_length, 
    min_length=3, 
    num_beams=num_beams)

transcription = processor.batch_decode(sample_output, skip_special_tokens=False)[0]
transcription

In [None]:
def benchmark():
    sample_output = pipelined_model.generate(
        input_features.repeat(batch_size,1,1), 
        max_length=max_length, 
        min_length=3, 
        num_beams=num_beams)
    transcription = processor.batch_decode(sample_output, skip_special_tokens=False)

print(f"Running with num_beams={num_beams}, timing steps of batch size {batch_size}, running on {2*replication_factor} IPUs")
%timeit -n 10 benchmark()    