In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 Graphcore Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Run inference on a 🤗 Whisper model """

import logging
from dataclasses import dataclass, field
from pathlib import Path

import torch
from datasets import load_dataset, Dataset
from tqdm import tqdm

import numpy as np

import poptorch
from optimum.graphcore import IPUConfig, IPUTrainer,IPUTrainingArguments
from optimum.graphcore.modeling_utils import to_pipelined
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig

In [4]:
from dataclasses import dataclass
from typing import List

@dataclass
class IPUWhisperConf:
    """IPU deployment preset for one Whisper checkpoint.

    Attributes:
        model_spec: Checkpoint identifier handed to ``from_pretrained``.
        layers_per_ipu: Layer counts passed to ``IPUConfig(layers_per_ipu=...)``.
        pod_type: Pod type associated with this preset, e.g. ``"pod4"``.
    """

    model_spec: str
    layers_per_ipu: List[int]
    pod_type: str

# Presets keyed by model size. Each entry names the checkpoint to load, the
# layers_per_ipu list given to IPUConfig, and the pod type for that size.
ipu_whisper = {
    "tiny": IPUWhisperConf(
        model_spec='openai/whisper-tiny',
        layers_per_ipu=[8],
        pod_type="pod1",
    ),
    "small": IPUWhisperConf(
        model_spec='openai/whisper-small',
        layers_per_ipu=[6] * 4,
        pod_type="pod4",
    ),
    "large": IPUWhisperConf(
        model_spec='openai/whisper-large-v2',
        layers_per_ipu=[4] * 16,
        pod_type="pod16",
    ),
}


In [5]:
# Choose which preset to run; must be one of the keys of `ipu_whisper`
# ("tiny", "small" or "large"). `iwc` is the selected preset.
model_size = "tiny"
iwc = ipu_whisper[model_size]

In [6]:
# Instantiate the processor and the model from the checkpoint named by the
# selected preset (`iwc.model_spec`).
processor = WhisperProcessor.from_pretrained(iwc.model_spec)
model = WhisperForConditionalGeneration.from_pretrained(iwc.model_spec)

In [7]:
# Switch off `use_cache` on the model config; per the original note this keeps
# the model from returning a lot of extra data.
model.config.use_cache = False    # avoid outputting a lot of data

In [8]:
# A second, independent instance loaded from the same checkpoint as `model`.
# NOTE(review): presumably kept as a CPU reference to compare against the IPU
# results; it is not used in the cells visible here — confirm or remove.
cpu_model = WhisperForConditionalGeneration.from_pretrained(iwc.model_spec)

In [9]:
# Load the dummy LibriSpeech validation split and run its first recording
# through the processor to obtain the model's input features.
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
first_audio = ds[0]["audio"]
input_features = processor(
    first_audio["array"],
    return_tensors="pt",
    sampling_rate=first_audio['sampling_rate'],
).input_features

Found cached dataset librispeech_asr_dummy (/home/paolot/.cache/huggingface/datasets/hf-internal-testing___librispeech_asr_dummy/clean/2.1.0/d3bc4c2bc2078fcde3ad0f0f635862e4c0fef78ba94c4a34c4c250a097af240b)


In [10]:
import os
from pathlib import Path

# Pod type to target: the GRAPHCORE_POD_TYPE environment variable wins,
# otherwise fall back to the pod type of the selected preset.
pod_type = os.getenv("GRAPHCORE_POD_TYPE", iwc.pod_type)
# Keep compiled executables in a "whisper_inference" sub-directory of the
# Poplar executable cache. The path is built with os.path.join rather than
# string concatenation so it stays correct when POPLAR_EXECUTABLE_CACHE_DIR
# is set without a trailing slash.
executable_cache_dir = os.path.join(
    os.getenv("POPLAR_EXECUTABLE_CACHE_DIR", "/tmp/whisper_exe_cache/"),
    "whisper_inference",
)

In [11]:
# os.environ["POPLAR_ENGINE_OPTIONS"] = f'{{"autoReport.all":"true", "debug.instrument":"false", "debug.allowOutOfMemory": "true", "autoReport.directory":"./profiles/{model_size}"}}'

In [12]:
# Environment variable setting for debugging
# os.environ["POPLAR_LOG_LEVEL"] = "DEBUG"
# os.environ["POPTORCH_LOG_LEVEL"] = "TRACE"
# os.environ["POPART_LOG_LEVEL"] = "DEBUG"
# os.environ["POPART_IR_DUMP"]= "whisper_ir_dump.log"

In [13]:
# Build the IPU configuration for the selected preset, then derive the
# inference-mode options from it.
ipu_config = IPUConfig(
    layers_per_ipu=iwc.layers_per_ipu,
    executable_cache_dir=executable_cache_dir,
)
opts = ipu_config.to_options(for_inference=True)

In [14]:
# Arguments for the IPUTrainer. Training and evaluation are both switched off
# (do_train / do_eval); the trainer is only used to run prediction.
# The pod type is taken from `pod_type`, which honours the GRAPHCORE_POD_TYPE
# environment variable and falls back to the preset's own value, so the
# override is respected here instead of being ignored.
training_args = IPUTrainingArguments(
    output_dir="/tmp/outputs",
    do_train=False,
    do_eval=False,
    logging_steps=25,
    dataloader_num_workers=32,
    resume_from_checkpoint=True,
    pad_on_batch_axis=False,
    pod_type=pod_type,
    save_strategy="epoch",
    report_to="none",
    # fp32=True,
    label_names=None,
)

In [15]:
# Wrap the model in an IPUTrainer using the IPU config and arguments built above.
ipt = IPUTrainer(model=model, ipu_config=ipu_config, args=training_args)
# Set the PopART option "saveInitializersToFile" on the trainer's evaluation
# options. NOTE(review): presumably this stores the weights in weights.onnx
# rather than inside the in-memory model — confirm against the PopART docs.
ipt.eval_opts._Popart.set("saveInitializersToFile", "weights.onnx")  # needed for larger models

---------- Device Allocation -----------
conv1, conv2, embed_positions  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 0
Decoder 0  --> IPU 0
Decoder 1  --> IPU 0
Decoder 2  --> IPU 0
Decoder 3  --> IPU 0
Head       --> IPU 0
---------------------------------------


_PopartOptions(instrumentWithHardwareCycleCounter=False, rearrangeAnchorsOnHost=False, cachePath='/tmp/whisper_exe_cache/whisper_inference', enableEngineCaching=True, enableStochasticRounding=False, partialsTypeMatMuls='half', convolutionOptions={'partialsType': 'half'}, disableGradAccumulationTensorStreams=True, accumulateOuterFragmentSettings.schedule=3, accumulateOuterFragmentSettings.excludedVirtualGraphs=['0'], outlineThreshold=10.0, subgraphCopyingStrategy=1, scheduleNonWeightUpdateGradientConsumersEarly=True, patterns_level=2, patterns={'TiedGather': True, 'TiedGatherAccumulate': True, 'UpdateInplacePrioritiesForIpu': True}, engineOptions={'opt.useAutoloader': 'true', 'target.syncReplicasIndependently': 'true'}, saveInitializersToFile='weights.onnx')

In [16]:
# Cast the input features to float16.
input_features = input_features.half()

# One-example dataset: the audio features plus a single decoder start token.
# The start token id is read from the model config rather than hard-coded
# (it is 50258 for the multilingual checkpoints listed in `ipu_whisper`), so
# the cell stays correct for checkpoints that use a different start token.
dataset = Dataset.from_dict({
    "audio": [input_features],
    "decoder_input_ids": [torch.tensor([[model.config.decoder_start_token_id]])],
})

In [17]:
# Run prediction on the one-example dataset. As the captured log shows, this
# call also compiles (or loads) the model before running.
ipu_results = ipt.predict(test_dataset=dataset)

Compiling Model...
Graph compilation: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:48<00:00]
2023-02-09T15:23:38.567652Z popart:devicex 718139.718139 W: Specified directory not found. Creating "/tmp/whisper_exe_cache/whisper_inference" directory 
Compiled/Loaded model in 116.25388805754483 secs
***** Running Prediction *****
  Num examples = 1
  Batch size = 1
  0%|                                                                                                                                                                                 | 0/1 [00:00<?, ?it/s]

In [18]:
# Display the first element of the predictions returned by predict().
ipu_results.predictions[0]   # logits

array([[[-1.606, -2.879,  2.643, ...,  1.016,  2.459,  2.367]]],
      dtype=float16)