[DAP/Whisper]Add Whisper Preprocessor. (#313)

buddy-compiler · Jul 15, 2024 · a1a0384 · a1a0384
1 parent 0697866
commit a1a0384
Show file tree

Hide file tree

Showing 18 changed files with 1,899 additions and 71 deletions.
diff --git a/examples/BuddyWhisper/CMakeLists.txt b/examples/BuddyWhisper/CMakeLists.txt
@@ -75,11 +75,17 @@ SET_TARGET_PROPERTIES(
   PROPERTIES
   LINKER_LANGUAGE C)
 
-add_executable(buddy-whisper-run whisper-main.cpp)
+set(BUDDY_WHISPER_FILES
+  whisper-main.h
+  whisper-main.cpp
+)
+
+add_executable(buddy-whisper-run ${BUDDY_WHISPER_FILES})
 target_link_directories(buddy-whisper-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR})
 
 set(BUDDY_WHISPER_LIBS
   WHISPER
+  BuddyLibDAP
   mlir_c_runner_utils
   omp
 )

diff --git a/examples/BuddyWhisper/README.md b/examples/BuddyWhisper/README.md
@@ -1,7 +1,7 @@
 # Buddy Compiler WHISPER Example
 
 ## Introduction
-This example shows how to use Buddy Compiler to compile a WHISPER model to MLIR code then run it.  The [model](openai/whisper-base) is a pre-trained model for automatic speech recognition (ASR) and speech translation.
+This example shows how to use Buddy Compiler to compile a WHISPER model to MLIR code and then run it. The [model](https://huggingface.co/openai/whisper-base) is a pre-trained model for automatic speech recognition (ASR) and speech translation (ST).
 
 
 ## How to run
@@ -63,15 +63,13 @@ $ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
 $ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
 ```
 
-3. Set model and dataset environment variable.
+3. Set the model environment variable.
 
 ```bash
 $ export WHISPER_MODEL_PATH=/path-to-whisper-model/
-$ export AUDIO_DATASET_PATH=/path-to-audio-dataset/
 
 // For example:
 $ export WHISPER_MODEL_PATH=/home/xxx/whisper-base
-$ export AUDIO_DATASET_PATH=/home/xxx/librispeech_asr_dummy
 ```
 
 4. Build and run the WHISPER example
@@ -83,4 +81,4 @@ $ cd bin
 $ ./buddy-whisper-run
 ```
 
-4. Enjoy it!
+5. Enjoy it!
diff --git a/examples/BuddyWhisper/import-whisper.py b/examples/BuddyWhisper/import-whisper.py
@@ -14,16 +14,15 @@
 #
 # ===---------------------------------------------------------------------------
 #
-# This is the example of whisper model.
+# This is an example for whisper model.
 #
 # ===---------------------------------------------------------------------------
 
 import os
 import torch
 import torch._dynamo as dynamo
 from torch._inductor.decomposition import decompositions as inductor_decomp
-from transformers import WhisperProcessor, WhisperForConditionalGeneration
-from datasets import load_dataset
+from transformers import WhisperForConditionalGeneration
 import numpy
 
 from buddy.compiler.frontend import DynamoCompiler
@@ -34,27 +33,20 @@
 # Retrieve the Whisper model path from environment variables.
 model_path = os.environ.get("WHISPER_MODEL_PATH")
 if model_path is None:
-    raise EnvironmentError(
-        "The environment variable 'WHISPER_MODEL_PATH' is not set or is invalid."
-    )
+    model_path = "openai/whisper-base"
 
-# Initialize the tokenizer and model from the specified model path.
-processor = WhisperProcessor.from_pretrained(model_path)
+# Initialize the model from the specified model path.
 model = WhisperForConditionalGeneration.from_pretrained(model_path)
 model.config.use_cache = False
 
-dataset_path = os.environ.get("AUDIO_DATASET_PATH")
-ds = load_dataset(dataset_path, "clean", split="validation")
-sample = ds[1]["audio"]
-input_features = processor(
-    sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt"
-).input_features
-
-decoder_input_ids = torch.tensor([[50258] * 448], dtype=torch.long)
+# Generate placeholder for inputs.
+input_features = torch.zeros(size=(1, 80, 3000), dtype=torch.float32)
+decoder_input_ids = torch.zeros(size=(1, 448), dtype=torch.long)
 inputs = {
     "input_features": input_features,
     "decoder_input_ids": decoder_input_ids,
 }
+
 # Initialize Dynamo Compiler with specific configurations as an importer.
 dynamo_compiler = DynamoCompiler(
     primary_registry=tosa.ops_registry,

diff --git a/examples/BuddyWhisper/input_features.data b/examples/BuddyWhisper/input_features.data
diff --git a/examples/BuddyWhisper/whisper-main.cpp b/examples/BuddyWhisper/whisper-main.cpp
@@ -13,38 +13,17 @@
 // limitations under the License.
 //
 //===----------------------------------------------------------------------===//
+//
+// This file implements an example for Whisper Model Inference.
+//
+//===----------------------------------------------------------------------===//
 
-#include <buddy/Core/Container.h>
-#include <buddy/LLM/TextContainer.h>
-#include <chrono>
-#include <cmath>
-#include <cstddef>
-#include <filesystem>
-#include <fstream>
-#include <iostream>
-using namespace buddy;
-
-constexpr size_t ParamsSize = 99148800;
-constexpr size_t MaxVocabSize = 51865;
-constexpr size_t MaxTokenLength = 448;
-constexpr size_t HiddenSize = 512;
-
-/// Declare Whisper forward function.
-extern "C" void _mlir_ciface_forward(MemRef<float, 3> *, MemRef<float, 1> *,
-                                     MemRef<float, 3> *, MemRef<size_t, 2> *);
+#include "whisper-main.h"
 
 // -----------------------------------------------------------------------------
 // Helper Functions
 // -----------------------------------------------------------------------------
 
-/// Capture input message.
-void getUserInput(std::string &inputStr) {
-  std::cout << "\nPlease send a message:" << std::endl;
-  std::cout << ">>> ";
-  getline(std::cin, inputStr);
-  std::cout << std::endl;
-}
-
 /// Print [Log] label in bold blue format.
 void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
 
@@ -83,30 +62,21 @@ void loadParameters(const std::string &paramFilePath,
             << std::endl;
 }
 
-void loadAudio(const std::string &paramFilePath, MemRef<float, 3> &params) {
-  const auto loadStart = std::chrono::high_resolution_clock::now();
-  std::ifstream paramFile(paramFilePath, std::ios::in | std::ios::binary);
-  if (!paramFile.is_open()) {
-    throw std::runtime_error("[Error] Failed to open input_features file!");
-  }
-  printLogLabel();
-  std::cout << "Loading input_features..." << std::endl;
+/// Load rawSpeech into rawAudioData, then compute audioFeatures from it.
+void runPreprocess(MemRef<double, 1> &rawAudioData,
+                   MemRef<float, 3> &audioFeatures) {
+  // Build the container from rawSpeech; the temporary is moved implicitly.
+  intptr_t dataShape[1] = {AudioDataLength};
+  rawAudioData = MemRef<double, 1>(rawSpeech, dataShape);
   printLogLabel();
-  std::cout << "input_features file: "
-            << std::filesystem::canonical(paramFilePath) << std::endl;
-
-  paramFile.read(reinterpret_cast<char *>(params.getData()),
-                 sizeof(float) * (params.getSize()));
-
-  if (paramFile.fail()) {
-    throw std::runtime_error("Error occurred while reading params file!");
-  }
-  paramFile.close();
+  std::cout << "Preprocessing audio..." << std::endl;
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  dap::whisperPreprocess(&rawAudioData, &audioFeatures);
   const auto loadEnd = std::chrono::high_resolution_clock::now();
   const std::chrono::duration<double, std::milli> loadTime =
       loadEnd - loadStart;
   printLogLabel();
-  std::cout << "input_features load time: " << (double)(loadTime.count()) / 1000
+  std::cout << "Audio preprocess time: " << (double)(loadTime.count()) / 1000
             << "s\n"
             << std::endl;
 }
@@ -129,14 +99,13 @@ int main() {
   /// Define directories of vocabulary and parameter files.
   const std::string vocabDir = "../../examples/BuddyWhisper/vocab.txt";
   const std::string paramsDir = "../../examples/BuddyWhisper/arg0.data";
-  const std::string input_featuresDir =
-      "../../examples/BuddyWhisper/input_features.data";
 
   /// Initialize data containers
   //  - Result container
   //  - Output container.
   //  - Parameters container.
   Text<size_t, 2> outputContainer;
+  MemRef<double, 1> rawAudioContainer({AudioDataLength});
   MemRef<float, 3> audioInput({1, 80, 3000});
   MemRef<float, 3> resultContainer[2] = {
       MemRef<float, 3>({1, 1500, 512}, false, 0),
@@ -148,9 +117,10 @@ int main() {
   /// Fill data into containers
   //  - Output: register vocabulary.
   //  - Parameters: load parameters from the `arg0` file into the container.
+  //  - Input: compute audioInput.
   outputContainer.loadVocab(vocabDir);
   loadParameters(paramsDir, paramsContainer);
-  loadAudio(input_featuresDir, audioInput);
+  runPreprocess(rawAudioContainer, audioInput);
 
   /// Run Whisper Inference
   //  - Perform the forward function.

diff --git a/examples/BuddyWhisper/whisper-main.h b/examples/BuddyWhisper/whisper-main.h
diff --git a/examples/DAPDialect/CMakeLists.txt b/examples/DAPDialect/CMakeLists.txt
@@ -47,3 +47,14 @@ add_dependencies(buddy-iir-vectorization buddy-opt)
 target_link_libraries(buddy-iir-vectorization
   BuddyLibDAPVectorization
 )
+
+#-------------------------------------------------------------------------------
+# Buddy DAP Dialect WhisperPreprocess Operation
+#-------------------------------------------------------------------------------
+
+add_executable(buddy-whisper-preprocess WhisperPreprocess.cpp)
+add_dependencies(buddy-whisper-preprocess buddy-opt)
+target_link_libraries(buddy-whisper-preprocess
+  BuddyLibDAP
+  mlir_c_runner_utils
+)
diff --git a/examples/DAPDialect/WhisperPreprocess.cpp b/examples/DAPDialect/WhisperPreprocess.cpp
diff --git a/frontend/Interfaces/buddy/DAP/DAP.h b/frontend/Interfaces/buddy/DAP/DAP.h
@@ -26,5 +26,6 @@
 #include "buddy/DAP/DSP/Biquad.h"
 #include "buddy/DAP/DSP/FIR.h"
 #include "buddy/DAP/DSP/IIR.h"
+#include "buddy/DAP/DSP/WhisperPreprocess.h"
 
 #endif // FRONTEND_INTERFACES_BUDDY_DAP_DAP
diff --git a/frontend/Interfaces/buddy/DAP/DSP/WhisperPreprocess.h b/frontend/Interfaces/buddy/DAP/DSP/WhisperPreprocess.h
@@ -0,0 +1,45 @@
+//===- WhisperPreprocess.h ------------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// Header file for whisper preprocess operation in DAP dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FRONTEND_INTERFACES_BUDDY_DAP_DSP_WHISPERPREPROCESS
+#define FRONTEND_INTERFACES_BUDDY_DAP_DSP_WHISPERPREPROCESS
+
+#include "buddy/Core/Container.h"
+#include "buddy/DAP/AudioContainer.h"
+#include "buddy/DAP/DSP/IIRDesign.h"
+
+namespace dap {
+namespace detail {
+// C entry point of the whisper preprocess kernel (defined outside this header).
+extern "C" {
+void _mlir_ciface_buddy_whisperPreprocess(MemRef<double, 1> *inputRawSpeech,
+                                          MemRef<float, 3> *outputFeatures);
+}
+} // namespace detail
+
+// Whisper preprocess wrapper; `inline` because it is defined in a header (ODR).
+inline void whisperPreprocess(MemRef<double, 1> *inputRawSpeech,
+                              MemRef<float, 3> *outputFeatures) {
+  detail::_mlir_ciface_buddy_whisperPreprocess(inputRawSpeech, outputFeatures);
+}
+
+} // namespace dap
+
+#endif // FRONTEND_INTERFACES_BUDDY_DAP_DSP_WHISPERPREPROCESS
diff --git a/frontend/Interfaces/lib/CMakeLists.txt b/frontend/Interfaces/lib/CMakeLists.txt
@@ -59,7 +59,33 @@ add_custom_command(OUTPUT DAP.o
   DEPENDS buddy-opt
   )
 
-add_library(BuddyLibDAP STATIC DAP.o)
+add_custom_command(OUTPUT DAP-extend.o # Lower DAP-extend.mlir to an object file.
+  COMMAND ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP-extend.mlir
+              -extend-dap
+              -one-shot-bufferize
+              -convert-linalg-to-loops
+              -convert-scf-to-cf
+              -expand-strided-metadata
+              -lower-affine
+              -convert-vector-to-llvm
+              -memref-expand
+              -arith-expand
+              -convert-arith-to-llvm
+              -finalize-memref-to-llvm
+              -convert-math-to-llvm
+              -llvm-request-c-wrappers
+              -convert-func-to-llvm
+              -reconcile-unrealized-casts |
+          ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
+          ${LLVM_MLIR_BINARY_DIR}/llc
+              -mtriple=${BUDDY_TARGET_TRIPLE}
+              -mattr=${BUDDY_OPT_ATTR}
+              -filetype=obj -relocation-model=pic
+              -o ${CMAKE_CURRENT_BINARY_DIR}/DAP-extend.o
+  DEPENDS buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP-extend.mlir # Rebuild when the MLIR source changes.
+  )
+
+add_library(BuddyLibDAP STATIC DAP.o DAP-extend.o)
 
 SET_TARGET_PROPERTIES(BuddyLibDAP PROPERTIES
   LINKER_LANGUAGE CXX

diff --git a/frontend/Interfaces/lib/DAP-extend.mlir b/frontend/Interfaces/lib/DAP-extend.mlir
@@ -0,0 +1,4 @@
+func.func @buddy_whisperPreprocess(%in : memref<?xf64>, %out : memref<1x80x3000xf32>) -> () {
+  dap.whisper_preprocess %in, %out : memref<?xf64>, memref<1x80x3000xf32>
+  return
+}
diff --git a/midend/include/Dialect/DAP/DAPOps.td b/midend/include/Dialect/DAP/DAPOps.td
@@ -94,4 +94,24 @@ def DAP_IirOp : DAP_Op<"iir"> {
   }];
 }
 
+def DAP_WhisperPreprocessOp : DAP_Op<"whisper_preprocess"> {
+  let summary = [{Preprocessor for the Whisper model; extracts features from input audio.
+  The input MemRef stores the raw speech data; the output MemRef receives the computed
+  features with shape memref<1x80x3000xf32>.
+
+  ```mlir
+    dap.whisper_preprocess %input, %output : memref<?xf64>, memref<1x80x3000xf32>
+  ```
+  }];
+  // $memrefO receives the computed features, so it is marked MemWrite, not MemRead.
+  let arguments = (ins Arg<AnyRankedOrUnrankedMemRef, "inputMemref",
+                           [MemRead]>:$memrefI,
+                       Arg<AnyRankedOrUnrankedMemRef, "outputMemref",
+                           [MemWrite]>:$memrefO);
+
+  let assemblyFormat = [{
+    $memrefI `,` $memrefO attr-dict `:` type($memrefI) `,` type($memrefO)
+  }];
+}
+
 #endif // DAP_DAPOPS_TD
diff --git a/midend/lib/Conversion/CMakeLists.txt b/midend/lib/Conversion/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(LowerBud)
 add_subdirectory(LowerDIP)
 add_subdirectory(LowerRVV)
 add_subdirectory(LowerDAP)
+add_subdirectory(ExtendDAP)
 add_subdirectory(DAPVectorization)
 add_subdirectory(MatMulOptimization)
 add_subdirectory(TransposeOptimization)

diff --git a/midend/lib/Conversion/ExtendDAP/CMakeLists.txt b/midend/lib/Conversion/ExtendDAP/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_mlir_library(ExtendDAPPass
+  ExtendDAPPass.cpp
+  )