##### Copyright 2023 The IREE Authors

In [1]:
#@title Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/1/10/PyTorch_logo_icon.svg/640px-PyTorch_logo_icon.svg.png" height="20px"> PyTorch Ahead-of-time (AOT) export workflows using <img src="https://raw.githubusercontent.com/iree-org/iree/main/docs/website/docs/assets/images/ghost.svg" height="20px"> IREE

This notebook shows how to use [SHARK-Turbine](https://github.com/nod-ai/SHARK-Turbine) (installed below as the `iree-turbine` pip package) to export a program from a PyTorch session to [IREE](https://github.com/iree-org/iree), leveraging [torch-mlir](https://github.com/llvm/torch-mlir) under the hood.

SHARK-Turbine contains both a "simple" AOT exporter and an underlying advanced
API for complicated models and full feature availability. This notebook only
uses the "simple" exporter.

## Setup

In [2]:
%%capture
#@title Uninstall existing packages
#   This avoids some warnings when installing specific PyTorch packages below.
#   `%%capture` (which must stay on the first line of the cell) hides this
#   cell's output; `-y` answers pip's confirmation prompt so the cell can run
#   unattended.
!python -m pip uninstall -y fastai torchaudio torchdata torchtext torchvision

In [3]:
#@title Install PyTorch 2.3.0 (prerelease)
#   `--index-url` points pip at PyTorch's "test" wheel index for CPU builds,
#   `--pre` allows prerelease versions to be selected, and `--upgrade` replaces
#   whatever torch is already installed with the pinned 2.3.0.
!python -m pip install --pre --index-url https://download.pytorch.org/whl/test/cpu --upgrade torch==2.3.0

Looking in indexes: https://download.pytorch.org/whl/test/cpu
Collecting torch==2.3.0
  Downloading https://download.pytorch.org/whl/test/cpu/torch-2.3.0%2Bcpu-cp310-cp310-linux_x86_64.whl (190.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.4/190.4 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 2.2.1+cu121
    Uninstalling torch-2.2.1+cu121:
      Successfully uninstalled torch-2.2.1+cu121
Successfully installed torch-2.3.0+cpu


In [4]:
#@title Install iree-turbine
#   The iree-compiler and iree-runtime packages used later in this notebook are
#   pulled in as dependencies of iree-turbine.
#   NOTE(review): unlike torch above, this install is not version-pinned, so a
#   fresh run may resolve to a different release than the 2.3.0rc20240410 shown
#   in the recorded output. Consider pinning for reproducibility.

!python -m pip install iree-turbine

Collecting iree-turbine
  Downloading iree_turbine-2.3.0rc20240410-py3-none-any.whl (150 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.4/150.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting iree-compiler>=20240410.859 (from iree-turbine)
  Downloading iree_compiler-20240410.859-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (64.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.4/64.4 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting iree-runtime>=20240410.859 (from iree-turbine)
  Downloading iree_runtime-20240410.859-cp310-cp310-manylinux_2_28_x86_64.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: iree-runtime, iree-compiler, iree-turbine
Successfully installed iree-compiler-20240410.859 iree-runtime-20240410.859 iree-turbine-2.3.0rc20240410


In [5]:
#@title Report version information
# Shell: pull the "Version:" line out of pip's metadata for iree-turbine.
!echo "Installed iree-turbine, $(python -m pip show iree_turbine | grep Version)"

# Shell: print the version banner of the iree-compile command-line tool.
!echo -e "\nInstalled IREE, compiler version information:"
!iree-compile --version

# `torch` is imported here for the first time; the cells below use it without
# importing it again, so this cell must run before them.
import torch
print("\nInstalled PyTorch, version:", torch.__version__)

Installed iree-turbine, Version: 2.3.0rc20240410

Installed IREE, compiler version information:
IREE (https://iree.dev):
  IREE compiler version 20240410.859 @ b4273a4bfc66ba6dd8f62f6483d74d42a7b936f1
  LLVM version 19.0.0git
  Optimized build

Installed PyTorch, version: 2.3.0+cpu


## Sample AOT workflow

1. Define a program using `torch.nn.Module`
2. Export the program using `aot.export()`
3. Compile to a deployable artifact
   * a: By staying within a Python session
   * b: By outputting MLIR and continuing using native tools

Useful documentation:

* [PyTorch Modules](https://pytorch.org/docs/stable/notes/modules.html) (`nn.Module`) as building blocks for stateful computation
* IREE compiler and runtime [Python bindings](https://www.iree.dev/reference/bindings/python/)

In [6]:
#@title 1. Define a program using `torch.nn.Module`
# Seed the global RNG first so the random parameters below are reproducible.
torch.manual_seed(0)


class LinearModule(torch.nn.Module):
  """Affine layer computing `input @ weight + bias` with random parameters."""

  def __init__(self, in_features, out_features):
    """Creates the learnable `weight` and `bias` parameters.

    Args:
      in_features: Size of the input's last dimension (rows of `weight`).
      out_features: Size of the output (columns of `weight`, length of `bias`).
    """
    super().__init__()
    # Draw order matters for reproducibility: weight first, then bias.
    weight_init = torch.randn(in_features, out_features)
    self.weight = torch.nn.Parameter(weight_init)
    bias_init = torch.randn(out_features)
    self.bias = torch.nn.Parameter(bias_init)

  def forward(self, input):
    """Returns `input` multiplied by `weight`, offset by `bias`."""
    projected = input @ self.weight
    return projected + self.bias


linear_module = LinearModule(in_features=4, out_features=3)

In [None]:
#@title 2. Export the program using `aot.export()`
import shark_turbine.aot as aot

# Example input handed to the exporter together with the module. Its shape (4)
# matches the in_features of `linear_module`, and it shows up as the static
# `[4],f32` argument type of `@main` in the MLIR printed in step 3b.
example_arg = torch.randn(4)
# `export_output` is reused by step 3a (`.compile()`) and by step 3b
# (`.print_readable()` / `.save_mlir()`).
export_output = aot.export(linear_module, example_arg)

In [8]:
#@title 3a. Compile fully to a deployable artifact, in our existing Python session
import numpy as np
import iree.runtime as ireert

# Compiling without leaving Python lets the API reuse memory, which helps
# performance on large programs. No output path is given (`save_to=None`); the
# compiled result is read back through `map_memory()` below.
compiled_binary = export_output.compile(save_to=None)

# Exercise the compiled program through the IREE runtime API, using the same
# "local-task" device name that step 3b passes to iree-run-module.
config = ireert.Config("local-task")
vm_module = ireert.load_vm_module(
    ireert.VmModule.wrap_buffer(
        config.vm_instance,
        compiled_binary.map_memory(),
    ),
    config,
)

# Call the exported `main` function with a 4-element float32 vector and print
# the result after copying it back to the host.
input = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
result = vm_module.main(input)
print(result.to_host())

[ 1.4178505 -1.2343317 -7.4767942]


In [9]:
#@title 3b. Output MLIR then continue from Python or native tools later

# Leaving Python allows for file system checkpointing and grants access to
# native development workflows.

# The exported program and the compiled module are written to fixed locations
# under /tmp so the shell commands below can pick them up.
mlir_file_path = "/tmp/linear_module_pytorch.mlirbc"
vmfb_file_path = "/tmp/linear_module_pytorch_llvmcpu.vmfb"

# Show the exported program as text, then write it to `mlir_file_path`.
print("Exported .mlir:")
export_output.print_readable()
export_output.save_mlir(mlir_file_path)

# `{...}` interpolates the Python path variables into the shell commands.
# iree-compile reads the saved program and writes a .vmfb for the llvm-cpu
# backend; iree-run-module then runs that .vmfb on the same `local-task` device
# and with the same [1.0, 2.0, 3.0, 4.0] input used in step 3a.
print("Compiling and running...")
!iree-compile --iree-input-type=torch --iree-hal-target-backends=llvm-cpu {mlir_file_path} -o {vmfb_file_path}
!iree-run-module --module={vmfb_file_path} --device=local-task --input="4xf32=[1.0, 2.0, 3.0, 4.0]"

Exported .mlir:
module @module {
  func.func @main(%arg0: !torch.vtensor<[4],f32>) -> !torch.vtensor<[3],f32> {
    %int0 = torch.constant.int 0
    %0 = torch.aten.unsqueeze %arg0, %int0 : !torch.vtensor<[4],f32>, !torch.int -> !torch.vtensor<[1,4],f32>
    %1 = torch.vtensor.literal(dense_resource<torch_tensor_4_3_torch.float32> : tensor<4x3xf32>) : !torch.vtensor<[4,3],f32>
    %2 = torch.aten.mm %0, %1 : !torch.vtensor<[1,4],f32>, !torch.vtensor<[4,3],f32> -> !torch.vtensor<[1,3],f32>
    %int0_0 = torch.constant.int 0
    %3 = torch.aten.squeeze.dim %2, %int0_0 : !torch.vtensor<[1,3],f32>, !torch.int -> !torch.vtensor<[3],f32>
    %4 = torch.vtensor.literal(dense_resource<torch_tensor_3_torch.float32> : tensor<3xf32>) : !torch.vtensor<[3],f32>
    %int1 = torch.constant.int 1
    %5 = torch.aten.add.Tensor %3, %4, %int1 : !torch.vtensor<[3],f32>, !torch.vtensor<[3],f32>, !torch.int -> !torch.vtensor<[3],f32>
    return %5 : !torch.vtensor<[3],f32>
  }
}

{-#
  dialect_resources: {