<a href="https://colab.research.google.com/github/battuzz/torch_aot/blob/main/TorchAOT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AOT Compilation of torch models

Install latest version of pytorch (CPU)

In [1]:
!pip3 install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

[0mLooking in indexes: https://download.pytorch.org/whl/cpu
Collecting sympy>=1.13.3 (from torch)
  Using cached https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Using cached https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl (6.2 MB)
[0mInstalling collected packages: sympy
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1+cpu which is incompatible.[0m[31m
[0mSuccessfully installed sympy


In [None]:
!pip install cmake

Import libraries

In [19]:
import torch
print(torch.__version__)

torch.set_default_dtype(torch.float64)

2.7.1+cpu


## Model definition

In [20]:
NUM_INPUTS = 5
NUM_OUTPUTS = 7
NUM_INDUCING_POINTS = 350

class ModelNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(NUM_INPUTS, 128)
        self.fc2 = torch.nn.Linear(128, 128)
        self.fc3 = torch.nn.Linear(128, NUM_OUTPUTS)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

def squared_distance(x1, x2):
    return (
        torch.sum(x1**2, dim=1, keepdim=True)
        + torch.sum(x2**2, dim=1)
        - 2 * torch.mm(x1, x2.t())
    )


def rbf_kernel(x1, x2, lengthscale=1.0):
    dist = squared_distance(x1 / lengthscale, x2 / lengthscale)
    return torch.exp(-0.5 * dist)

class ModelGPPosterior(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.lengthscales = torch.nn.Parameter(torch.randn(NUM_INPUTS))
        self.inducing_points = torch.nn.Parameter(
        torch.randn(NUM_INDUCING_POINTS, NUM_INPUTS)
        )
        self.alpha = torch.nn.Parameter(torch.randn(NUM_INDUCING_POINTS, NUM_OUTPUTS))

    def forward(self, x):
        Kuf = rbf_kernel(x, self.inducing_points, self.lengthscales)
        mean = Kuf @ self.alpha
        return mean

In [21]:
def train_with_random_data(model):
    X = torch.randn(1000, NUM_INPUTS)
    y = torch.randn(1000, NUM_OUTPUTS)

    model.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    for epoch in range(30):
        optimizer.zero_grad()
        output = model(X)
        loss = torch.nn.functional.mse_loss(output, y)
        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

In [22]:
model_nn = ModelNN()
model_gp = ModelGPPosterior()
train_with_random_data(model_nn)
train_with_random_data(model_gp)

Epoch 1, Loss: 1.0249337383042418
Epoch 2, Loss: 1.0240663061429183
Epoch 3, Loss: 1.023237230116913
Epoch 4, Loss: 1.0224466135230583
Epoch 5, Loss: 1.0216944885337036
Epoch 6, Loss: 1.0209787778836588
Epoch 7, Loss: 1.0202930912333692
Epoch 8, Loss: 1.0196385097524077
Epoch 9, Loss: 1.0190139914511325
Epoch 10, Loss: 1.018421251218499
Epoch 11, Loss: 1.0178573037013714
Epoch 12, Loss: 1.0173186258010265
Epoch 13, Loss: 1.0168017457016565
Epoch 14, Loss: 1.0163069628270254
Epoch 15, Loss: 1.015832781810941
Epoch 16, Loss: 1.0153746602166944
Epoch 17, Loss: 1.0149334798490608
Epoch 18, Loss: 1.0145104799240747
Epoch 19, Loss: 1.0141028128256495
Epoch 20, Loss: 1.013710964157435
Epoch 21, Loss: 1.0133321824577843
Epoch 22, Loss: 1.0129642900206837
Epoch 23, Loss: 1.012606444883554
Epoch 24, Loss: 1.0122576431917354
Epoch 25, Loss: 1.011916529581065
Epoch 26, Loss: 1.0115826434628437
Epoch 27, Loss: 1.011255069098299
Epoch 28, Loss: 1.0109339938948756
Epoch 29, Loss: 1.0106186913272117
E

In [35]:
example_input = torch.randn((1, NUM_INPUTS))

# Export NN
model_nn.eval()

exported = torch.export.export(model_nn, (example_input,))
torch._inductor.aoti_compile_and_package(
    exported,
    package_path="model_nn.pt2",
)
with open("model_nn_inputs.txt", "w") as f:
    f.write(
        f"{len(example_input.shape)} {' '.join(map(str, example_input.shape))}"
    )

model_gp.eval()

exported = torch.export.export(model_gp, (example_input,))
torch._inductor.aoti_compile_and_package(
    exported,
    package_path="model_gp.pt2",
)
with open("model_gp_inputs.txt", "w") as f:
    f.write(
        f"{len(example_input.shape)} {' '.join(map(str, example_input.shape))}"
    )

In [24]:
cmake_contents = """cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(aoti_example)

find_package(Torch REQUIRED)

add_executable(aoti_example inference.cpp)

target_link_libraries(aoti_example "${TORCH_LIBRARIES}")
set_property(TARGET aoti_example PROPERTY CXX_STANDARD 17)
"""
with open('CMakeLists.txt', 'w') as f:
    f.write(cmake_contents)

In [25]:
build_contents = """export CMAKE_PREFIX_PATH=/usr/local/lib/python3.11/dist-packages/torch/share/cmake
export TORCHINDUCTOR_FREEZING=1


rm -rf build
mkdir build
cmake -B build .
cmake --build build --config Release
"""
with open('build.sh', 'w') as f:
    f.write(build_contents)

In [26]:
!chmod +x ./build.sh

In [27]:
cpp_content = """#include <iostream>
#include <vector>
#include <chrono>
#include <fstream>

#include <torch/torch.h>
#include <torch/csrc/inductor/aoti_package/model_package_loader.h>

using namespace std::chrono;

int main(int argc, char* argv[]) {

    if (argc < 3) {
        std::cerr << "Usage: " << argv[0] << " <model.pt2> <inputs.txt>" << std::endl;
        return 1;
    }

    // Load input
    std::ifstream input_file{argv[2]};
    if (!input_file) {
        std::cerr << "Error opening input file: " << argv[2] << std::endl;
        return 1;
    }
    int num_dims {};
    input_file >> num_dims;

    std::vector<int64_t> input_dims{};
    for (int i = 0; i < num_dims; ++i) {
        int64_t dim_size;
        input_file >> dim_size;
        input_dims.push_back(dim_size);
    }

    input_file.close();

    // auto arrayRef = c10::makeArrayRef(input_dims);
    torch::Tensor input = torch::randn(input_dims, torch::dtype(torch::kFloat64));
    std::vector<torch::Tensor> inputs { input };

    c10::InferenceMode mode;
    torch::inductor::AOTIModelPackageLoader loader(argv[1], "model", false);


    // std::vector<torch::Tensor> inputs = {torch::randn({1, 2}, torch::dtype(torch::kFloat64))};

    // Warmup
    std::vector<torch::Tensor> outputs;
    for (int i = 0; i < 1000; i++) {
        outputs = loader.run(inputs);
    }

    // Benchmark
    auto start_time = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < 1000; i++) {
        outputs = loader.run(inputs);
    }
    auto end_time = std::chrono::high_resolution_clock::now();

    auto elapsed = duration_cast<microseconds>(end_time - start_time);
    std::cout << "Average inference time over 1000 runs: "
              << (elapsed.count() / 1000) << " us" << std::endl;

    return 0;
}
"""

with open('inference.cpp', 'w') as f:
    f.write(cpp_content)

In [28]:
!ls .

build	  CMakeLists.txt  model_nn_inputs.txt  sample_data
build.sh  inference.cpp   model_nn.pt2


In [29]:
!./build.sh

-- The C compiler identification is GNU 11.4.0
-- The CXX compiler identification is GNU 11.4.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
  static library kineto_LIBRARY-NOTFOUND not found.
Call Stack (most recent call first):
  /usr/local/lib/python3.11/dist-packages/torch/share/cmake/Torch/TorchConfig.cmake:125 (append_torchlib_if_found)
  CMakeLists.txt:4 (find_package)

[0m
-- Found Torch: /usr/local/lib/python3.11/dist-packages/torch/lib/libtorch.so
-- Configuring done (0.4s)
-- Generating done (0.0s)
-- Build files have been written to: /content/build
[ 50%] [32mBuilding CXX object CMakeFiles/aoti_example.dir/inferenc

In [34]:
!./build/aoti_example model_nn.pt2 model_nn_inputs.txt

Average inference time over 1000 runs: 12 us


In [38]:
!./build/aoti_example model_gp.pt2 model_gp_inputs.txt

Average inference time over 1000 runs: 14 us


# Try to export with derivatives / jacobian

In [51]:
class ModelWithJacobian(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model_ = model

    def forward(self, x):
        dy = torch.autograd.functional.jacobian(self.model_, x, create_graph=True)
        return dy


class ModelWithGrads(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model_ = model

    def forward(self, x):
        y = self.model_(x)
        dy = torch.autograd.grad(y, x, retain_graph=True, create_graph=True, )
        return dy


In [52]:
m = ModelWithJacobian(model_nn)

m.eval()

exported = torch.export.export(m, (example_input,))
torch._inductor.aoti_compile_and_package(
    exported,
    package_path="model_grads.pt2",
)

Unsupported: Failed to convert args/kwargs to proxy
  Explanation: Missing `as_proxy()` implementation for some arg/kwarg.


  Developer debug context: call_function args: NNModuleVariable() TensorVariable() ConstantVariable(bool: True)


from user code:
   File "/tmp/ipython-input-51-492552076.py", line 7, in forward
    dy = torch.autograd.functional.jacobian(self.model_, x, create_graph=True)

Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"


In [53]:
m = ModelWithGrads(model_nn)

m.eval()

exported = torch.export.export(m, (example_input,))
torch._inductor.aoti_compile_and_package(
    exported,
    package_path="model_grads.pt2",
)

Unsupported: Attempted to call function marked as skipped
  Explanation: Dynamo developers have intentionally marked that the function `grad` in file `/usr/local/lib/python3.11/dist-packages/torch/autograd/__init__.py` should not be traced.
  Hint: Avoid calling the function `grad`.
  Hint: Remove the function `grad` or the file `/usr/local/lib/python3.11/dist-packages/torch/autograd/__init__.py` from torch/_dynamo/trace_rules.py. More graph breaks may occur as a result of attempting to trace into the function.
  Hint: Please file an issue to PyTorch.

  Developer debug context: module: torch.autograd, qualname: grad, skip reason: <missing reason>


from user code:
   File "/tmp/ipython-input-51-492552076.py", line 18, in forward
    dy = torch.autograd.grad(y, x, retain_graph=True, create_graph=True, )

Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"
