In [1]:
import numpy as np

import torch
import torch.nn as nn

In [4]:
class AlexNet(nn.Module):
    """AlexNet-style image classifier.

    The network is built from three stages:

    * ``features``   - five convolutions, each followed by ReLU, with three
      max-pooling layers in between.
    * ``avgpool``    - adaptive average pooling to a fixed 6x6 spatial size, so
      the classifier input size does not depend on the input resolution.
    * ``classifier`` - three fully connected layers with dropout in front of
      the first two.

    Args:
        num_classes: Width of the final linear layer (number of output logits).
        dropout: Drop probability used by both ``nn.Dropout`` layers of the
            classifier. The default of 0.5 equals ``nn.Dropout``'s own default,
            so callers that do not pass it get the same network as before.
    """

    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()
        # Layer order is significant: it fixes the state-dict keys and the
        # order in which parameters are enumerated during ONNX export.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Fixed 6x6 output keeps the first Linear layer's input at 256 * 6 * 6.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch of 3-channel images.

        Args:
            x: Tensor of shape ``(N, 3, H, W)``; the first convolution expects
                three input channels.

        Returns:
            Tensor of shape ``(N, num_classes)`` holding unnormalized scores.
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension and flatten channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [5]:
# Build the network with its default arguments (1000 output classes).
# No pretrained weights are loaded in this notebook, so the parameters keep
# whatever values the layer constructors gave them.
model = AlexNet()

In [6]:
# Names assigned to the exported graph's inputs: the image tensor comes first,
# followed by "learned_0" .. "learned_15" for the 16 parameter tensors
# (a weight and a bias for each of the 5 conv and 3 linear layers).
input_names = ["actual_input_1", *(f"learned_{idx}" for idx in range(16))]
# Name assigned to the single output of the exported graph.
output_names = ["output1"]

In [7]:
input_names

['actual_input_1',
 'learned_0',
 'learned_1',
 'learned_2',
 'learned_3',
 'learned_4',
 'learned_5',
 'learned_6',
 'learned_7',
 'learned_8',
 'learned_9',
 'learned_10',
 'learned_11',
 'learned_12',
 'learned_13',
 'learned_14',
 'learned_15']

In [8]:
output_names

['output1']

In [9]:
# Example input handed to the exporter: a batch of 10 random 3x224x224 tensors
# on the CPU. The exported graph records this exact input shape
# (10x3x224x224 in the printed graph).
dummy_input = torch.randn(10, 3, 224, 224, device="cpu")

In [10]:
# Run the model once on `dummy_input` and write the recorded graph to
# "alexnet.onnx". `verbose=True` also prints the graph, and the name lists
# label the graph's inputs (image + parameters) and its output.
torch.onnx.export(
    model,
    dummy_input,
    "alexnet.onnx",
    verbose=True,
    input_names=input_names,
    output_names=output_names,
)

graph(%actual_input_1 : Float(10:150528, 3:50176, 224:224, 224:1, requires_grad=0, device=cpu),
      %learned_0 : Float(64:363, 3:121, 11:11, 11:1, requires_grad=1, device=cpu),
      %learned_1 : Float(64:1, requires_grad=1, device=cpu),
      %learned_2 : Float(192:1600, 64:25, 5:5, 5:1, requires_grad=1, device=cpu),
      %learned_3 : Float(192:1, requires_grad=1, device=cpu),
      %learned_4 : Float(384:1728, 192:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %learned_5 : Float(384:1, requires_grad=1, device=cpu),
      %learned_6 : Float(256:3456, 384:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %learned_7 : Float(256:1, requires_grad=1, device=cpu),
      %learned_8 : Float(256:2304, 256:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %learned_9 : Float(256:1, requires_grad=1, device=cpu),
      %learned_10 : Float(4096:9216, 9216:1, requires_grad=1, device=cpu),
      %learned_11 : Float(4096:1, requires_grad=1, device=cpu),
      %learned_12 : Float(4096:4096, 4096:1, 

In [11]:
import onnx

# Load the ONNX model that the export cell wrote to disk.
# NOTE(review): this rebinds `model`, which previously referred to the PyTorch
# AlexNet instance; from here on `model` is the loaded ONNX object and the
# nn.Module is no longer reachable under that name.
model = onnx.load("alexnet.onnx")

In [13]:
type(model)

onnx.onnx_ml_pb2.ModelProto

In [14]:
# Check that the IR is well formed.
# The cell produces no output, i.e. the checker did not complain about the
# model loaded above.
onnx.checker.check_model(model)

In [15]:
# Print a human readable representation of the graph.
# printable_graph() returns a single multi-line string; passing it to print()
# renders the line breaks, whereas leaving it as the cell's last expression
# shows the string's repr with literal "\n" sequences.
print(onnx.helper.printable_graph(model.graph))

'graph torch-jit-export (\n  %actual_input_1[FLOAT, 10x3x224x224]\n) initializers (\n  %learned_0[FLOAT, 64x3x11x11]\n  %learned_1[FLOAT, 64]\n  %learned_10[FLOAT, 4096x9216]\n  %learned_11[FLOAT, 4096]\n  %learned_12[FLOAT, 4096x4096]\n  %learned_13[FLOAT, 4096]\n  %learned_14[FLOAT, 1000x4096]\n  %learned_15[FLOAT, 1000]\n  %learned_2[FLOAT, 192x64x5x5]\n  %learned_3[FLOAT, 192]\n  %learned_4[FLOAT, 384x192x3x3]\n  %learned_5[FLOAT, 384]\n  %learned_6[FLOAT, 256x384x3x3]\n  %learned_7[FLOAT, 256]\n  %learned_8[FLOAT, 256x256x3x3]\n  %learned_9[FLOAT, 256]\n) {\n  %17 = Conv[dilations = [1, 1], group = 1, kernel_shape = [11, 11], pads = [2, 2, 2, 2], strides = [4, 4]](%actual_input_1, %learned_0, %learned_1)\n  %18 = Relu(%17)\n  %19 = MaxPool[kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [2, 2]](%18)\n  %20 = Conv[dilations = [1, 1], group = 1, kernel_shape = [5, 5], pads = [2, 2, 2, 2], strides = [1, 1]](%19, %learned_2, %learned_3)\n  %21 = Relu(%20)\n  %22 = MaxPool[kernel

# Tracing vs Scripting
- `Trace-based`: 모델을 한 번 실행하고, 그 실행 중에 실제로 수행된 연산자만 기록하여 내보내는 방식으로 작동
    - 모델이 동적인 경우(입력 데이터에 따라 동작이 변경되는 경우) export가 정확하지 않음.
    - 특정 입력 크기에 대해서만 유효 (trace시 명시적 입력이 필요한 이유)
    - 모델에 for loop, if condition과 같은 제어 흐름이 포함된 경우, loop는 풀리고(unroll) if condition은 trace 실행 때 선택된 분기만 남아, 그 실행과 동일한 `정적 그래프`를 내보냄
    - 동적 제어 흐름을 사용하려면 Script 기반 export를 사용해야 함
- `Script-based`: 내보내려는 모델이 `ScriptModule`인 경우.
    - `ScriptModule`은 TorchScript의 핵심 자료구조이며, TorchScript는 pytorch 코드에서 직렬화 및 최적화 가능한 모델을 생성하는 python 언어의 하위 집합

`trace-based exporter`를 사용하면 for loop이 풀린(unrolled) ONNX Graph를 얻음

In [1]:
import torch

# Trace-based only

class LoopModel(torch.nn.Module):
    """Adds every integer in ``range(y)`` to ``x`` using a plain Python loop."""

    def forward(self, x, y):
        # The loop runs in Python, so the number of additions is decided by
        # the value of `y` at call time.
        total = x
        for step in range(y):
            total = total + step
        return total

# Export LoopModel by tracing. The forward pass is executed once with
# loop_count = 5, so the printed graph contains five onnx::Add nodes with the
# constants 0..4 and only a single graph input: the loop count is not an
# input of the exported model.
model = LoopModel()
dummy_input = torch.ones(2, 3, dtype=torch.long)
loop_count = torch.tensor(5, dtype=torch.long)

torch.onnx.export(model, (dummy_input, loop_count), 'loop.onnx', verbose=True)

graph(%0 : Long(2:3, 3:1, requires_grad=0, device=cpu)):
  %2 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={0}]()
  %3 : Long(2:3, 3:1, requires_grad=0, device=cpu) = onnx::Add(%0, %2)
  %4 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={1}]()
  %5 : Long(2:3, 3:1, requires_grad=0, device=cpu) = onnx::Add(%3, %4)
  %6 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={2}]()
  %7 : Long(2:3, 3:1, requires_grad=0, device=cpu) = onnx::Add(%5, %6)
  %8 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={3}]()
  %9 : Long(2:3, 3:1, requires_grad=0, device=cpu) = onnx::Add(%7, %8)
  %10 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={4}]()
  %11 : Long(2:3, 3:1, requires_grad=0, device=cpu) = onnx::Add(%9, %10)
  return (%11)



  import sys


동적 loop을 포착하려면 loop 부분을 script(`@torch.jit.script`)로 작성하고, 이를 일반 nn.Module에서 호출하면 `script-based exporter`를 활용할 수 있음

In [3]:
# Mixing tracing and scripting

# Scripted helper: because it is compiled by TorchScript rather than traced,
# the loop stays a loop whose trip count is read from `y` at run time.
@torch.jit.script
def loop(x, y):
    # Convert the loop-count tensor to an int once, then add 0, 1, ..., n-1.
    trip_count = int(y)
    for step in range(trip_count):
        x = x + step
    return x

class LoopModel2(torch.nn.Module):
    """Regular module whose forward pass delegates to the scripted `loop`."""

    def forward(self, x, y):
        # All of the work happens inside the scripted function.
        result = loop(x, y)
        return result

# Export LoopModel2 with the same example inputs as the traced version.
# The printed graph keeps `loop_range` as a graph input and contains an
# onnx::Loop node that uses it as the trip count.
# Note: this writes to 'loop.onnx' again, replacing the file produced by the
# trace-only export.
model = LoopModel2()
dummy_input = torch.ones(2, 3, dtype=torch.long)
loop_count = torch.tensor(5, dtype=torch.long)
torch.onnx.export(model, (dummy_input, loop_count), 'loop.onnx', verbose=True,
                  input_names=['input_data', 'loop_range'])

graph(%input_data : Long(2:3, 3:1, requires_grad=0, device=cpu),
      %loop_range : Long(requires_grad=0, device=cpu),
      %10 : Bool(requires_grad=0, device=cpu)):
  %2 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={1}]()
  %4 : Long(2:3, 3:1, requires_grad=0, device=cpu) = onnx::Loop(%loop_range, %10, %input_data) # <ipython-input-3-e5e16e1b7555>:5:4
    block0(%i.1 : Long(device=cpu), %cond : bool, %x.6 : Long(2:3, 3:1, requires_grad=0, device=cpu)):
      %8 : LongTensor = onnx::Add(%x.6, %i.1) # <ipython-input-3-e5e16e1b7555>:6:12
      %9 : bool = onnx::Cast[to=9](%2)
      -> (%9, %8)
  return (%4)



위 그래프에서는 동적 제어 흐름이 `onnx::Loop`로 정확히 포착됨. export 때와 다른 loop 횟수를 입력하여 아래 예제로 확인해보자

In [None]:
import caffe2.python.onnx.backend as backend
import numpy as np
import onnx
# Load the exported loop model and run it through the Caffe2 ONNX backend.
# Relies on `dummy_input` (a 2x3 tensor of ones) defined in the export cell.
# NOTE(review): the caffe2 backend may not be available in recent PyTorch
# installs - confirm before running this half of the cell.
model = onnx.load('loop.onnx')

rep = backend.prepare(model)
# The loop count passed here is 9, not the 5 used at export time.
outputs = rep.run((dummy_input.numpy(), np.array(9).astype(np.int64)))
print(outputs[0])
# Expected output: every element is 1 + sum(range(9)) = 37.
#[[37 37 37]
# [37 37 37]]


import onnxruntime as ort
# Same check with ONNX Runtime: inputs are fed by the names chosen at export
# ('input_data', 'loop_range'); passing None as the first argument of run()
# asks for all outputs.
ort_sess = ort.InferenceSession('loop.onnx')
outputs = ort_sess.run(None, {'input_data': dummy_input.numpy(),
                              'loop_range': np.array(9).astype(np.int64)})
print(outputs)
# Expected output: a one-element list whose array is filled with 37
# (1 + sum(range(9))).
#[array([[37, 37, 37],
#       [37, 37, 37]], dtype=int64)]

In [None]:
class Module(torch.nn.Module):
    """Sums the two tensors of a tuple and adds an integer to the result."""

    def forward(self, x, tup):
        # type: (int, Tuple[Tensor, Tensor]) -> Tensor
        # The type comment above declares `x` as an int and `tup` as a pair
        # of tensors.
        first, second = tup
        return first + second + x

# NOTE(review): the `Module` class defined just above is never instantiated;
# this code exports LoopModel2 again with the same arguments as the earlier
# export cell. Presumably `Module` was meant to be exported here - confirm.
# Relies on `dummy_input` and `loop_count` from the earlier export cell and
# overwrites 'loop.onnx'.
model = LoopModel2()
torch.onnx.export(model, (dummy_input, loop_count), 'loop.onnx', verbose=True,
                  input_names=['input_data', 'loop_range'])