In [1]:
import torch
from torch import nn
import os

# Limit PyTorch's intra-op CPU parallelism to a single thread.
# NOTE(review): presumably done so the %%time figures below are single-core
# numbers -- confirm; several recorded outputs show CPU time well above wall
# time, which suggests they were captured without this setting.
torch.set_num_threads(1)

In [2]:
import numpy as np

def save_to(base, x, y):
    """Dump an input/output tensor pair as raw binary files under ``base``.

    ``x`` is written to ``<base>/dummy_in.dat`` and ``y`` to
    ``<base>/dummy_out.dat`` via ``numpy.ndarray.tofile``. The files contain
    only the raw element bytes in C order -- no shape or dtype header -- so a
    reader has to know both in advance.

    Args:
        base: Target directory. It is created (including parents) when it
            does not exist yet; an empty string means the current directory.
        x: Tensor stored as ``dummy_in.dat``.
        y: Tensor stored as ``dummy_out.dat``.
    """
    # Create the directory up front so a fresh checkout does not fail with
    # FileNotFoundError. os.makedirs('') raises, hence the guard.
    if base:
        os.makedirs(base, exist_ok=True)

    for filename, tensor in (('dummy_in.dat', x), ('dummy_out.dat', y)):
        # detach() drops autograd tracking; cpu() is a no-op for CPU tensors
        # and allows tensors living on another device to be converted too.
        array = tensor.detach().cpu().numpy()
        with open(os.path.join(base, filename), 'wb') as f:
            array.tofile(f)

In [2]:
# One-layer model: a 3x3 convolution mapping 1 input channel to 32 output
# channels, stride 1, no padding, no bias term.
conv = nn.Sequential(
    nn.Conv2d(
        in_channels=1,
        out_channels=32,
        kernel_size=3,
        stride=1,
        padding=0,
        bias=False,
    ),
)

In [3]:
# Uniform-random batch of 10000 single-channel 28x28 inputs.
x = torch.rand((10000, 1, 28, 28))

In [4]:
%%time

# Forward pass of the convolution model over the whole random batch; this is
# the statement measured by the cell's %%time magic.
y = conv(x)

CPU times: user 752 ms, sys: 1.5 s, total: 2.26 s
Wall time: 204 ms


In [5]:
# Export the single-layer convolution model to ONNX, naming the graph input
# "image" and the graph output "prediction". A random example batch of the
# benchmark shape is passed as the example input for the export.
input_names = ["image"]
output_names = ["prediction"]
dummy_input = torch.randn(10000, 1, 28, 28)
# Make sure the target directory exists before the exporter opens the output
# file; otherwise the cell fails on a fresh checkout.
os.makedirs("conv", exist_ok=True)
torch.onnx.export(conv,
                  dummy_input,
                  "conv/convolution1.onnx",
                  verbose=False,
                  input_names=input_names,
                  output_names=output_names)

In [3]:
# Pointwise (1x1) convolution, 64 -> 64 channels, stride 1, no padding, no bias.
conv2 = nn.Conv2d(
    in_channels=64,
    out_channels=64,
    kernel_size=1,
    stride=1,
    padding=0,
    bias=False,
)

In [4]:
# Uniform-random input batch for conv2, laid out as (batch=128, channels=64,
# height=224, width=224). That is 128*64*224*224 ~= 411M elements, i.e. about
# 1.5 GiB assuming the default float32 dtype has not been changed.
x = torch.rand(128, 64, 224, 224)

In [5]:
%%time
# Timed forward pass through the 1x1 convolution.
y = conv2(x)
# Last expression of the cell: displays the output shape.
y.size()

CPU times: user 2.43 s, sys: 1.82 s, total: 4.25 s
Wall time: 4.25 s


torch.Size([128, 64, 224, 224])

In [6]:
# Export the 1x1 convolution to ONNX, naming the graph input "image" and the
# graph output "prediction". The example input matches the benchmark shape.
input_names = ["image"]
output_names = ["prediction"]
dummy_input = torch.randn(128, 64, 224, 224)
# Make sure the target directory exists before the exporter opens the output
# file; otherwise the cell fails on a fresh checkout.
os.makedirs("conv", exist_ok=True)
torch.onnx.export(conv2,
                  dummy_input,
                  "conv/conv_opt.onnx",
                  verbose=False,
                  input_names=input_names,
                  output_names=output_names)

In [12]:
# Stand-alone ReLU activation (out-of-place, the module default).
r2 = nn.ReLU(inplace=False)

In [13]:
%%time
# Apply ReLU to the current `y` and rebind `y` to the result.
# NOTE(review): the recorded output shape for this cell is (10000, 64, 24, 24),
# which no cell visible above produces (conv2 yields (128, 64, 224, 224)).
# The cell therefore appears to depend on kernel state from a step that is no
# longer in the notebook -- confirm and restore the missing step so that
# Restart & Run All reproduces the recorded result.
y = r2(y)
# Last expression of the cell: displays the output shape.
y.size()

CPU times: user 508 ms, sys: 1.62 s, total: 2.13 s
Wall time: 136 ms


torch.Size([10000, 64, 24, 24])

In [18]:
# Export the ReLU module to ONNX, naming the graph input "image" and the
# graph output "prediction".
input_names = ["image"]
output_names = ["prediction"]
dummy_input = torch.randn(10000, 64, 24, 24)
# Make sure the target directory exists before the exporter opens the output
# file; otherwise the cell fails on a fresh checkout.
os.makedirs("relu", exist_ok=True)
torch.onnx.export(r2,
                  dummy_input,
                  "relu/r2.onnx",
                  verbose=False,
                  input_names=input_names,
                  output_names=output_names)

In [15]:
# 2x2 max pooling that moves two pixels per step, halving height and width.
mp = nn.MaxPool2d(2, stride=2)

In [16]:
%%time
# Max-pool the current `y` and rebind `y` to the pooled result.
# NOTE(review): this cell is not idempotent -- because it reads and overwrites
# the same name, running it a second time pools the already-pooled tensor
# again. Consider binding the result to a new name.
y = mp(y)
# Last expression of the cell: displays the output shape.
y.size()

CPU times: user 2.6 s, sys: 500 ms, total: 3.1 s
Wall time: 229 ms


torch.Size([10000, 64, 12, 12])

In [19]:
# Export the max-pooling module to ONNX, naming the graph input "image" and
# the graph output "prediction". The example input has the pre-pooling shape.
input_names = ["image"]
output_names = ["prediction"]
dummy_input = torch.randn(10000, 64, 24, 24)
# Make sure the target directory exists before the exporter opens the output
# file; otherwise the cell fails on a fresh checkout.
os.makedirs("maxpool", exist_ok=True)
torch.onnx.export(mp,
                  dummy_input,
                  "maxpool/maxpool.onnx",
                  verbose=False,
                  input_names=input_names,
                  output_names=output_names)

In [20]:
# Random (10000, 10) input and the module applied to it. Despite its name,
# `softmax` is a LogSoftmax, normalising across dim 1.
x = torch.rand((10000, 10))
softmax = nn.LogSoftmax(dim=1)

In [21]:
%%time
# Timed log-softmax over dim 1 of `x`; rebinds `y` to the result.
y = softmax(x)
# Last expression of the cell: displays the output shape.
y.size()

CPU times: user 80 ms, sys: 200 ms, total: 280 ms
Wall time: 208 ms


torch.Size([10000, 10])

In [23]:
# Export the log-softmax module to ONNX, naming the graph input "image" and
# the graph output "prediction".
input_names = ["image"]
output_names = ["prediction"]
dummy_input = torch.randn(10000, 10)
# Make sure the target directory exists before the exporter opens the output
# file; otherwise the cell fails on a fresh checkout.
os.makedirs("Lsm", exist_ok=True)
torch.onnx.export(softmax,
                  dummy_input,
                  "Lsm/lsm.onnx",
                  verbose=False,
                  input_names=input_names,
                  output_names=output_names)

In [3]:
# Transformer encoder layer used for the attention benchmark and ONNX export.
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=2, batch_first=False)
# A freshly constructed module is in training mode, where its Dropout(p=0.1)
# layers randomly zero activations. The forward output of this layer is later
# saved as the reference output next to the exported model, and the ONNX
# export is done in inference mode by default, so switch to eval mode to make
# the reference deterministic and comparable with the exported graph.
encoder_layer.eval()
# With batch_first=False the input layout is (sequence, batch, feature):
# 7 * 24 * 6 = 1008 sequence steps, batch of 64, feature size d_model = 512.
x = torch.rand(7 * 24 * 6, 64, 512)
print(encoder_layer)

TransformerEncoderLayer(
  (self_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
  )
  (linear1): Linear(in_features=512, out_features=2048, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (linear2): Linear(in_features=2048, out_features=512, bias=True)
  (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
  (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (dropout2): Dropout(p=0.1, inplace=False)
)


In [4]:
%%time

# Timed forward pass of the encoder layer over `x`; rebinds `y`.
# NOTE(review): if `encoder_layer` is still in training mode at this point,
# its dropout layers are active and `y` differs from run to run -- and this
# `y` is what a later cell writes out as the reference output. Confirm the
# layer is in eval mode before relying on the saved data.
y = encoder_layer(x)
# Last expression of the cell: displays the output shape.
y.size()

CPU times: user 14.5 s, sys: 2.89 s, total: 17.4 s
Wall time: 17.4 s


torch.Size([1008, 64, 512])

In [5]:
# Persist the encoder's input and output tensors as raw .dat files under
# the 'attention' directory.
save_to(base='attention', x=x, y=y)

In [6]:
# Export the Transformer encoder layer to ONNX, naming the graph input
# "series" and the graph output "prediction". The benchmark tensor `x` is
# reused as the example input; verbose=True prints the exported graph.
input_names = ["series"]
output_names = ["prediction"]
dummy_input = x
# Make sure the target directory exists so this cell also works when it is
# run without the preceding save step.
os.makedirs("attention", exist_ok=True)
torch.onnx.export(encoder_layer,
                  dummy_input,
                  "attention/encoder.onnx",
                  verbose=True,
                  input_names=input_names,
                  output_names=output_names)

Exported graph: graph(%series : Float(1008, 64, 512, strides=[32768, 512, 1], requires_grad=0, device=cpu),
      %self_attn.in_proj_bias : Float(1536, strides=[1], requires_grad=1, device=cpu),
      %self_attn.out_proj.weight : Float(512, 512, strides=[512, 1], requires_grad=1, device=cpu),
      %self_attn.out_proj.bias : Float(512, strides=[1], requires_grad=1, device=cpu),
      %linear1.bias : Float(2048, strides=[1], requires_grad=1, device=cpu),
      %linear2.bias : Float(512, strides=[1], requires_grad=1, device=cpu),
      %norm1.weight : Float(512, strides=[1], requires_grad=1, device=cpu),
      %onnx::MatMul_121 : Float(512, 1536, strides=[1, 512], requires_grad=0, device=cpu),
      %onnx::MatMul_141 : Float(512, 2048, strides=[1, 512], requires_grad=0, device=cpu),
      %onnx::MatMul_142 : Float(2048, 512, strides=[1, 2048], requires_grad=0, device=cpu)):
  %norm2.bias : Float(512, strides=[1], requires_grad=1, device=cpu) = onnx::Identity(%self_attn.out_proj.bias)
  %

In [3]:
# Single Transformer decoder layer: model width 768, 8 attention heads and a
# 3072-wide feed-forward block. Printing it shows the submodule layout.
decoder_layer = nn.TransformerDecoderLayer(
    d_model=768,
    nhead=8,
    dim_feedforward=3072,
)
print(decoder_layer)

TransformerDecoderLayer(
  (self_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
  )
  (multihead_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
  )
  (linear1): Linear(in_features=768, out_features=3072, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (linear2): Linear(in_features=3072, out_features=768, bias=True)
  (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (norm3): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (dropout2): Dropout(p=0.1, inplace=False)
  (dropout3): Dropout(p=0.1, inplace=False)
)
