In [1]:
import warnings
import onnx
import torch
import torch.onnx
warnings.filterwarnings("ignore")

from model import ReceiptOCR_DefaultModel

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


## Load model network and weight

In [3]:
detector_cfg = 'text_detector/craft_config.yaml'
recognizer_cfg = 'text_recognizer/star_config.yaml'

model = ReceiptOCR_DefaultModel(detector_cfg, recognizer_cfg)

Loading weights from checkpoint (text_detector/model/craft_mlt_25k.pth)
Loading weights from checkpoint (text_recognizer/model/TPS-ResNet-BiLSTM-Attn-case-sensitive.pth)


# Detector

## Exporter Model
Batch Size X Channel X Height X Width

In [4]:
detector_dummy_input = torch.randn(1, 3, 1280, 720)

In [5]:
model.detector(detector_dummy_input)

(tensor([[[[0.0023, 0.0009],
           [0.0194, 0.0019],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0031, 0.0016],
           [0.0010, 0.0002]],
 
          [[0.0060, 0.0017],
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002]],
 
          [[0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002]],
 
          ...,
 
          [[0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002]],
 
          [[0.0065, 0.0015],
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0019, 0.0006]],
 
          [[0.0082

In [6]:
out_detector_model = 'text_detector/model/craft.onnx'

In [10]:
# Export the model
torch.onnx.export(model.detector,            
                  detector_dummy_input,
                  out_detector_model,
                  export_params=True,
                  opset_version=13,
                  do_constant_folding=True,
                  input_names = ['input'],
                  output_names = ['output'],
                  dynamic_axes={'input' : {0:'batch_size', 2:'height', 3:'width'},
                                'output' : {0:'batch_size'}})

## Inspecting Model

In [11]:
# Load the ONNX model
onnx_model = onnx.load(out_detector_model)

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)

# Print a human readable representation of the graph
print(onnx.helper.printable_graph(onnx_model.graph))

graph torch-jit-export (
  %input[FLOAT, batch_sizex3xheightxwidth]
) initializers (
  %basenet.slice5.1.weight[FLOAT, 1024x512x3x3]
  %basenet.slice5.1.bias[FLOAT, 1024]
  %basenet.slice5.2.weight[FLOAT, 1024x1024x1x1]
  %basenet.slice5.2.bias[FLOAT, 1024]
  %conv_cls.0.weight[FLOAT, 32x32x3x3]
  %conv_cls.0.bias[FLOAT, 32]
  %conv_cls.2.weight[FLOAT, 32x32x3x3]
  %conv_cls.2.bias[FLOAT, 32]
  %conv_cls.4.weight[FLOAT, 16x32x3x3]
  %conv_cls.4.bias[FLOAT, 16]
  %conv_cls.6.weight[FLOAT, 16x16x1x1]
  %conv_cls.6.bias[FLOAT, 16]
  %conv_cls.8.weight[FLOAT, 2x16x1x1]
  %conv_cls.8.bias[FLOAT, 2]
  %299[FLOAT, 64x3x3x3]
  %300[FLOAT, 64]
  %302[FLOAT, 64x64x3x3]
  %303[FLOAT, 64]
  %305[FLOAT, 128x64x3x3]
  %306[FLOAT, 128]
  %308[FLOAT, 128x128x3x3]
  %309[FLOAT, 128]
  %311[FLOAT, 256x128x3x3]
  %312[FLOAT, 256]
  %314[FLOAT, 256x256x3x3]
  %315[FLOAT, 256]
  %317[FLOAT, 256x256x3x3]
  %318[FLOAT, 256]
  %320[FLOAT, 512x256x3x3]
  %321[FLOAT, 512]
  %323[FLOAT, 512x512x3x3]
  %324[FLOAT

# Recognizer

ERROR UNSOLVED BY CREATOR
https://github.com/pytorch/pytorch/issues/27212

## Exporter Model
Batch Size X Channel X Height X Width

In [12]:
recognizer_dummy_input = torch.randn(100, 1, 32, 100)
recognizer_dummy_text = torch.LongTensor(100, 26).fill_(0)

In [13]:
model.recognizer.module(recognizer_dummy_input, recognizer_dummy_text)

tensor([[[ -9.1567,  -7.9548,  -7.0153,  ...,  -7.5213,  -8.4923,  -6.2916],
         [-12.9125, -12.2800,  -8.6195,  ..., -12.6158, -13.8953, -13.2493],
         [-16.7911, -15.0270, -15.7007,  ..., -16.3188, -16.9956, -16.5788],
         ...,
         [-14.0376,  -5.1481, -14.6025,  ..., -14.0541, -13.3346, -14.8601],
         [-14.0561,  -5.1797, -14.5754,  ..., -14.0733, -13.3462, -14.8630],
         [-14.0494,  -5.1462, -14.5524,  ..., -14.0671, -13.3431, -14.8623]],

        [[ -7.7614,  -8.7713,  -6.1116,  ...,  -8.8747,  -7.9698,  -7.6302],
         [-13.9064, -14.0955, -12.4275,  ..., -13.6737, -13.9023, -13.5223],
         [-15.8626, -14.9347, -14.2942,  ..., -14.2057, -15.4003, -15.4908],
         ...,
         [-13.0832,  -5.3524, -12.0100,  ..., -14.2358, -13.3563, -13.7593],
         [-13.0650,  -5.3422, -12.0023,  ..., -14.2119, -13.3418, -13.7275],
         [-13.0510,  -5.3449, -11.9959,  ..., -14.1940, -13.3300, -13.6997]],

        [[ -7.9671,  -7.8509,  -7.1226,  ...

In [14]:
out_recognizer_model = 'text_recognizer/model/star.onnx'

In [15]:
# Export the model
torch.onnx.export(model.recognizer.module,            
                  (recognizer_dummy_input, recognizer_dummy_text),
                  out_recognizer_model,
                  export_params=True,
                  opset_version=13,
                  do_constant_folding=True,
                  input_names = ['input'],
                  output_names = ['output'],
                  dynamic_axes={'input' : {0:'batch_size', 2:'height', 3:'width'},
                                'output' : {0:'batch_size'}})

RuntimeError: Exporting the operator grid_sampler to ONNX opset version 13 is not supported. Please feel free to request support or submit a pull request on PyTorch GitHub.

## Inspecting Model

In [None]:
# Load the ONNX model
onnx_model = onnx.load(out_recognizer_model)

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)

# Print a human readable representation of the graph
print(onnx.helper.printable_graph(onnx_model.graph))