In [1]:
import warnings
import onnx
import torch
import torch.onnx
warnings.filterwarnings("ignore")

from src.model import ReceiptOCR_DefaultModel

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


## Load model network and weight

In [3]:
detector_cfg = 'configs/craft_config.yaml'
recognizer_cfg = 'configs/star_config.yaml'

model = ReceiptOCR_DefaultModel(detector_cfg, recognizer_cfg)

Loading weights from checkpoint (models/text_detector/craft_mlt_25k.pth)
Loading weights from checkpoint (models/text_recognizer/TPS-ResNet-BiLSTM-Attn-case-sensitive.pth)


# Detector

## Exporter Model
Batch Size X Channel X Height X Width

In [4]:
detector_dummy_input = torch.randn(1, 3, 1280, 720)

In [5]:
model.detector(detector_dummy_input)

(tensor([[[[0.0010, 0.0002],
           [0.0263, 0.0034],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002]],
 
          [[0.0010, 0.0002],
           [0.0017, 0.0003],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002]],
 
          [[0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002]],
 
          ...,
 
          [[0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0010, 0.0002]],
 
          [[0.0014, 0.0003],
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           ...,
           [0.0010, 0.0002],
           [0.0010, 0.0002],
           [0.0072, 0.0023]],
 
          [[0.0086

In [6]:
out_detector_model = 'models/text_detector/craft.onnx'

In [7]:
# Export the model
torch.onnx.export(model.detector,            
                  detector_dummy_input,
                  out_detector_model,
                  export_params=True,
                  opset_version=13,
                  do_constant_folding=True,
                  input_names = ['input'],
                  output_names = ['output'],
                  dynamic_axes={'input' : {0:'batch_size', 2:'height', 3:'width'},
                                'output' : {0:'batch_size'}})

## Inspecting Model

In [8]:
# Load the ONNX model
onnx_model = onnx.load(out_detector_model)

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)

# Print a human readable representation of the graph
print(onnx.helper.printable_graph(onnx_model.graph))

graph torch-jit-export (
  %input[FLOAT, batch_sizex3xheightxwidth]
) initializers (
  %basenet.slice5.1.weight[FLOAT, 1024x512x3x3]
  %basenet.slice5.1.bias[FLOAT, 1024]
  %basenet.slice5.2.weight[FLOAT, 1024x1024x1x1]
  %basenet.slice5.2.bias[FLOAT, 1024]
  %conv_cls.0.weight[FLOAT, 32x32x3x3]
  %conv_cls.0.bias[FLOAT, 32]
  %conv_cls.2.weight[FLOAT, 32x32x3x3]
  %conv_cls.2.bias[FLOAT, 32]
  %conv_cls.4.weight[FLOAT, 16x32x3x3]
  %conv_cls.4.bias[FLOAT, 16]
  %conv_cls.6.weight[FLOAT, 16x16x1x1]
  %conv_cls.6.bias[FLOAT, 16]
  %conv_cls.8.weight[FLOAT, 2x16x1x1]
  %conv_cls.8.bias[FLOAT, 2]
  %299[FLOAT, 64x3x3x3]
  %300[FLOAT, 64]
  %302[FLOAT, 64x64x3x3]
  %303[FLOAT, 64]
  %305[FLOAT, 128x64x3x3]
  %306[FLOAT, 128]
  %308[FLOAT, 128x128x3x3]
  %309[FLOAT, 128]
  %311[FLOAT, 256x128x3x3]
  %312[FLOAT, 256]
  %314[FLOAT, 256x256x3x3]
  %315[FLOAT, 256]
  %317[FLOAT, 256x256x3x3]
  %318[FLOAT, 256]
  %320[FLOAT, 512x256x3x3]
  %321[FLOAT, 512]
  %323[FLOAT, 512x512x3x3]
  %324[FLOAT

# Recognizer

ERROR UNSOLVED BY CREATOR
https://github.com/pytorch/pytorch/issues/27212

## Exporter Model
Batch Size X Channel X Height X Width

In [9]:
recognizer_dummy_input = torch.randn(100, 1, 32, 100)
recognizer_dummy_text = torch.LongTensor(100, 26).fill_(0)

In [10]:
model.recognizer.module(recognizer_dummy_input, recognizer_dummy_text)

tensor([[[ -8.7121, -10.4233,  -9.7408,  ...,  -7.8442,  -8.5406,  -6.8858],
         [-14.0378, -15.5630, -16.3955,  ..., -12.0814, -12.9617, -13.4963],
         [-16.6610, -17.7068, -16.6028,  ..., -15.9874, -15.5287, -16.0990],
         ...,
         [-13.1663,  -2.5138, -12.0675,  ..., -13.4109, -13.2892, -13.2974],
         [-13.1462,  -2.4805, -11.9805,  ..., -13.4278, -13.2709, -13.2835],
         [-13.1181,  -2.4402, -11.8769,  ..., -13.4378, -13.2335, -13.2671]],

        [[ -8.2434, -10.8703,  -6.9456,  ...,  -7.5715,  -7.8886,  -5.5564],
         [-14.0342, -14.6697, -11.6138,  ..., -12.8473, -14.2496, -13.7790],
         [-17.8563, -19.0249, -15.7666,  ..., -18.3419, -19.9705, -17.4715],
         ...,
         [-14.2385,  -5.2659, -14.6901,  ..., -14.4494, -13.3195, -14.6651],
         [-14.0618,  -5.1896, -14.4138,  ..., -14.3364, -13.1216, -14.4995],
         [-13.8816,  -5.1824, -14.1655,  ..., -14.1733, -12.9041, -14.3783]],

        [[ -8.3754,  -9.2564,  -8.1864,  ...

In [11]:
out_recognizer_model = 'models/text_recognizer/star.onnx'

In [12]:
# Export the model
torch.onnx.export(model.recognizer.module,            
                  (recognizer_dummy_input, recognizer_dummy_text),
                  out_recognizer_model,
                  export_params=True,
                  opset_version=13,
                  do_constant_folding=True,
                  input_names = ['input'],
                  output_names = ['output'],
                  dynamic_axes={'input' : {0:'batch_size', 2:'height', 3:'width'},
                                'output' : {0:'batch_size'}})

RuntimeError: Exporting the operator grid_sampler to ONNX opset version 13 is not supported. Please feel free to request support or submit a pull request on PyTorch GitHub.

## Inspecting Model

In [None]:
# Load the ONNX model
onnx_model = onnx.load(out_recognizer_model)

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)

# Print a human readable representation of the graph
print(onnx.helper.printable_graph(onnx_model.graph))