In [1]:
!pip install onnx
!pip install onnxruntime

Collecting onnx
  Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx
Successfully installed onnx-1.16.1
Collecting onnxruntime
  Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m11.8 MB/s[0m

In [2]:
import onnx
import onnxruntime as ort

def get_model_io_shapes(model_path):
    # Load the ONNX model
    model = onnx.load(model_path)

    # Initialize ONNX Runtime session
    session = ort.InferenceSession(model_path)

    # Get input and output shapes
    input_shapes = {}
    for input in session.get_inputs():
        input_shapes[input.name] = input.shape

    output_shapes = {}
    for output in session.get_outputs():
        output_shapes[output.name] = output.shape

    return input_shapes, output_shapes

# Example usage
model_path = '/content/drive/MyDrive/OCR-SKU/Easy OCR/EasyOCR ONNX Models/v1/detection_model.onnx'
input_shapes, output_shapes = get_model_io_shapes(model_path)

print("Input Shapes:", input_shapes)
print("Output Shapes:", output_shapes)

Input Shapes: {'input1': [1, 3, 'height', 'width']}
Output Shapes: {'output': [1, 'Transposeoutput_dim_1', 'Transposeoutput_dim_2', 2], '281': [1, 32, 'Relu281_dim_2', 'Relu281_dim_3']}


In [3]:
model_path = '/content/drive/MyDrive/OCR-SKU/Easy OCR/EasyOCR ONNX Models/v1/13_recognition_model.onnx'
input_shapes, output_shapes = get_model_io_shapes(model_path)

print("Input Shapes:", input_shapes)
print("Output Shapes:", output_shapes)

Input Shapes: {'input1': [1, 1, 64, 'batch_size_1_1']}
Output Shapes: {'output': [1, 'Addoutput_dim_1', 188]}


In [25]:
def preprocess_image(image, input_shape):
    # Get target height and width
    target_height = 64
    target_width = 128

    height, width, _ = image.shape
    aspect_ratio = width / height
    new_width = int(target_width)
    new_height = int(new_width / aspect_ratio)
    if new_height > target_height:
        new_height = target_height
        new_width = int(new_height * aspect_ratio)
    resized_image = cv2.resize(image, (new_width, new_height))

    # Ensure the image has 3 color channels
    if resized_image.shape[2] != 3:
        raise ValueError("Input image must have 3 color channels")

    # Pad the image to match the target shape
    pad_height = target_height - new_height
    pad_width = target_width - new_width
    top_pad = pad_height // 2
    bottom_pad = pad_height - top_pad
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad
    padded_image = cv2.copyMakeBorder(resized_image, top_pad, bottom_pad, left_pad, right_pad, cv2.BORDER_CONSTANT, value=0)

    # Normalize image to range [0, 1]
    normalized_image = padded_image.astype(np.float32) / 255.0

    # Add batch dimension
    batched_image = np.expand_dims(normalized_image, axis=0)

    return batched_image

In [26]:
import cv2
import numpy as np
import onnxruntime as ort

def load_image(image_path):
    # Load image using OpenCV
    image = cv2.imread(image_path)
    # Convert BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image


def perform_detection(image, detection_model_path):
    # Load detection model
    detection_session = ort.InferenceSession(detection_model_path)

    # Print model input shape for debugging
    print("Model input shape:", detection_session.get_inputs()[0].shape)

    # Preprocess input image
    input_name = detection_session.get_inputs()[0].name
    input_shape = detection_session.get_inputs()[0].shape
    preprocessed_image = preprocess_image(image, input_shape)

    # Print preprocessed image shape for debugging
    print("Preprocessed image shape:", preprocessed_image.shape)

    # Perform inference
    detection_results = detection_session.run(None, {input_name: preprocessed_image})

    return detection_results

def perform_recognition(image, recognition_model_path):
    # Load recognition model
    recognition_session = ort.InferenceSession(recognition_model_path)

    # Preprocess input image
    input_name = recognition_session.get_inputs()[0].name
    input_shape = recognition_session.get_inputs()[0].shape
    preprocessed_image = preprocess_image(image, input_shape)

    # Perform inference
    recognition_results = recognition_session.run(None, {input_name: preprocessed_image})

    return recognition_results

# Example usage
detection_model_path = '/content/drive/MyDrive/OCR-SKU/Easy OCR/EasyOCR ONNX Models/v1/detection_model.onnx'
recognition_model_path = '/content/drive/MyDrive/OCR-SKU/Easy OCR/EasyOCR ONNX Models/v1/13_recognition_model.onnx'
image_path = '/content/a.jpg'

# Load image
image = load_image(image_path)

# Perform detection
detection_results = perform_detection(image, detection_model_path)
print("Detection Results:", detection_results)

# Perform recognition
recognition_results = perform_recognition(image, recognition_model_path)
print("Recognition Results:", recognition_results)


Model input shape: [1, 3, 'height', 'width']
Preprocessed image shape: (1, 64, 128, 3)


InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input1 for the following indices
 index: 1 Got: 64 Expected: 3
 Please fix either the inputs/outputs or the model.

In [28]:
!pip install easyocr

Collecting easyocr
  Downloading easyocr-1.7.1-py3-none-any.whl (2.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.4.2-py2.py3-none-any.whl (30 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (908 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m908.3/908.3 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ninja (from easyocr)
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.2/307.2 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->easyocr)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==

In [33]:
import torch
import easyocr

reader = easyocr.Reader(['hi'], gpu=True)  # Use 'gpu=True' to utilize GPU if available

# Access the detection and recognition models within EasyOCR
detector = reader.detector
device = 'cpu'

batch_size_1 = 500
batch_size_2 = 500
in_shape=[1, 3, batch_size_1, batch_size_2]
dummy_input = torch.rand(in_shape)
dummy_input = dummy_input.to(device)

torch.onnx.export(
    detector,
    dummy_input,
    "detectionModel.onnx",
    export_params=True,
    opset_version=11,
    input_names = ['input'],
    output_names = ['output'],
    dynamic_axes={'input' : {2 : 'batch_size_1', 3: 'batch_size_2'}},
)



In [38]:
import onnxruntime
import numpy as np
import cv2

# Load the ONNX model
onnx_model_path = "detectionModel.onnx"
ort_session = onnxruntime.InferenceSession(onnx_model_path)

# Define function to preprocess input image
def preprocess_image(image_path):
    # Load image using OpenCV
    image = cv2.imread(image_path)
    # Resize image to match model input shape
    image_resized = cv2.resize(image, (500, 500))
    # Convert image to float32 and normalize
    image_resized = image_resized.astype(np.float32) / 255.0
    # Convert image to CHW format (Channel, Height, Width)
    image_resized = np.transpose(image_resized, (2, 0, 1))
    # Add batch dimension
    image_resized = np.expand_dims(image_resized, axis=0)
    return image_resized

# Define function to perform inference
def detect_objects(image_path):
    # Preprocess input image
    input_data = preprocess_image(image_path)
    # Perform inference
    outputs = ort_session.run(None, {'input': input_data})
    return outputs

# Function to post-process detection results
def postprocess_detection(detection_output):
    # Perform any necessary post-processing on detection output
    # For example, you can extract bounding boxes, confidence scores, etc.
    # Modify this function based on the output format of your model
    return detection_output

# Path to input image
image_path = "a.jpg"

# Perform detection inference
detection_output = detect_objects(image_path)

# Post-process detection output
postprocessed_output = postprocess_detection(detection_output)

# Print or use post-processed output as required
print(len(postprocessed_output), postprocessed_output[0].shape, postprocessed_output[1].shape)

2 (1, 250, 250, 2) (1, 32, 250, 250)


In [40]:
import torch
import torchvision.transforms as transforms

recognizer = reader.recognizer

# Define the dimensions of the input image
batch_size = 1
num_channels = 1
image_height = imgH = 64
image_width = 128
device = 'cpu'

# Create dummy input tensors for the image and text inputs
dummy_input_image = torch.randn(batch_size, num_channels, image_height, image_width)

# Define the maximum length of the text input
max_text_length = 10

dummy_text_input = torch.LongTensor(max_text_length, batch_size).random_(0, 10)

# Convert the input image to grayscale
grayscale_transform = transforms.Grayscale(num_output_channels=1)
grayscale_input = grayscale_transform(dummy_input_image)

input_names = ["image_input", "text_input"]
output_names = ["output"]
dynamic_axes = {"image_input": {0: "batch_size"}, "text_input": {1: "batch_size"}}
opset_version = 12

torch.onnx.export(recognizer, (grayscale_input, dummy_text_input), "recog.onnx",
                  input_names=input_names, output_names=output_names,
                  dynamic_axes=dynamic_axes, opset_version=opset_version)


RuntimeError: Only tuples, lists and Variables are supported as JIT inputs/outputs. Dictionaries and strings are also accepted, but their usage is not recommended. Here, received an input of unsupported type: torch._C.ScriptObject