In [1]:
from optimum.onnxruntime import OR
from optimum.onnxruntime import ORTOptimizer, ORTModelForTokenClassification, OptimizationConfig
from optimum.onnxruntime.configuration import AutoQuantizationConfig
from optimum.onnxruntime import ORTQuantizer
from transformers import AutoTokenizer, AutoModelForTokenClassification



In [2]:
class Converter:
    def __init__(self, model_name):
        self.model = ORTModelForTokenClassification.from_pretrained(model_name, from_transformers=True)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    def onnxConvertor(self):
        self.model.save_pretrained("onnx_model")
        self.tokenizer.save_pretrained("onnx_model")

    def optimizer_for_gpu(self):
        ort_model = ORTModelForTokenClassification.from_pretrained('onnx_model/', file_name = 'model.onnx', provider='CUDAExecutionProvider')
        optimizer = ORTOptimizer.from_pretrained(ort_model)
        optimization_config = OptimizationConfig(optimization_level=2, # full optimize
                                                optimize_for_gpu=True, # use CUDAExecutionProvider
                                                fp16=True
                                                )
        optimizer.optimize(save_dir='optimized_model_for_gpu', optimization_config=optimization_config)
        self.tokenizer.save_pretrained('optimized_model_for_gpu/')
    
    def optimizer_for_cpu_fp16(self):
        ort_model = ORTModelForTokenClassification.from_pretrained('onnx_model/', file_name = 'model.onnx', provider='CPUExecutionProvider')
        optimizer = ORTOptimizer.from_pretrained(ort_model)
        optimization_config = OptimizationConfig(optimization_level=2, # 99 full optimize
                                                optimize_for_gpu=False, # use CPUExecutionProvider
                                                fp16=True
                                                )
        optimizer.optimize(save_dir='optimized_model_for_cpu_fp16', optimization_config=optimization_config)
        self.tokenizer.save_pretrained('optimized_model_for_cpu_fp16/')
    
    def optimizer_for_cpu(self):
        ort_model = ORTModelForTokenClassification.from_pretrained('onnx_model/', file_name = 'model.onnx', provider='CPUExecutionProvider')
        optimizer = ORTOptimizer.from_pretrained(ort_model)
        optimization_config = OptimizationConfig(optimization_level=2, # 99 full optimize
                                                optimize_for_gpu=False, # use CPUExecutionProvider
                                                fp16=False
                                                )
        optimizer.optimize(save_dir='optimized_model_for_cpu', optimization_config=optimization_config)
        self.tokenizer.save_pretrained('optimized_model_for_cpu/')    
    
    def optimized_quantized(self,ort_model_name):
        ort_model = ORTModelForTokenClassification.from_pretrained(ort_model_name, file_name = 'model_optimized.onnx', provider='CPUExecutionProvider')
        qconfig = AutoQuantizationConfig.arm64(is_static=False, per_channel=False)
        quantizer = ORTQuantizer.from_pretrained(ort_model)
        # Apply dynamic quantization on the model
        quantizer.quantize(save_dir='dynamic_quantized_model', quantization_config=qconfig)
        self.tokenizer.save_pretrained('dynamic_quantized_model/')


In [12]:
converter = Converter("/home/forest/Desktop/ML-HUB/nlp-health-deidentification-sub-base-ro/nioyatech/nlp-health-deidentification-sub-base-ro")

In [7]:
converter.onnxConvertor()

In [5]:
converter.optimizer_for_gpu()

In [6]:
converter.optimizer_for_cpu_fp16()

In [8]:
converter.optimizer_for_cpu()

In [9]:
converter.optimized_quantized('optimized_model_for_cpu')

In [2]:
from optimum.onnxruntime import ORTModelForVision2Seq

ImportError: cannot import name 'ORTModelForVision2Seq' from 'optimum.onnxruntime' (/home/forest/miniconda3/envs/ort/lib/python3.9/site-packages/optimum/onnxruntime/__init__.py)

In [1]:
from transformers import TrOCRProcessor
from optimum.onnxruntime.modeling_vision2seq import ORTModelForVision2Seq

processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed')
onnx_model = ORTModelForVision2Seq.from_pretrained(
    "microsoft/trocr-base-handwritten", 
    from_transformers=True,
    use_cache=True,
)
pixel_values = processor(images=image, return_tensors="pt").pixel_values
generated_ids = onnx_model.generate(pixel_values)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

ModuleNotFoundError: No module named 'optimum.onnxruntime.modeling_vision2seq'

In [3]:
model_ckpt = "microsoft/trocr-base-handwritten"
!python -m transformers.onnx --model={model_ckpt} --feature=vision2seq-lm onnx/ --atol 1e-3

Framework not requested. Using torch to export to ONNX.
Downloading: 100%|█████████████████████████| 4.13k/4.13k [00:00<00:00, 5.49MB/s]
Downloading: 100%|█████████████████████████| 1.33G/1.33G [05:36<00:00, 3.97MB/s]
Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Downloading: 100%|██████████████████████████████| 228/228 [00:00<00:00, 285kB/s]
Using framework PyTorch: 1.13.0+cu117
  if num_channels != self.num_channels:
  if height != self.image_size[0] or width != self.image_size[1]:
Validating ONNX model...
	-[✓] ONNX model output names match reference model ({'last_hidden_state'})
	- Validating ONNX Model output "last_hidden_state":
		-[✓] (3, 577, 768) matches (3, 577, 768)
		-[x] values not close enough (atol

In [4]:
# export vision2seq model to onnx
import onnx
from transformers import AutoTokenizer, AutoModelForVision2Seq
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqConfig
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForConditionalGeneration   
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageCaptioning
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageClassification
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageToTextGeneration
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageToTextRetrieval
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageToTextTranslation
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageTranslation
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageToTextTranslation
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageToTextTranslation
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageToTextTranslation


ModuleNotFoundError: No module named 'transformers.models.vision2seq'

In [None]:
from transformers.models.vision2seq.modeling_vision2seq import Vision2SeqForImageToTextTranslation