In [1]:
import warnings
warnings.filterwarnings('ignore')
import torch
from argparse import ArgumentParser
from pathlib import Path
import transformers
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, XGLMTokenizer, XGLMForCausalLM
from transformers.onnx import FeaturesManager
from pprint import pprint
import os
import onnx
from onnx_tf.backend import prepare
# https://huggingface.co/blog/convert-transformers-to-onnx
# https://github.com/sithu31296/PyTorch-ONNX-TFLite

2022-11-08 03:03:38.147750: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-08 03:03:38.702294: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-08 03:03:48.553608: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib:/usr/local/cuda/bin:/home/app/singularity-ce/bin:/home/app/singularity-ce/bin:/home/u00691/.local/bin:/home/u00691/anaconda3/bin:/home/u00691/.vscode-server/bin/d045a5eda657f4d7b676ded

In [4]:
# Checkpoint to convert, together with the classes used to load it.
# Alternative checkpoint: "rinna/japanese-gpt-1b".
model_info = dict(
    name="rinna/japanese-gpt2-medium",
    tokenizer=T5Tokenizer,
    model=AutoModelForCausalLM,
)
model_id = model_info["name"]
# Keep only the part after the last "/" so it can be used in output paths.
model_name = model_id.rsplit("/", 1)[-1]
# Feature name handed to the FeaturesManager lookup when building the ONNX config.
feature = "causal-lm"

In [None]:
# Load the pretrained model and its tokenizer for the configured checkpoint.
model = model_info["model"].from_pretrained(model_info["name"])
tokenizer = model_info["tokenizer"].from_pretrained(model_info["name"])

# Look up the ONNX config constructor for this model/feature pair (as the
# helper's name indicates, an unsupported combination raises), then build the
# ONNX config from the model's own configuration.
model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(
    model,
    feature=feature,
)
onnx_config = model_onnx_config(model.config)

In [5]:
# Root directory for everything exported for this model: output/<model_name>.
# The onnx/, tf/ and tflite/ sub-directories used by later cells live under it.
output_dir = os.path.join("output", model_name)

In [7]:
# Directory and file path for the exported ONNX graph.
onnx_output_dir = os.path.join(output_dir, 'onnx')
# exist_ok=True creates the directory when it is missing and does nothing when
# it is already there, so the cell can be re-run safely without a separate
# (race-prone) existence check.
os.makedirs(onnx_output_dir, exist_ok=True)
onnx_file_name = os.path.join(onnx_output_dir, model_name + ".onnx")
# Display the resulting path as the cell output.
onnx_file_name

'output/japanese-gpt2-medium/onnx/japanese-gpt2-medium.onnx'

In [4]:
# Export the model to ONNX (opset 13) at onnx_file_name; the call returns the
# ONNX input and output names, kept here as onnx_inputs / onnx_outputs.
onnx_inputs, onnx_outputs = transformers.onnx.export(
    model=model,
    preprocessor=tokenizer,
    config=onnx_config,
    opset=13,
    output=Path(onnx_file_name),
)

In [8]:
# Read the exported ONNX file back from disk; the loaded model is what the
# later onnx-tf conversion cell passes to prepare().
onnx_model = onnx.load(onnx_file_name)

In [6]:
# Check that the IR is well formed.
# The checker is given the file path rather than the in-memory model: checking
# a loaded model requires serializing it, which fails once the model exceeds
# protobuf's 2GB limit (e.g. with the rinna/japanese-gpt-1b checkpoint), while
# the path form works for models of any size.
onnx.checker.check_model(onnx_file_name)

In [10]:
# Print a human-readable representation of the graph
# onnx.helper.printable_graph(onnx_model.graph)

In [9]:
# Hand the loaded ONNX model to the onnx-tf backend; tf_rep is the object that
# is later exported as a TensorFlow graph.
tf_rep = prepare(onnx_model)
tf_output_dir = os.path.join(output_dir, 'tf')
# exist_ok=True makes directory creation idempotent, so re-running the cell is
# safe and no separate existence check is needed.
os.makedirs(tf_output_dir, exist_ok=True)

In [None]:
# Write the converted graph into tf_output_dir; the TFLite conversion cell
# later loads that directory as a SavedModel.
tf_rep.export_graph(tf_output_dir)

In [None]:
import tensorflow as tf

# Convert the SavedModel stored in tf_output_dir to TFLite.
converter = tf.lite.TFLiteConverter.from_saved_model(tf_output_dir)
tflite_model = converter.convert()
# tflite_model is the serialized model (it is written out in binary mode in
# the next cell). Printing it would dump the entire binary blob into the
# notebook output, so show only its size in bytes.
len(tflite_model)

In [None]:
tflite_output_dir = os.path.join(output_dir, 'tflite')
# exist_ok=True makes directory creation idempotent across re-runs.
os.makedirs(tflite_output_dir, exist_ok=True)

# Save the model to a file *inside* the output directory. Opening the
# directory path itself for writing raises IsADirectoryError, so build a real
# file path, named after the model like the ONNX export.
tflite_file_name = os.path.join(tflite_output_dir, model_name + ".tflite")
with open(tflite_file_name, 'wb') as f:
    f.write(tflite_model)
# Display where the model was written.
tflite_file_name