In [1]:
import torch, open_clip, pathlib, gc
from onnxconverter_common import float16
from onnxruntime.quantization import quantize_dynamic, QuantType

In [2]:
MODEL = "ViT-H-14-quickgelu"
CKPT = "dfn5b"  # Hugging Face ID or local .pt
OPSET = 17
OUT_FP32 = pathlib.Path("clip_text_fp32.onnx")
OUT_FP16 = pathlib.Path("clip_text_fp16.onnx")
OUT_INT8 = pathlib.Path("clip_text_int8.onnx")

In [3]:
full, _, _ = open_clip.create_model_and_transforms(
    MODEL, pretrained=CKPT, force_custom_text=True, device="cpu"
)
text_enc = full.text.eval().cpu()
del full.visual
gc.collect()

tok = open_clip.get_tokenizer(MODEL)
dummy = tok(["dummy"]).to(torch.int64)

In [4]:
class Wrapper(torch.nn.Module):
    def __init__(self, enc):
        super().__init__()
        self.enc = enc

    def forward(self, input_ids):
        return self.enc(input_ids)

In [5]:
torch.onnx.export(
    Wrapper(text_enc),
    dummy,
    OUT_FP32.as_posix(),
    input_names=["input_ids"],
    output_names=["embeddings"],
    dynamic_axes={"input_ids": {0: "batch"}, "embeddings": {0: "batch"}},
    opset_version=OPSET,
    do_constant_folding=True,
)
print(f"📝  wrote {OUT_FP32.name}")



📝  wrote clip_text_fp32.onnx


In [6]:
model_object = float16.convert_float_to_float16_model_path(OUT_FP32.as_posix(), keep_io_types=True)
print(f"📝  wrote {OUT_FP16.name}")

#torch.onnx.save(model_object, OUT_FP16.as_posi x())



📝  wrote clip_text_fp16.onnx




In [7]:
model_object.save(OUT_FP16.as_posix())

AttributeError: export

In [6]:
quantize_dynamic(
    model_input=OUT_FP32.as_posix(),
    model_output=OUT_INT8.as_posix(),
    weight_type=QuantType.QInt8,
)
print(f"📝  wrote {OUT_INT8.name}")



📝  wrote clip_text_int8.onnx
