In [1]:
import os
import warnings

In [2]:
# Select the PyTorch backend for Keras; this runs before `keras` is imported below.
os.environ.update(KERAS_BACKEND="torch")
# Suppress every Python warning for the remainder of the session.
warnings.simplefilter("ignore")

In [3]:
!git clone --branch video_swin https://github.com/innat/keras-cv.git
%cd keras-cv
!pip install -q -e .
!pip install -q onnxruntime

Cloning into 'keras-cv'...
remote: Enumerating objects: 13782, done.
remote: Counting objects: 100% (1919/1919), done.
remote: Compressing objects: 100% (769/769), done.
remote: Total 13782 (delta 1337), reused 1628 (delta 1134), pack-reused 11863
Receiving objects: 100% (13782/13782), 25.65 MiB | 20.19 MiB/s, done.
Resolving deltas: 100% (9788/9788), done.
/kaggle/working/keras-cv


In [4]:
import numpy as np

import onnx
import onnxruntime

import torch
import keras
from keras import ops
from keras_cv.models import VideoSwinBackbone
from keras_cv.models import VideoClassifier

# Display the versions of the libraries this notebook was executed with.
tuple(lib.__version__ for lib in (keras, torch, onnx, onnxruntime))

2024-04-03 11:40:58.890566: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-03 11:40:58.890789: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-03 11:40:59.090899: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


('3.0.5', '2.1.2+cpu', '1.15.0', '1.17.1')

In [5]:
def vswin_tiny():
    """Build the Video Swin Tiny classifier and load its pretrained weights.

    The `videoswin_tiny_kinetics400_classifier` checkpoint is fetched with
    `keras.utils.get_file`, which stores it in the local Keras cache and
    reuses the cached copy on later calls. Re-running the cell therefore does
    not download the file again, and the function no longer relies on an
    IPython shell magic or an installed `wget` binary.

    Returns:
        VideoClassifier: classifier with 400 output units, no output
        activation and average pooling, wrapping a `VideoSwinBackbone`
        built for inputs of shape (32, 224, 224, 3).
    """
    weights_name = "videoswin_tiny_kinetics400_classifier.weights.h5"
    # `get_file` returns the local path of the (possibly already cached) file.
    weights_path = keras.utils.get_file(
        fname=weights_name,
        origin=(
            "https://github.com/innat/VideoSwin/releases/download/v2.0/"
            + weights_name
        ),
        cache_subdir="models",
    )

    backbone = VideoSwinBackbone(
        input_shape=(32, 224, 224, 3),
        embed_dim=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        include_rescaling=False,
    )
    model = VideoClassifier(
        backbone=backbone,
        num_classes=400,
        activation=None,
        pooling="avg",
    )
    model.load_weights(weights_path)
    return model

In [6]:
# Build the classifier and load its pretrained weights.
model = vswin_tiny()
# Put the model in evaluation mode before the forward pass and export
# (presumably the torch-backend Keras model inherits `eval()` from nn.Module — TODO confirm).
model.eval()
# Print the model summary.
model.summary()

In [7]:
def to_numpy(tensor):
    """Convert a torch tensor to a NumPy array.

    Tensors that track gradients are detached first; the tensor is then
    moved to the CPU and converted.

    Args:
        tensor: a `torch.Tensor`, on any device, with or without
            `requires_grad`.

    Returns:
        numpy.ndarray holding the tensor's values.
    """
    source = tensor.detach() if tensor.requires_grad else tensor
    return source.cpu().numpy()

In [8]:
# Seed PyTorch's RNG so the random probe input is identical on every run.
torch.manual_seed(0)

batch_size = 1
# Random input shaped (batch, 32, 224, 224, 3), matching the backbone's `input_shape`.
x = torch.randn(batch_size, 32, 224, 224, 3, requires_grad=True)
# Reference output of the PyTorch-backed model for this input.
torch_out = model(x)

In [9]:
# Export the model to ONNX using `x` as the example input.
torch.onnx.export(
    model,
    x,
    "vswin_tiny.onnx",
    export_params=True,
    opset_version=10,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    # Mark axis 0 of both the input and the output as the dynamic `batch_size` axis.
    dynamic_axes={name: {0: "batch_size"} for name in ("input", "output")},
)

In [10]:
# Reload the exported file and run the ONNX checker on it.
onnx_model = onnx.load("vswin_tiny.onnx")
onnx.checker.check_model(onnx_model)
# Create an ONNX Runtime session restricted to the CPU execution provider.
ort_session = onnxruntime.InferenceSession(
    "vswin_tiny.onnx", providers=["CPUExecutionProvider"]
)

# Run the ONNX model on the same input `x` that produced `torch_out`,
# keyed by the name of the session's first input.
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)

In [11]:
# Raise if the ONNX Runtime output differs from the PyTorch output
# by more than 1e-5 (relative and absolute tolerance).
np.testing.assert_allclose(
    actual=to_numpy(torch_out),
    desired=ort_outs[0],
    rtol=1e-5,
    atol=1e-5,
)