# ImageNet inference with UFront

In [1]:
import torch,os,random
import torch.nn as nn
import torch.nn.functional as F
from typing import Tuple
from PIL import Image
from torchvision import transforms
from torch.utils import data
from functools import partial
from torchvision.models import resnet18, resnet50, squeezenet1_1, regnet_x_32gf, maxvit_t, shufflenet_v2_x1_5, inception_v3, mobilenet_v3_small, efficientnet_v2_s, densenet121, convnext_small
import tqdm

In [2]:
!python --version

Python 3.7.12


## Prepare the ImageNet validation dataset

In [3]:
root = "/root/data/" #set to the path of imagenet-1k validation set, which contains a folder named 'imagenet1kvalid'
working = "./" #working directory
!wget https://raw.githubusercontent.com/raghakot/keras-vis/master/resources/imagenet_class_index.json

--2023-10-25 07:47:40--  https://raw.githubusercontent.com/raghakot/keras-vis/master/resources/imagenet_class_index.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 35363 (35K) [text/plain]
Saving to: ‘imagenet_class_index.json’


2023-10-25 07:47:40 (2.96 MB/s) - ‘imagenet_class_index.json’ saved [35363/35363]



In [4]:
import os
from torch.utils.data import Dataset
from PIL import Image
import json
syn_to_class = {}
with open(os.path.join(working, "imagenet_class_index.json"), "rb") as f:
    json_file = json.load(f)
    for class_id, v in json_file.items():
        syn_to_class[class_id] = v[1]
                
def get_class_name(entry):        
    target = syn_to_class[int(entry)]
    return target
        
class ImageNetKaggle(Dataset):
    def __init__(self, root, transform=None):
        self.samples = []
        self.targets = []
        self.transform = transform
        samples_dir = os.path.join(root, "imagenet1kvalid")
        for entry in os.listdir(samples_dir):
                sample_path = os.path.join(samples_dir, entry)
                for file in os.listdir(sample_path):                    
                    self.samples.append(os.path.join(sample_path, file))
                    self.targets.append(int(entry))
                
    def __len__(self):
            return len(self.samples)
        
    def __getitem__(self, idx):
            x = Image.open(self.samples[idx]).convert("RGB")
            if self.transform:
                x = self.transform(x)
            return x, self.targets[idx]

## Prepare Dataloader

In [5]:
from torch.utils.data import DataLoader
from torchvision import transforms
import torch
import torchvision
from tqdm import tqdm
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
val_transform = transforms.Compose(
            [
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean, std),
            ]
        )
dataset = ImageNetKaggle(root, val_transform)
dataloader = DataLoader(
            dataset,
            batch_size=64, # may need to reduce this depending on your GPU 
            num_workers=8, # may need to reduce this depending on your num of CPUs and RAM
            shuffle=False,
            drop_last=False,
            pin_memory=True
        )

  cpuset_checked))


## 1) Download and install UFront package

In [6]:
# You may execute !python --version and install the following ufront package based on your python version

# For Python 3.7
!wget https://anonymous.4open.science/r/anonyufront-2B3E/release/ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl

# For Python 3.8
# !wget https://anonymous.4open.science/r/anonyufront-2B3E/release/ufront-0.1.1-cp38-cp38-manylinux_2_28_x86_64.whl

# For Python 3.9
# !wget https://anonymous.4open.science/r/anonyufront-2B3E/release/ufront-0.1.1-cp39-cp39-manylinux_2_28_x86_64.whl

# For Python 3.10
# !wget https://anonymous.4open.science/r/anonyufront-2B3E/release/ufront-0.1.1-cp310-cp310-manylinux_2_28_x86_64.whl

# For Python 3.11
# !wget https://anonymous.4open.science/r/anonyufront-2B3E/release/ufront-0.1.1-cp311-cp311-manylinux_2_28_x86_64.whl

--2023-10-25 07:48:13--  https://anonymous.4open.science/r/anonyufront-2B3E/release/ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl
Resolving anonymous.4open.science (anonymous.4open.science)... 104.21.18.195, 172.67.183.76, 2606:4700:3037::6815:12c3, ...
Connecting to anonymous.4open.science (anonymous.4open.science)|104.21.18.195|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /api/repo/anonyufront-2B3E/file/release/ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl [following]
--2023-10-25 07:48:14--  https://anonymous.4open.science/api/repo/anonyufront-2B3E/file/release/ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl
Reusing existing connection to anonymous.4open.science:443.
HTTP request sent, awaiting response... 200 OK
Length: 60693211 (58M)
Saving to: ‘ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl’


2023-10-25 07:48:37 (53.6 MB/s) - ‘ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl’ saved [60693211/60693211]



In [7]:
!pip install ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl

# !pip install ufront-0.1.1-cp38-cp38-manylinux_2_28_x86_64.whl 
# !pip install ufront-0.1.1-cp39-cp39-manylinux_2_28_x86_64.whl
# !pip install ufront-0.1.1-cp310-cp310-manylinux_2_28_x86_64.whl
# !pip install ufront-0.1.1-cp311-cp311-manylinux_2_28_x86_64.whl

Processing ./ufront-0.1.1-cp37-cp37m-manylinux_2_28_x86_64.whl
Collecting tf2onnx
  Downloading tf2onnx-1.15.1-py3-none-any.whl (454 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.7/454.7 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: tf2onnx, ufront
Successfully installed tf2onnx-1.15.1 ufront-0.1.1
[0m

## 2) Install compiler backend and runtime


In [9]:
# Install compiler backend and runtime
#for python3.9+
#!pip install iree-compiler==20230512.517 iree-runtime==20230512.517 -f https://openxla.github.io/iree/pip-release-links.html

#for python3.7, the latest IREE version is 20230330.474
!pip install iree-compiler==20230330.474 iree-runtime==20230330.474 -f https://openxla.github.io/iree/pip-release-links.html

Looking in links: https://openxla.github.io/iree/pip-release-links.html
Collecting iree-compiler==20230330.474
  Downloading https://github.com/openxla/iree/releases/download/candidate-20230330.474/iree_compiler-20230330.474-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.5/53.5 MB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting iree-runtime==20230330.474
  Downloading https://github.com/openxla/iree/releases/download/candidate-20230330.474/iree_runtime-20230330.474-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m62.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Installing collected packages: iree-runtime, iree-compiler
Successfully installed iree-compiler-20230330.474 iree-runtime-20230330.474
[0m

## 3) Compile the models and run on CPU/GPU

In [10]:
from ufront.pytorch.model import UFrontTorch 
import iree.compiler as ireec
from iree.compiler import tools
from iree import runtime
# !pip install onnxsim 

def compile_with_ufront(net, GPU, dataloader):
    for x, y in dataloader:
        break
    net.eval()
    indata = x.numpy()
    model = UFrontTorch(net, batch_size=indata.shape[0], pass_weights=True) # convert torch model to ufront model
    #This will trigger Rust frontend for actual model conversion and graph building
    #operators can also be managed by python side (each operator here corresponding to an operator in the Rust computation graph)
    output_tensors = model(inputs = [indata])

    #The output of the model (forward pass have not been triggered at the moment!)
    # if model.model.__class__.__name__ not in ["MaxVit", "SwinTransformer", "VisionTransformer", "MultiHeadAttention"]:
    output = model.softmax(input=output_tensors[0], name="softmax_out")

    #This will trigger model compilation, i.e., convert Rust computation graph to a unified high-level IR and lower it to TOSA IR
    model.compile(optimizer={"type":"sgd", "lr":"0.01", "momentum":"0", "nesterov":"False", "weight_decay":"0"},
                        loss='sparse_categorical_crossentropy', metrics=['accuracy', 'sparse_categorical_crossentropy'])

    modelir = model.dump_ir()

    tosa_ir= model.dump_tosa_ir()

    print("Compiling TOSA model...")
    if GPU:
        binary = ireec.compile_str(tosa_ir,
                        target_backends=["cuda"], 
                        input_type=ireec.InputType.TOSA)
        module = runtime.load_vm_flatbuffer(binary, driver="cuda")
    else:
        binary = ireec.compile_str(tosa_ir,
                        target_backends=["llvm-cpu"], 
                        input_type=ireec.InputType.TOSA)
        module = runtime.load_vm_flatbuffer(binary,backend="llvm-cpu") 
    return module

Some of the onnx models requires onnxsim library, please install onnxsim before usage!


In [11]:
def get_ufront_accuracy(module, dataloader):
    correct = 0.0
    total = 0.0
    batch_size = 0
    for x, y in tqdm(dataloader):
        if batch_size == 0:
            batch_size = x.shape[0]
        elif x.shape[0] < batch_size:
            print("Ignore last batch!") #dynamic batch size is currently not supported!
            break
        y_pred = module.forward(x.numpy()).to_host()
        correct += (y_pred.argmax(axis=1) == y.numpy()).sum().item()
        total += len(y)
    return correct / total

In [None]:
#If you experiencing the following error, you need to upgrade NVidia Driver and CUDA; or you can lower IREE to a lower version, e.g., 20230330.474.

#RuntimeError: Error creating vm context with modules: 
#main_checkout/runtime/src/iree/hal/drivers/cuda/native_executable.c:99: INTERNAL; 
#CUDA driver error 'CUDA_ERROR_UNSUPPORTED_PTX_VERSION' (222): 
#the provided PTX was compiled with an unsupported toolchain.; 
#while invoking native function hal.executable.create; while calling import; 

In [12]:
net = mobilenet_v3_small(weights="DEFAULT", dropout=0.0)
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth


  0%|          | 0.00/9.83M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [03:28<00:00,  3.74it/s]

Ignore last batch!





0.6700544174135723

In [13]:
net = shufflenet_v2_x1_5(weights="DEFAULT")
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Downloading: "https://download.pytorch.org/models/shufflenetv2_x1_5-3c479a10.pth" to /root/.cache/torch/hub/checkpoints/shufflenetv2_x1_5-3c479a10.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [03:24<00:00,  3.83it/s]

Ignore last batch!





0.7178697183098591

In [14]:
net = squeezenet1_1(weights="DEFAULT")
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Downloading: "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth" to /root/.cache/torch/hub/checkpoints/squeezenet1_1-b8a52dc0.pth


  0%|          | 0.00/4.73M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [03:21<00:00,  3.87it/s]

Ignore last batch!





0.5785251280409731

In [15]:
net = resnet18(weights="DEFAULT")
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [08:49<00:00,  1.48it/s]

Ignore last batch!





0.693481914212548

In [16]:
net = resnet50(weights="DEFAULT")
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [11:51<00:00,  1.10it/s]

Ignore last batch!





0.7925336107554417

In [17]:
net = densenet121(weights="DEFAULT")
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth


  0%|          | 0.00/30.8M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [13:06<00:01,  1.01s/it]

Ignore last batch!





0.7363356274007683

In [18]:
net = inception_v3(weights="DEFAULT") 
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader) #low performance than official reported, to fix this, the image size need to resize to 299 x 299, instead of standard 224 x 224

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [15:54<00:01,  1.22s/it]

Ignore last batch!





0.6998239436619719

In [19]:
import torchvision.models as models
net = models.vision_transformer.vit_b_16(weights="DEFAULT")
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth


  0%|          | 0.00/330M [00:00<?, ?B/s]

Compiling TOSA model...


100%|█████████▉| 781/782 [09:30<00:00,  1.37it/s]

Ignore last batch!





0.8052176696542894

In [20]:
#fix low performance of inception3
val_transform = transforms.Compose(
            [
                transforms.Resize(299),
                transforms.CenterCrop(299), #299 instead of 224, see this: https://github.com/IntelLabs/distiller/issues/422
                transforms.ToTensor(),
                transforms.Normalize(mean, std),
            ]
        )
dataset = ImageNetKaggle(root, val_transform)
dataloader = DataLoader(
            dataset,
            batch_size=64, # may need to reduce this depending on your GPU 
            num_workers=8, # may need to reduce this depending on your num of CPUs and RAM
            shuffle=False,
            drop_last=False,
            pin_memory=True
        )
net = inception_v3(weights="DEFAULT") 
module = compile_with_ufront(net, True, dataloader)
get_ufront_accuracy(module, dataloader)

Compiling TOSA model...


100%|█████████▉| 781/782 [15:18<00:01,  1.16s/it]

Ignore last batch!


100%|█████████▉| 781/782 [15:18<00:01,  1.18s/it]


0.7694662291933418