# 🚀 Export and quantize pretrained classification models for Arduino Nicla Vision deployment 

## Install FocoosAI SDK

In [1]:
!uv pip install 'focoos[onnx-cpu] @ git+https://github.com/FocoosAI/focoos.git'

# you can use [onnx] extra dependencies if you are in a GPU or Colab environment

[2mUsing Python 3.11.12 environment at: /Users/u464645/Documents/projects/hackatons/project/sample/.venv[0m
[2K   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)          [0m
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)  [0m[1A
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)  [0m[1A
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)  [0m[1A
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)  [0m[1A
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)  [0m[1A
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)  [0m[1A
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.git ([2mHEAD[0m)  [0m[1A
[2K[1A   [36m[1mUpdating[0m[39m https://github.com/FocoosAI/focoos.g

## Get Pretrained Model

FocoosAI offers three pretrained classification models in different sizes:

- fai-cls-n-coco  (nano, optimized for Arduino Nicla Vision) 
- fai-cls-s-coco  (small)
- fai-cls-m-coco  (medium)

all models are trained on coco dataset at 224px resolution, **but for Nicla vision we suggest exporting them to 96px.**

Choose the model size that best fits your accuracy and efficiency needs.

In [26]:
from pprint import pprint
from unittest import result


from focoos import FocoosHUB, ModelManager

hub = FocoosHUB(api_key="c7ef8380320c421792425668205fa8fa")

ref = 'f73c51dfb3bd422f'
model = ModelManager.get(f"hub://{ref}", hub=hub)

im = '/Users/u464645/Documents/projects/hackatons/project/sample/webcam_captures/capture_1_20251005_014108.jpg'
result = model.infer(image=im, threshold=0.4, annotate=True)
print(result.detections[1])

[1;32m[10/05 02:33][INFO][HUB]: Currently logged as: frigato.luca97@gmail.com environment: https://api.focoos.ai/v0[0m
[1;32m[10/05 02:33][INFO][HUB]: 📥 Model already downloaded[0m
[1;32m[10/05 02:33][INFO][ModelManager]: 📥 Loading model info from cache: /Users/u464645/FocoosAI/models/f73c51dfb3bd422f/model_info.json[0m
[1;32m[10/05 02:33][INFO][FocoosModel]: Loading weights from local path: /Users/u464645/FocoosAI/models/f73c51dfb3bd422f/model_final.pth[0m



1 background, 1 humans
Latency: imload 8ms, preprocess 3ms, inference 22ms, postprocess 0ms, annotate 2ms, total 35ms
FocoosDet(bbox=None, conf=0.609123706817627, cls_id=1, label=humans, mask=None, keypoints=None)




## Export as optimized ONNX for edge deployment

For edge deployment, we need to export model to more portable runtime, like onnxruntime.

In [28]:
import os
from pathlib import Path

from PIL import Image

from focoos import ASSETS_DIR, RuntimeType

image_size = 96  # 96px input size


def find_workspace_root():
    """Find the workspace root directory."""
    current_path = Path.cwd()
    
    # Look for workspace indicators
    for path in [current_path] + list(current_path.parents):
        if (path / "pyproject.toml").exists() or (path / ".git").exists() or (path / "README.md").exists():
            return path
    
    # Fallback to current directory
    return current_path

# Add project root to path for imports
project_root = find_workspace_root()

out_dir = project_root / "export"
os.makedirs(out_dir, exist_ok=True)


exported_model = model.export(
    runtime_type=RuntimeType.ONNX_CPU,  # optimized for edge or cpu
    image_size=image_size,
    dynamic_axes=False,  # quantization need static axes!
    simplify_onnx=True,  # simplify and optimize onnx model graph
    onnx_opset=18,
    out_dir=os.path.join(out_dir, ref)
)  # save to models dir

# benchmark onnx model
exported_model.benchmark(iterations=100)

# test onnx model
im = '/Users/u464645/Documents/projects/hackatons/project/sample/webcam_captures/capture_1_20251005_014108.jpg'
result = model.infer(image=im, threshold=0.5, annotate=True)
print(result.detections[1])

print(result)

[1;32m[10/05 02:33][INFO][FocoosModel]: 🔧 Export Device: cpu[0m
[1;32m[10/05 02:33][INFO][FocoosModel]: 🚀 Exporting ONNX model with Optimum..[0m
[1;32m[10/05 02:33][INFO][FocoosModel]: 📊 Nodes in graph: 47[0m
[1;32m[10/05 02:33][INFO][FocoosModel]: ✅ ONNX export completed [0m
[1;32m[10/05 02:33][INFO][FocoosModel]: 🔧 Applying ONNX Simplify: Run Optimum graph optimizations...[0m
[1;32m[10/05 02:33][INFO][onnx_model_bert]: opset version: 18[0m
[1;32m[10/05 02:33][INFO][onnx_model]: Sort graphs in topological order[0m
[1;32m[10/05 02:33][INFO][onnx_model]: Model saved to /Users/u464645/Documents/projects/hackatons/project/sample/export/f73c51dfb3bd422f/model_optimized.onnx[0m
[1;32m[10/05 02:33][INFO][FocoosModel]: 📊 After ONNX Runtime optimizations: 47 nodes in graph[0m
[1;32m[10/05 02:33][INFO][FocoosModel]: 📈 Reduction: ~0.0% nodes removed![0m
[1;32m[10/05 02:33][INFO][FocoosModel]: ✅ Onnx model successfully simplified.[0m
[1;32m[10/05 02:33][INFO][FocoosModel]: 


1 background, 1 humans
Latency: imload 8ms, preprocess 3ms, inference 19ms, postprocess 0ms, annotate 2ms, total 32ms
FocoosDet(bbox=None, conf=0.609123706817627, cls_id=1, label=humans, mask=None, keypoints=None)
FocoosDetections(detections=[FocoosDet(bbox=None, conf=0.6248612403869629, cls_id=0, label=background, mask=None, keypoints=None), FocoosDet(bbox=None, conf=0.609123706817627, cls_id=1, label=humans, mask=None, keypoints=None)], image=hidden, latency=InferLatency(imload=0.008, preprocess=0.003, inference=0.019, postprocess=0.0, annotate=0.002))


## Quantize exported model to int8 (or uint8)

In [29]:
import os
import shutil
from pathlib import Path

# Create a temporary flat calibration folder
calibration_temp_dir = project_root / "temp_calibration"
calibration_temp_dir.mkdir(exist_ok=True)

# Clear existing files
for file in calibration_temp_dir.glob("*"):
    if file.is_file():
        file.unlink()

# Copy a subset of images from both classes to the temp folder
val_dir = Path("/Users/u464645/Documents/projects/hackatons/project/sample/datasets/human_identification_balanced/val/")

# Take first 50 images from each class for calibration (adjust as needed)
for class_dir in val_dir.iterdir():
    if class_dir.is_dir():
        images = list(class_dir.glob("*.jpg"))[:50]  # Take first 50 images
        for i, img in enumerate(images):
            dest_name = f"{class_dir.name}_{i:03d}_{img.name}"
            shutil.copy2(img, calibration_temp_dir / dest_name)

print(f"Created calibration folder with {len(list(calibration_temp_dir.glob('*.jpg')))} images")

Created calibration folder with 100 images


In [30]:
from focoos.infer.quantizer import OnnxQuantizer, QuantizationCfg

quantization_cfg = QuantizationCfg(
    size=image_size,  # input size: must be same as exported model
    calibration_images_folder=str(calibration_temp_dir),  # Use the temporary flat calibration folder
    # to use the dataset validation split on which the model was trained.
    format="QO",  # QO (QOperator): All the quantized operators have their own ONNX definitions, like QLinearConv, MatMulInteger etc.
    # QDQ (Quantize-DeQuantize): inserts DeQuantizeLinear(QuantizeLinear(tensor)) between the original operators to simulate the quantization and dequantization process.
    per_channel=False,  # Per-channel quantization: each channel has its own scale/zero-point → more accurate,
    # especially for convolutions, at the cost of extra memory and computation.
    normalize_images=True,  # normalize images during preprocessing: some models have normalization outside of model forward
)

quantizer = OnnxQuantizer(input_model_path=exported_model.model_path, cfg=quantization_cfg)
model_path = quantizer.quantize(
    benchmark=True  # benchmark bot fp32 and int8 models
)

[1;32m[10/05 02:34][INFO][OnnxQuantizer]: Setting up data reader with calibration images: /Users/u464645/Documents/projects/hackatons/project/sample/temp_calibration[0m


Input shape: 96, 96


[1;32m[10/05 02:34][INFO][onnxruntime.quantization.shape_inference]: Performing symbolic shape inference...[0m
[1;32m[10/05 02:34][INFO][OnnxQuantizer]: 🔧 Quantizing model from /Users/u464645/Documents/projects/hackatons/project/sample/export/f73c51dfb3bd422f/model.onnx to /Users/u464645/Documents/projects/hackatons/project/sample/export/f73c51dfb3bd422f/model_int8.onnx[0m
[1;32m[10/05 02:34][INFO][OnnxQuantizer]: ✅ Quantized model saved successfully to /Users/u464645/Documents/projects/hackatons/project/sample/export/f73c51dfb3bd422f/model_int8.onnx[0m
[1;33m[10/05 02:34][DEBUG][InferModel]: Runtime type: onnx_cpu, Loading model from /Users/u464645/Documents/projects/hackatons/project/sample/export/f73c51dfb3bd422f/model.onnx..[0m
[1;33m[10/05 02:34][DEBUG][ONNXRuntime]: 🔧 [onnxruntime device] CPU[0m
[1;33m[10/05 02:34][DEBUG][ONNXRuntime]: Available providers:['CoreMLExecutionProvider', 'AzureExecutionProvider', 'CPUExecutionProvider'][0m
[1;32m[10/05 02:34][INFO][ONNXRu

## Inference with quantized model on cpu

In [32]:
from focoos import InferModel

quantized_model = InferModel(model_path, runtime_type=RuntimeType.ONNX_CPU)

res = quantized_model.infer(im, annotate=True)
Image.fromarray(res.image)
result = model.infer(image=im, threshold=0.5, annotate=True)
print(result.detections[1])

[1;33m[10/05 02:34][DEBUG][InferModel]: Runtime type: onnx_cpu, Loading model from /Users/u464645/Documents/projects/hackatons/project/sample/export/f73c51dfb3bd422f/model_int8.onnx..[0m
[1;33m[10/05 02:34][DEBUG][ONNXRuntime]: 🔧 [onnxruntime device] CPU[0m
[1;33m[10/05 02:34][DEBUG][ONNXRuntime]: Available providers:['CoreMLExecutionProvider', 'AzureExecutionProvider', 'CPUExecutionProvider'][0m
[1;32m[10/05 02:34][INFO][ONNXRuntime]:  using: CPUExecutionProvider[0m
[1;32m[10/05 02:34][INFO][ONNXRuntime]: ⏱️ Warming up model model_int8 on CPUExecutionProvider, size: 96x96..[0m



1 background, 1 humans
Latency: imload 9ms, preprocess 1ms, inference 0ms, postprocess 0ms, annotate 2ms, total 12ms

1 background, 1 humans
Latency: imload 8ms, preprocess 1ms, inference 22ms, postprocess 0ms, annotate 1ms, total 32ms
FocoosDet(bbox=None, conf=0.609123706817627, cls_id=1, label=humans, mask=None, keypoints=None)




# Train a model from scratch with FocoosAI HUB
👋 Welcome to FocoosAI!

[Create your free account](https://app.focoos.ai/) on the FocoosAI platform and get access to:

- **10 hours of GPU cloud training** 
- **5GB of cloud storage** for your datasets and models
- **1000 cloud inference requests** to test your models
- **Up to 20 models** to experiment with

Start building amazing computer vision models today with our generous free tier!

## Uplaod a dataset

The first step in a computer vision pipeline always starts with data. 
You can use a dataset shared by FocoosAI or upload your own.

You can also find some classification dataset on [Roboflow Universe](https://universe.roboflow.com/search?q=classification) or just create your own with some scraping!

The supported classification dataset layout are:
- **classification folder** (*'folder structure'* on roboflow):

```bash
root/
    train/
        cls1/
            - img_1.jpg
            - img_2.jpg
        cls2/
            - img_1.jpg
            - img_2.jpg
    valid/
        cls1/
            - img_1.jpg
            - img_2.jpg
        cls2/
            img_1.jpg
            img_2.jpg
```
- **roboflow_coco** (*'COCO'* on roboflow):
```bash
root/
    train/
        - _annotations.coco.json
        - img_1.jpg
        - img_2.jpg
    valid/
        - _annotations.coco.json
        - img_3.jpg
        - img_4.jpg
```

First, you need to compress your dataset in .zip format and upload on focoos platform in few clicks:

- go to the dataset page and click on  **"+ add dataset"**, chouse your dataset and upload:

![New Dataset](https://i.imgur.com/2zwPs8d.png)
![Uploading](https://i.imgur.com/7GmwOzW.png)

- Once the upload is finished, you can view a summary, preview, and statistics of your dataset:

![Dataset Info](https://i.imgur.com/qXHFXtU.png) 

## Train a model
- Now we can finally train a model with this dataset, just click on **"Use for training a new model"** and insert name and description about your model:

![New Model](https://i.imgur.com/Io5f4E6.png)

- Configure the hyperparameters, with a recommended batch size of 128 and a resolution of 96px, then initiate the training process:

![Hyperparameters](https://i.imgur.com/NIb5Dq4.png)

- While training, you can monitor the accuracy and loss metrics:

![training](https://i.imgur.com/EX3BXti.png)

- Once training is finished, you can perform inference directly in the browser to evaluate results:

![Inference](https://i.imgur.com/xcHIWv0.png)

- If you are not satisfied with the results, you can train another model with different hyperparameters and compare the models in the “compare models” section.

- Otherwise, you can go to the code snippet section and copy the code to download it to the SDK:

![Code Snippet](https://i.imgur.com/Ju7RgFn.png)

## Export and quantize HUB trained models

In [None]:
from focoos import FocoosHUB, ModelManager
from focoos.infer.quantizer import OnnxQuantizer, QuantizationCfg

hub = FocoosHUB(api_key="YOUR_API_KEY")
model = ModelManager.get("hub://YOUR_MODEL_REF", hub=hub)

exported_model = model.export(
    runtime_type=RuntimeType.ONNX_CPU,
    image_size=96,
    dynamic_axes=False,
    simplify_onnx=True,  # simplify and optimize onnx model graph
    onnx_opset=18,
    out_dir=os.path.join("export/", "my_hub_model"),
)


quantization_cfg = QuantizationCfg(
    size=image_size,  # input size: must be same as exported model
    calibration_images_folder=str("/home/ubuntu/focoos/datasets/coco/val2017"),  # Calibration images folder: It is strongly recommended
    # to use the dataset validation split on which the model was trained.
    # Here, for example, we will use the assets folder.
    format="QO",  # QO (QOperator): All the quantized operators have their own ONNX definitions, like QLinearConv, MatMulInteger etc.
    # QDQ (Quantize-DeQuantize): inserts DeQuantizeLinear(QuantizeLinear(tensor)) between the original operators to simulate the quantization and dequantization process.
    per_channel=False,  # Per-channel quantization: each channel has its own scale/zero-point → more accurate,
    # especially for convolutions, at the cost of extra memory and computation.
    normalize_images=True,  # normalize images during preprocessing: some models have normalization outside of model forward
)

quantizer = OnnxQuantizer(input_model_path=exported_model.model_path, cfg=quantization_cfg)
model_path = quantizer.quantize(
    benchmark=True  # benchmark bot fp32 and int8 models
)