In [16]:
################################################################################
import os
import re
from typing import List, Callable, Union, Dict
from tqdm import tqdm
from copy import deepcopy

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F  # 이 줄을 추가하여 F.interpolate 사용 에러 해결
import torch.optim as optim
from torch.cuda import amp

# Pytorch Quantization
from pytorch_quantization import nn as quant_nn
from torch.quantization import QuantStub, DeQuantStub
from pytorch_quantization.nn.modules import _utils as quant_nn_utils
from pytorch_quantization import calib
from pytorch_quantization.tensor_quant import QuantDescriptor
from pytorch_quantization import quant_modules
from pytorch_quantization import tensor_quant
from absl import logging as quant_logging



In [17]:
class QuantAdd(torch.nn.Module):
    def __init__(self, quantization):
        super().__init__()
        if quantization:
            self._input0_quantizer = quant_nn.TensorQuantizer(QuantDescriptor())
            self._input1_quantizer = quant_nn.TensorQuantizer(QuantDescriptor())
        self.quantization = quantization

    def forward(self, x, y):
        if self.quantization:
            # print(f"QAdd {self._input0_quantizer}  {self._input1_quantizer}")
            return self._input0_quantizer(x) + self._input1_quantizer(y)
        return x + y

class QuantC2fChunk(torch.nn.Module):
    def __init__(self, c):
        super().__init__()
        self._input0_quantizer = quant_nn.TensorQuantizer(QuantDescriptor())
        self.c = c
    def forward(self, x, chunks, dims):
        return torch.split(self._input0_quantizer(x), (self.c, self.c), dims)

class QuantConcat(torch.nn.Module): 
    def __init__(self, dim):
        super().__init__()
        self._input0_quantizer = quant_nn.TensorQuantizer(QuantDescriptor())
        self._input1_quantizer = quant_nn.TensorQuantizer(QuantDescriptor())
        self.dim = dim

    def forward(self, x, dim):
        x_0 = self._input0_quantizer(x[0])
        x_1 = self._input1_quantizer(x[1])
        return torch.cat((x_0, x_1), self.dim) 

class QuantUpsample(torch.nn.Module): 
    def __init__(self, size, scale_factor, mode):
        super().__init__()
        self.size = size
        self.scale_factor = scale_factor
        self.mode = mode
        self._input_quantizer = quant_nn.TensorQuantizer(QuantDescriptor())
        
    def forward(self, x):
        return F.interpolate(self._input_quantizer(x), self.size, self.scale_factor, self.mode)

In [23]:
# Load a model
from ultralytics import YOLO
# model = YOLO("yolov8n_.yaml")  # build a new model from scratch
model = YOLO("yolov8n_relu_int8.pt")  # load a pretrained model (recommended for training)
model.cuda()

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): ReLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): ReLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): ReLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_s

In [19]:
    def bottleneck_quant_forward(self, x):
        if hasattr(self, "addop"):
            return self.addop(x, self.cv2(self.cv1(x))) if self.add else self.cv2(self.cv1(x))
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

    def concat_quant_forward(self, x):
        if hasattr(self, "concatop"):
            return self.concatop(x, self.d)
        return torch.cat(x, self.d)

    def upsample_quant_forward(self, x):
        if hasattr(self, "upsampleop"):
            return self.upsampleop(x)
        return F.interpolate(x)

    def c2f_qaunt_forward(self, x):
        if hasattr(self, "c2fchunkop"):
            y = list(self.c2fchunkop(self.cv1(x), 2, 1))
            y.extend(m(y[-1]) for m in self.m)
            return self.cv2(torch.cat(y, 1))
            
        else:
            y = list(self.cv1(x).split((self.c, self.c), 1))
            y.extend(m(y[-1]) for m in self.m)
            return self.cv2(torch.cat(y, 1))

In [20]:
for name, module in model.named_modules():
    if module.__class__.__name__ == "C2f":
        if not hasattr(module, "c2fchunkop"):
            print(f"Add C2fQuantChunk to {name}")
            module.c2fchunkop = QuantC2fChunk(module.c)
        module.__class__.forward = c2f_qaunt_forward

    if module.__class__.__name__ == "Bottleneck":
        if module.add:
            if not hasattr(module, "addop"):
                print(f"Add QuantAdd to {name}")
                module.addop = QuantAdd(module.add)
            module.__class__.forward = bottleneck_quant_forward
            
    if module.__class__.__name__ == "Concat":
        if not hasattr(module, "concatop"):
            print(f"Add QuantConcat to {name}")
            module.concatop = QuantConcat(module.d)
        module.__class__.forward = concat_quant_forward

    if module.__class__.__name__ == "Upsample":
        if not hasattr(module, "upsampleop"):
            print(f"Add QuantUpsample to {name}")
            module.upsampleop = QuantUpsample(module.size, module.scale_factor, module.mode)
        module.__class__.forward = upsample_quant_forward

Add C2fQuantChunk to model.model.2
Add QuantAdd to model.model.2.m.0
Add C2fQuantChunk to model.model.4
Add QuantAdd to model.model.4.m.0
Add QuantAdd to model.model.4.m.1
Add C2fQuantChunk to model.model.6
Add QuantAdd to model.model.6.m.0
Add QuantAdd to model.model.6.m.1
Add C2fQuantChunk to model.model.8
Add QuantAdd to model.model.8.m.0
Add QuantUpsample to model.model.10
Add QuantConcat to model.model.11
Add C2fQuantChunk to model.model.12
Add QuantUpsample to model.model.13
Add QuantConcat to model.model.14
Add C2fQuantChunk to model.model.15
Add QuantConcat to model.model.17
Add C2fQuantChunk to model.model.18
Add QuantConcat to model.model.20
Add C2fQuantChunk to model.model.21


In [21]:
model.save('yolov8n_relu_int8.pt')

In [24]:
model.export(format='onnx')  # creates 'yolov8s.onnx'

# Load the exported ONNX model
onnx_model = YOLO('yolov8n_relu_int8.onnx')

# Run inference
results = onnx_model('https://ultralytics.com/images/bus.jpg')

YOLOv8_relu summary (fused): 218 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8n_relu_int8.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (18.4 MB)

[34m[1mONNX:[0m starting export with onnx 1.12.0 opset 17...
[34m[1mONNX:[0m export success ✅ 0.8s, saved as 'yolov8n_relu_int8.onnx' (12.2 MB)

Export complete (1.5s)
Results saved to [1m/works/ultralytics[0m
Predict:         yolo predict task=detect model=yolov8n_relu_int8.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov8n_relu_int8.onnx imgsz=640 data=/works/ultralytics/ultralytics/cfg/datasets/coco.yaml  
Visualize:       https://netron.app
Loading yolov8n_relu_int8.onnx for ONNX Runtime inference...

Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 /works/ultralytics/bus.jpg: 640x640 4 persons, 1 bus, 37.1ms
Speed: 3.0ms preprocess, 37.1ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)


In [37]:
import torch
import yaml
import collections
from tqdm import tqdm
from pytorch_quantization import nn as quant_nn
from pytorch_quantization.nn.modules import _utils as quant_nn_utils
from ultralytics import YOLO
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import sys
import os


def cal_model(model, data_loader, device, num_batch=1024):
    num_batch = num_batch
    def compute_amax(model, **kwargs):
        for name, module in model.named_modules():
            if isinstance(module, quant_nn.TensorQuantizer):
                if module._calibrator is not None:
                    if isinstance(module._calibrator, calib.MaxCalibrator):
                        module.load_calib_amax(strict=False)
                    else:
                        module.load_calib_amax(**kwargs)

                    module._amax = module._amax.to(device)
        
    def collect_stats(model, data_loader, device, num_batch=1024):
        """Feed data to the network and collect statistics"""
        # Enable calibrators
        model.eval()
        for name, module in model.named_modules():
            if isinstance(module, quant_nn.TensorQuantizer):
                if module._calibrator is not None:
                    module.disable_quant()
                    module.enable_calib()
                else:
                    module.disable()

        # Feed data to the network for collecting stats
        with torch.no_grad():
            for i, datas in tqdm(enumerate(data_loader), total=num_batch, desc="Collect stats for calibrating"):
                # imgs = datas[0].to(device, non_blocking=True).float() / 255.0
                imgs = datas['img'].to(device, non_blocking=True).float() / 255.0
                model(imgs)

                if i >= num_batch:
                    break

        # Disable calibrators
        for name, module in model.named_modules():
            if isinstance(module, quant_nn.TensorQuantizer):
                if module._calibrator is not None:
                    module.enable_quant()
                    module.disable_calib()
                else:
                    module.enable()

    collect_stats(model, data_loader, device, num_batch=num_batch)
    compute_amax(model, method="mse")
    
def load_coco_dataset(data_dir, cache_file):
    cache_path = os.path.join(data_dir, cache_file)

    # 캐시 파일이 존재하면 불러오기
    if os.path.exists(cache_path):
        print(f"Loading dataset from cache: {cache_path}")
        dataset = torch.load(cache_path)
    else:
        # 캐시 파일이 없으면 데이터셋 생성 및 캐시 파일 저장
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((640, 640)),  # 예시 크기, 모델 요구 사항에 따라 조정
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        dataset = datasets.CocoDetection(root=os.path.join(data_dir, 'images'), 
                                         annFile=os.path.join(data_dir, 'annotations/instances_train2017.json'), 
                                         transform=transform)
        print(f"Scanning dataset and creating cache: {cache_path}")
        torch.save(dataset, cache_path)

    return dataset

# 데이터셋 로드 예시
data_dir = '/usr/src/datasets/coco'
cache_file = 'labels/train2017.cache'
dataset = load_coco_dataset(data_dir, cache_file)
# DataLoader 생성
batch_size = 2  # 배치 크기, 필요에 따라 조정
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)
# COCO 데이터셋 로드

model = YOLO('yolov8n_relu_int8.pt')
# 모델을 GPU로 옮기기 (GPU 사용이 가능한 경우)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print("cal_model")
# 캘리브레이션 함수 실행
cal_model(model, train_loader, device)

Loading dataset from cache: /usr/src/datasets/coco/labels/train2017.cache
cal_model
New https://pypi.org/project/ultralytics/8.2.18 available 😃 Update with 'pip install -U ultralytics'
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n_relu.pt, data=/works/ultralytics/ultralytics/cfg/datasets/coco.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda:0, workers=8, project=None, name=train20, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, reti

ValueError: either size or scale_factor should be defined