In [1]:
import os

os.environ['YOLO_VERBOSE'] = 'False'

In [2]:
import torch
from ultralytics import YOLO
from torch import nn

In [3]:
from utils import utils
from quantization import quantizer
import copy

In [4]:
@torch.inference_mode()
def evaluate(
  model: nn.Module,
) -> float:
  
  metrics = model.val(data = 'road_sign_data.yaml', split = 'val')
  acc = metrics.results_dict['metrics/mAP50(B)']

  return acc

In [5]:
def fine_tune(model: nn.Module):
  model.train(data='road_sign_data.yaml', epochs=10, imgsz=640,device = '0')

In [10]:
model = YOLO('tsr.pt').cuda()

In [7]:
Byte = 8
KiB = 1024 * Byte
MiB = 1024 * KiB
GiB = 1024 * MiB

In [8]:
bitwidth = 8

In [11]:
fp32_model_accuracy = evaluate(model)

In [12]:
with torch.inference_mode():

    quantize = quantizer.KMeansQuantizer(model, bitwidth)
    quantized_model_size = utils.get_model_size(model, bitwidth)
    print(f"    {bitwidth}-bit k-means quantized model has size={quantized_model_size/MiB:.2f} MiB")
    quantized_model_accuracy = evaluate(model)
    print(f"    {bitwidth}-bit k-means quantized model has accuracy={quantized_model_accuracy:.2f}%")

    8-bit k-means quantized model has size=2.87 MiB
    8-bit k-means quantized model has accuracy=0.99%


In [15]:
best_sparse_checkpoint = dict()
best_sparse_accuracy = 0
accuracy_drop_threshold = 0.5
quantizers_before_finetune = copy.deepcopy(quantize)

for bitwidth in [8]:
    print(f'k-means quantizing model into {bitwidth} bits')
    with torch.inference_mode():

        quantize.apply(model, update_centroids=False)
    quantized_model_accuracy = evaluate(model)
    print(f"    {bitwidth}-bit k-means quantized model has accuracy={quantized_model_accuracy:.2f}% before quantization-aware training ")
    accuracy_drop = fp32_model_accuracy - quantized_model_accuracy
    if accuracy_drop > accuracy_drop_threshold:
        print(f"        Quantization-aware training due to accuracy drop={accuracy_drop:.2f}% is larger than threshold={accuracy_drop_threshold:.2f}%")
        num_finetune_epochs = 5
        best_accuracy = 0
        epoch = num_finetune_epochs
        while accuracy_drop > accuracy_drop_threshold and epoch > 0:
            fine_tune(model)
            with torch.inference_mode():

                quantize.apply(model, update_centroids=True)
            model_accuracy = evaluate(model)
            is_best = model_accuracy > best_accuracy
            best_accuracy = max(model_accuracy, best_accuracy)
            print(f'        Epoch {num_finetune_epochs-epoch} Accuracy {model_accuracy:.2f}% / Best Accuracy: {best_accuracy:.2f}%')
            accuracy_drop = fp32_model_accuracy - best_accuracy
            epoch -= 1
    else:
        print(f"        No need for quantization-aware training since accuracy drop={accuracy_drop:.2f}% is smaller than threshold={accuracy_drop_threshold:.2f}%")
     

k-means quantizing model into 8 bits
    8-bit k-means quantized model has accuracy=0.99% before quantization-aware training 
        No need for quantization-aware training since accuracy drop=0.00% is smaller than threshold=0.50%


: 