# Evaluation of YOLOv8n Ultralytics QAT (Quantization-Aware Training)

## 1. Import the Libraries

In [10]:
import torch
import time
import pandas as pd
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.checks import cuda_is_available
from ultralytics.utils.benchmarks import benchmark
from pathlib import Path

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is reused by the benchmarking cells below.
device = 'cuda' if cuda_is_available() else 'cpu'

In [11]:
# Absolute path to the trained checkpoint of the QAT run being evaluated.
# NOTE(review): machine-specific path; adjust when running on another host.
path = Path('/home/jeffrymahbuubi/Syringe-Detection/compression/ultralytics-qat-jeffry/run/detect/QAT_b16_e100_silu/weights/best.pt')

# Load the checkpoint as a detection model.
model = YOLO(path, task='detect')
# Run the Ultralytics benchmark on coco128 at 640 px with half precision disabled;
# the table printed below reports size, mAP50-95, latency and FPS per format.
benchmark(model=model, data='coco128.yaml', imgsz=640, half=False, device=device)

Setup complete ✅ (8 CPUs, 15.6 GB RAM, 98.6/250.9 GB disk)

Benchmarks complete for best.pt on coco128.yaml at imgsz=640 (511.04s)
                   Format Status❔  Size (MB)  metrics/mAP50-95(B)  Inference time (ms/im)     FPS
0                 PyTorch       ✅        6.2               0.7156                   48.19   20.75
1             TorchScript       ✅       12.5               0.7134                   11.54   86.68
2                    ONNX       ✅       12.2               0.7133                   17.15   58.31
3                OpenVINO       ❌        0.0                  NaN                     NaN     NaN
4                TensorRT       ✅       19.3               0.7132                    5.96  167.83
5                  CoreML       ❌        0.0                  NaN                     NaN     NaN
6   TensorFlow SavedModel       ✅       30.6               0.7134                   33.36   29.98
7     TensorFlow GraphDef       ✅       12.3               0.7134                   3

Unnamed: 0,Format,Status❔,Size (MB),metrics/mAP50-95(B),Inference time (ms/im),FPS
0,PyTorch,✅,6.2,0.7156,48.19,20.75
1,TorchScript,✅,12.5,0.7134,11.54,86.68
2,ONNX,✅,12.2,0.7133,17.15,58.31
3,OpenVINO,❌,0.0,,,
4,TensorRT,✅,19.3,0.7132,5.96,167.83
5,CoreML,❌,0.0,,,
6,TensorFlow SavedModel,✅,30.6,0.7134,33.36,29.98
7,TensorFlow GraphDef,✅,12.3,0.7134,32.0,31.25
8,TensorFlow Lite,❌,0.0,,,
9,TensorFlow Edge TPU,❌,0.0,,,


## 2. Utility Functions

In [24]:
def get_base_path(model_name, load_weights=True):
    """
    Get the base path for the model directory and, optionally, a weights file.

    Args:
        model_name (str): The name of the model (its run folder under ``../run/detect/``).
        load_weights (bool | str, optional): Controls the weights path. Default is True.
            - ``True``: also return the path to ``weights/best.pt``.
            - a non-empty string: treated as the weights file name inside ``weights/``
              (e.g. ``'best_qat.engine'``).
            - ``False`` or any other falsy value: return the directory only.

    Returns:
        tuple | str: ``(directory, weights)`` when ``load_weights`` is truthy,
        otherwise only ``directory``. ``directory`` always ends with ``/``.
    """
    directory = f'../run/detect/{model_name}/'
    if not load_weights:
        return directory

    # A string selects a specific weights file. Without this, any truthy value
    # (including a file name passed by a caller) silently resolved to best.pt.
    weights_file = load_weights if isinstance(load_weights, str) else 'best.pt'
    return directory, f'{directory}weights/{weights_file}'

def benchmark_inference_speed(model, img, imgsz=640, device='cpu', half=False, nwarmup=50, num_runs=1000, verbose=False):
    """
    Benchmark the inference speed of a model by averaging over multiple runs.

    The per-run time is the value the model reports in
    ``results[0].speed["inference"]``; no wall-clock timing is done here.

    Args:
        model (YOLO): The YOLO model to benchmark.
        img (str): The image URL or path to use for inference.
        imgsz (int, optional): The image size for the benchmark. Default is 640.
        device (str, optional): The device to run the benchmark on, e.g. 'cpu', 'cuda' or 'cuda:0'. Default is 'cpu'.
        half (bool, optional): Use half-precision for the model if True. Default is False.
        nwarmup (int, optional): The number of warmup runs. Default is 50.
        num_runs (int, optional): The number of runs to average the inference time. Default is 1000.
        verbose (bool, optional): Print detailed logs if True. Default is False.

    Returns:
        float: The average inference time in milliseconds.
        float: The average FPS (Frames Per Second).

    Raises:
        ValueError: If ``num_runs`` is less than 1 (the average would be undefined).
    """
    if num_runs < 1:
        raise ValueError(f"num_runs must be at least 1, got {num_runs}")

    # Also matches 'cuda:0' and torch.device('cuda'); an exact == 'cuda' test misses both.
    on_cuda = str(device).startswith('cuda')

    # Warmup runs: results are discarded
    for _ in range(nwarmup):
        model.predict(img, imgsz=imgsz, device=device, half=half, verbose=verbose)
    if on_cuda:
        # Wait for queued GPU work so it does not overlap with the first measured run
        torch.cuda.synchronize()

    # Measured runs
    inference_times = []
    for _ in range(num_runs):
        results = model.predict(img, imgsz=imgsz, device=device, half=half, verbose=verbose)
        if on_cuda:
            torch.cuda.synchronize()

        # Inference time of this run as reported by the model (milliseconds)
        inference_times.append(results[0].speed["inference"])

    # Calculate average inference time and FPS
    average_inference_time = np.mean(inference_times)
    average_fps = 1000 / average_inference_time

    if verbose:
        print(f"Average Inference Time: {average_inference_time:.2f} ms")
        print(f"Average FPS: {average_fps:.2f}")

    return average_inference_time, average_fps

def load_benchmark_results(models):
    """
    Load and display benchmark results for each model in the models dictionary.

    For every entry, ``benchmark_results.csv`` is read from the model's run
    directory (as returned by ``get_base_path``), tagged with a ``Model Key``
    column, and followed by a row of dashes that visually separates the groups
    in the printed table. Models whose CSV is missing or unreadable are
    reported on stdout and skipped.

    Args:
        models (dict): Dictionary containing model keys and names.

    Returns:
        pd.DataFrame: Combined benchmark results DataFrame, or None when no
        CSV could be loaded.
    """
    all_results = []

    for model_key, model_name in models.items():
        save_dir = get_base_path(model_name, load_weights=False)
        # Path join avoids the doubled slash (save_dir already ends with '/')
        csv_path = Path(save_dir) / 'benchmark_results.csv'

        # Only the read can fail in an expected way; keep the try body minimal
        try:
            df = pd.read_csv(csv_path)
        except FileNotFoundError:
            print(f"File not found: {csv_path}")
            continue
        except Exception as e:
            print(f"Error loading {csv_path}: {e}")
            continue

        df['Model Key'] = model_key  # Add a column to identify which model the results belong to
        all_results.append(df)

        # Separator row sized to the actual column count. A fixed width of 4
        # raised for any CSV that did not have exactly three columns.
        separator_row = pd.DataFrame([['-' * 10] * len(df.columns)], columns=df.columns)
        all_results.append(separator_row)

    if not all_results:
        print("No benchmark results found.")
        return None

    combined_df = pd.concat(all_results, ignore_index=True)
    print(combined_df)
    return combined_df

## 3. Benchmarking

### Validation to find mAP50-95

### Prediction to find _Latency (ms)_ and _FPS_

**Average Inference Speed**

The average inference speed (in milliseconds) is calculated by averaging the individual inference times.

$$\text{Average Inference Time (ms)} = \frac{1}{N} \sum_{i=1}^{N} t_i$$

where:
- $N$ is the number of inference runs.
- $t_i$ is the inference time of the $i$-th run in milliseconds (the `inference` entry of `results[0].speed`).

**Frames Per Second (FPS)**

The FPS is the reciprocal of the average inference time expressed in seconds. Because the average is measured in milliseconds, this equals 1000 divided by the average inference time.

$$\text{FPS} = \frac{1000}{\text{Average Inference Time (ms)}}$$

Putting it together, we can calculate the FPS as:

$$\text{FPS} = \frac{1000}{\frac{1}{N} \sum_{i=1}^{N} t_i}$$

In [None]:
# Weights files to benchmark, keyed by the label shown in the results table
models = {
    'PyTorch': 'best.pt',
    'PyTorchQAT': 'best_qat.pt',
    'Engine': 'best_qat.engine'
}

# Training run that belongs to each detection task
MODEL_NAMES = {
    'rotate': 'QAT_b16_e200_q20_silu_rect_rotate_detection',
    'syringe': 'QAT_b16_e100_q40_silu_rect_syringe_detection',
    'rubber': 'QAT_b16_e100_q40_silu_rect_rubber_detection',
    'line': 'QAT_b16_e100_q40_silu_rect_line_detection_50cc',
}

# Sample image used for prediction, per detection task
# NOTE(review): the 'rubber' image lives under syringe-detection-qat; confirm this is intended.
images = {
    'rotate': './datasets/rotate-detection-qat/valid/images/50cc_46ml_red_v2_8_jpg.rf.a5c8196aa94985df7158469594f775ef.jpg',
    'syringe': './datasets/syringe-detection-qat/train/images/50cc_46ml_red_v2_8_jpg.rf.7aa009fd59101708aa31cd85baece847.jpg',
    'rubber': './datasets/syringe-detection-qat/valid/images/50cc_46ml_red_v2_8_jpg.rf.d5e5c08395fafd01a273e1eaf70bc29c.jpg',
    'line': './datasets/line-detection-qat/cc50/valid/images/50cc_46ml_red_v2_10_jpg.rf.b4451a2f564a61f713934b3838683075.jpg'
}

# Select the task once: 'rotate', 'syringe', 'rubber' or 'line'.
# The run name is derived from it so the image and the model cannot get out of sync.
PREDICT = 'line'
MODEL_NAME = MODEL_NAMES[PREDICT]

# Run directory shared by all weights files of the selected model (ends with '/').
# Resolved before the loop so it is defined even when `models` is empty.
save_dir = get_base_path(MODEL_NAME, load_weights=False)

# List to store benchmark results
benchmark_results = []

# Iterate through the dictionary and benchmark each model
for model_name, model_weights in models.items():
    print(f"\nBenchmarking {model_name} model...")

    # Build the path to this entry's own weights file. Passing the file name as
    # get_base_path's load_weights flag resolved every entry to best.pt.
    weights = f'{save_dir}weights/{model_weights}'

    # Initialize the model; the task is stated explicitly, as in the export-benchmark cell
    model = YOLO(weights, task='detect')

    # Benchmark the model
    average_inference_time, average_fps = benchmark_inference_speed(model, img=images[PREDICT], device=device, nwarmup=50, num_runs=1000, verbose=True)

    # Append results to the list
    benchmark_results.append({
        'Model': model_name,
        'Average Inference Time (ms)': average_inference_time,
        'Average FPS': average_fps
    })

# Create a DataFrame from the benchmark results
df = pd.DataFrame(benchmark_results)

# Save the DataFrame to a CSV file in the run directory
csv_path = f'{save_dir}benchmark_results.csv'
df.to_csv(csv_path, index=False)

# Display the DataFrame as a table
print(df)

In [25]:
# Run-directory name of each trained detector, keyed by the label that is
# written to the 'Model Key' column of the combined table.
# NOTE: this rebinds `models`, which the benchmarking cell uses for weights files.
models = {
    'ROTATION_MODEL_NAME': 'QAT_b16_e200_q20_silu_rect_rotate_detection',
    'SYRINGE_MODEL_NAME': 'QAT_b16_e100_q40_silu_rect_syringe_detection',
    'RUBBER_MODEL_NAME': 'QAT_b16_e100_q40_silu_rect_rubber_detection',
    'LINE_MODEL_NAME': 'QAT_b16_e100_q40_silu_rect_line_detection_50cc'
}

# Read each run's benchmark_results.csv, print the combined table and keep it
# (None is returned when no CSV could be loaded).
combined_benchmark_results = load_benchmark_results(models)

         Model Average Inference Time (ms) Average FPS            Model Key
0      PyTorch                   21.338333   46.864016  ROTATION_MODEL_NAME
1   PyTorchQAT                   25.643749   38.995857  ROTATION_MODEL_NAME
2       Engine                    9.198055  108.718642  ROTATION_MODEL_NAME
3   ----------                  ----------  ----------           ----------
4      PyTorch                    21.90941   45.642489   SYRINGE_MODEL_NAME
5   PyTorchQAT                   26.837237   37.261659   SYRINGE_MODEL_NAME
6       Engine                    9.735532  102.716523   SYRINGE_MODEL_NAME
7   ----------                  ----------  ----------           ----------
8      PyTorch                   21.154766   47.270673    RUBBER_MODEL_NAME
9   PyTorchQAT                    24.30437   41.144865    RUBBER_MODEL_NAME
10      Engine                   10.265786   97.410956    RUBBER_MODEL_NAME
11  ----------                  ----------  ----------           ----------
12     PyTor