In [1]:
import torch
from ultralytics import YOLO
import copy

In [24]:
# Load the models
# NOTE(review): model1 (v1) is only used as the key reference and as the
# container that receives the averaged weights; its own weights are NOT part
# of the average. If v1 was meant to contribute, add state_dict1 to
# `source_state_dicts` below -- confirm intent.
model1 = YOLO('yolov8m_1920_finetune.pt') # v1
model2 = YOLO('v8m_finetune.pt') # v2
model3 = YOLO('v8m_finetune_stages.pt') # v3
# Get state dictionaries
state_dict1 = model1.model.state_dict()
state_dict2 = model2.model.state_dict()
state_dict3 = model3.model.state_dict()

# Checkpoints whose tensors are actually averaged (v2 and v3)
source_state_dicts = [state_dict2, state_dict3]

# Fail fast, naming the offending keys, instead of printing a generic message
# and continuing with an incomplete state dict.
missing_keys = [
    key for key in state_dict1
    if not all(key in source for source in source_state_dicts)
]
if missing_keys:
    raise KeyError(f"ERROR: KEY DOES NOT EXIST in every averaged checkpoint: {missing_keys}")

# Average the weights
averaged_state_dict = {}
for key, reference in state_dict1.items():
    tensors = [source[key] for source in source_state_dicts]

    # Guard against silent broadcasting when the checkpoints disagree on shape.
    mismatched = [tuple(t.shape) for t in tensors if t.shape != reference.shape]
    if mismatched:
        raise ValueError(
            f"Shape mismatch for '{key}': expected {tuple(reference.shape)}, got {mismatched}"
        )

    if torch.is_floating_point(tensors[0]):
        # Element-wise arithmetic mean of the floating-point tensors.
        averaged_state_dict[key] = sum(tensors) / len(tensors)
    else:
        # Non-floating-point entries (integer counters and the like) are not
        # meaningfully averaged; true division would also turn them into
        # floats. Keep the value from the first averaged checkpoint instead.
        averaged_state_dict[key] = tensors[0].clone()

# Load the averaged state into a new model (deep copy keeps model1 untouched)
averaged_model = copy.deepcopy(model1)
averaged_model.model.load_state_dict(averaged_state_dict)

# Save the averaged model
# The checkpoint dict is replaced with one that only references the averaged
# module before calling save() -- presumably so nothing from the template's
# original checkpoint is carried over; confirm against ultralytics Model.save.
averaged_model.ckpt = {
    'model': averaged_model.model
}
averaged_model.save('./v8m_averaged_model.pt')

In [2]:
# Re-create `averaged_model` from the 'v8m_averaged_model.pt' file on disk
# (the file name the averaging cell saves to), rebinding the name so the
# following cells work from the saved checkpoint.
averaged_model = YOLO('v8m_averaged_model.pt')

In [3]:
# Sanity-check the averaged model on one test image; annotated results are
# written under test_images/output (existing folder is reused via exist_ok).
input_path = "test_images/trucks.jpg"
results1 = averaged_model.predict(
    input_path,
    imgsz=(1088, 1920),
    conf=0.1,
    iou=0.6,
    agnostic_nms=False,
    device=0,
    save=True,
    project="test_images",
    name="output",
    exist_ok=True,
)


image 1/1 /home/jupyter/brainhack-til-2025/til-25-main/cv/src/test_images/trucks.jpg: 960x1920 7 trucks, 1 bus, 139.1ms
Speed: 98.8ms preprocess, 139.1ms inference, 2077.7ms postprocess per image at shape (1, 3, 960, 1920)
Results saved to [1mtest_images/output[0m


In [4]:
# Export the averaged model in "engine" format at a fixed 1088x1920 input,
# on device 0, with half=True and NMS left out of the exported graph.
# The call's return value (the exported file name) is the cell's output.
averaged_model.export(
    format="engine",
    imgsz=(1088, 1920),
    half=True,
    nms=False,
    device=0,
)

Ultralytics 8.3.131 🚀 Python-3.12.10 torch-2.7.0+cu126 CUDA:0 (Tesla T4, 14918MiB)

[34m[1mPyTorch:[0m starting from 'v8m_averaged_model.pt' with input shape (1, 3, 1088, 1920) BCHW and output shape(s) (1, 22, 42840) (49.9 MB)

[34m[1mONNX:[0m starting export with onnx 1.18.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.53...
[34m[1mONNX:[0m export success ✅ 6.9s, saved as 'v8m_averaged_model.onnx' (99.5 MB)

[34m[1mTensorRT:[0m starting export with TensorRT 10.10.0.31...
[06/10/2025-09:07:09] [TRT] [I] [MemUsageChange] Init CUDA: CPU -2, GPU +0, now: CPU 1783, GPU 950 (MiB)
[06/10/2025-09:07:35] [TRT] [I] [MemUsageChange] Init builder kernel library: CPU +541, GPU +2, now: CPU 2123, GPU 952 (MiB)
[06/10/2025-09:07:35] [TRT] [I] ----------------------------------------------------------------
[06/10/2025-09:07:35] [TRT] [I] Input filename:   v8m_averaged_model.onnx
[06/10/2025-09:07:35] [TRT] [I] ONNX IR version:  0.0.9
[06/10/2025-09:07:35] [TRT] [I] Opset ver

'v8m_averaged_model.engine'

In [2]:
# Load the models
model1 = YOLO('singlemodel/v8l_f7_fixed.pt')
model2 = YOLO('singlemodel/v8l_ft_5e.pt')
# Get state dictionaries
state_dict1 = model1.model.state_dict()
state_dict2 = model2.model.state_dict()

# Checkpoints whose tensors are averaged (both loaded models contribute)
source_state_dicts = [state_dict1, state_dict2]

# Fail fast, naming the offending keys, instead of printing a generic message
# and continuing with an incomplete state dict.
missing_keys = [
    key for key in state_dict1
    if not all(key in source for source in source_state_dicts)
]
if missing_keys:
    raise KeyError(f"ERROR: KEY DOES NOT EXIST in every averaged checkpoint: {missing_keys}")

# Average the weights
averaged_state_dict = {}
for key, reference in state_dict1.items():
    tensors = [source[key] for source in source_state_dicts]

    # Guard against silent broadcasting when the checkpoints disagree on shape.
    mismatched = [tuple(t.shape) for t in tensors if t.shape != reference.shape]
    if mismatched:
        raise ValueError(
            f"Shape mismatch for '{key}': expected {tuple(reference.shape)}, got {mismatched}"
        )

    if torch.is_floating_point(tensors[0]):
        # Element-wise arithmetic mean of the floating-point tensors.
        averaged_state_dict[key] = sum(tensors) / len(tensors)
    else:
        # Non-floating-point entries (integer counters and the like) are not
        # meaningfully averaged; true division would also turn them into
        # floats. Keep the value from the first averaged checkpoint instead.
        averaged_state_dict[key] = tensors[0].clone()

# Load the averaged state into a new model (deep copy keeps model1 untouched)
averaged_model = copy.deepcopy(model1)
averaged_model.model.load_state_dict(averaged_state_dict)

# Save the averaged model
# The checkpoint dict is replaced with one that only references the averaged
# module before calling save() -- presumably so nothing from the template's
# original checkpoint is carried over; confirm against ultralytics Model.save.
averaged_model.ckpt = {
    'model': averaged_model.model
}
averaged_model.save('./v8l_averaged_model.pt')

In [2]:
# Re-create `averaged_model` from the 'v8l_averaged_model.pt' file on disk
# (the file name the averaging cell saves to), rebinding the name so the
# following cells work from the saved checkpoint.
averaged_model = YOLO('v8l_averaged_model.pt')

In [8]:
# Sanity-check the averaged model on one test image; annotated results are
# written under test_images/output (existing folder is reused via exist_ok).
input_path = "test_images/homemade_test_img2.jpg"
results1 = averaged_model.predict(
    input_path,
    imgsz=(1088, 1920),
    conf=0.1,
    iou=0.6,
    agnostic_nms=False,
    device=0,
    save=True,
    project="test_images",
    name="output",
    exist_ok=True,
)


image 1/1 /home/jupyter/brainhack-til-2025/til-25-main/cv/src/test_images/homemade_test_img2.jpg: 1088x1920 2 commercial aircrafts, 2 drones, 2 fighter jets, 1 fighter plane, 1 helicopter, 2 light aircrafts, 1 missile, 1 tank, 1 van, 2 cargo ships, 2 warships, 294.2ms
Speed: 15.8ms preprocess, 294.2ms inference, 2.1ms postprocess per image at shape (1, 3, 1088, 1920)
Results saved to [1mtest_images/output[0m


In [3]:
# Export the averaged model to ONNX at a fixed 1088x1920 input, on device 0,
# with half=True and NMS left out of the exported graph.
# The call's return value (the exported file name) is the cell's output.
averaged_model.export(
    format="onnx",
    imgsz=(1088, 1920),
    half=True,
    nms=False,
    device=0,
)

Ultralytics 8.3.131 🚀 Python-3.12.10 torch-2.7.0+cu126 CUDA:0 (Tesla T4, 14918MiB)
Model summary (fused): 112 layers, 43,620,486 parameters, 0 gradients, 164.9 GFLOPs

[34m[1mPyTorch:[0m starting from 'v8l_averaged_model.pt' with input shape (1, 3, 1088, 1920) BCHW and output shape(s) (1, 22, 42840) (83.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.18.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.53...
[34m[1mONNX:[0m export success ✅ 7.0s, saved as 'v8l_averaged_model.onnx' (83.7 MB)

Export complete (8.9s)
Results saved to [1m/home/jupyter/brainhack-til-2025/til-25-main/cv/src[0m
Predict:         yolo predict task=detect model=v8l_averaged_model.onnx imgsz=1088,1920 half 
Visualize:       https://netron.app


'v8l_averaged_model.onnx'