In [None]:
!pip install ultralytics
!pip install psutil

Collecting ultralytics
  Downloading ultralytics-8.2.54-py3-none-any.whl (800 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/800.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m368.6/800.1 kB[0m [31m11.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.1/800.1 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cache

In [None]:
import os
import xml.etree.ElementTree as ET
from PIL import Image
from torchvision.datasets import VOCDetection
from torchvision.transforms import ToPILImage, ToTensor
import shutil
import yaml
import time
import psutil
from ultralytics import YOLO
from sklearn.metrics import confusion_matrix

In [None]:
# The 20 PASCAL VOC object categories. Their order here fixes the integer
# class id that is written into every YOLO label file.
class_names = (
    "aeroplane bicycle bird boat bottle bus car cat chair cow "
    "diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor"
).split()

# Reverse lookup: category name -> class id (its position in class_names).
class_to_idx = dict(zip(class_names, range(len(class_names))))

def _voc_bbox_to_yolo(bbox, img_width, img_height):
    """Convert one VOC ``bndbox`` mapping into a normalized YOLO box.

    Args:
        bbox: Mapping with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` entries
            (strings or numbers) in pixel units.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        ``(x_center, y_center, width, height)`` with every value divided by
        the image size, or ``None`` when the box has no area after being
        clipped to the image.
    """
    # Clip the corners to the image so a slightly out-of-bounds annotation
    # cannot produce normalized values outside [0, 1].
    xmin = min(max(float(bbox['xmin']), 0.0), img_width)
    ymin = min(max(float(bbox['ymin']), 0.0), img_height)
    xmax = min(max(float(bbox['xmax']), 0.0), img_width)
    ymax = min(max(float(bbox['ymax']), 0.0), img_height)

    width = xmax - xmin
    height = ymax - ymin
    if width <= 0 or height <= 0:
        return None

    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    return (
        x_center / img_width,
        y_center / img_height,
        width / img_width,
        height / img_height,
    )


def convert_voc_to_yolo(voc_dataset, output_dir):
    """Export a VOC detection dataset as YOLO-style image and label files.

    ``output_dir`` is deleted first if it already exists, then recreated with
    an ``images`` and a ``labels`` sub-directory. Sample number ``idx`` is
    written as ``images/{idx}.jpg`` plus ``labels/{idx}.txt``; each label line
    is ``class x_center y_center width height`` with coordinates normalized by
    the image size.

    Args:
        voc_dataset: Iterable of ``(img, target)`` pairs. ``img`` may be a
            PIL image or anything ``ToPILImage`` accepts; ``target`` must hold
            the VOC annotation under ``target['annotation']``.
        output_dir: Destination directory (its previous contents are lost).

    Raises:
        KeyError: If an object's ``name`` is not a key of ``class_to_idx``.
    """
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')
    # makedirs also creates output_dir itself as the common parent.
    os.makedirs(images_dir)
    os.makedirs(labels_dir)

    to_pil = ToPILImage()

    for idx, (img, target) in enumerate(voc_dataset):
        # Datasets built without a tensor transform already yield PIL images.
        if not isinstance(img, Image.Image):
            img = to_pil(img)  # Convert tensor to PIL image
        # The size is the same for every object in the image: read it once.
        img_width, img_height = img.size
        img.save(os.path.join(images_dir, f'{idx}.jpg'))

        # A single-object annotation is a bare mapping instead of a list.
        objects = target['annotation'].get('object', [])
        if not isinstance(objects, list):
            objects = [objects]

        # The label file is always created, even when it ends up empty.
        with open(os.path.join(labels_dir, f'{idx}.txt'), 'w') as f:
            for obj in objects:
                cls_idx = class_to_idx[obj['name']]
                yolo_box = _voc_bbox_to_yolo(obj['bndbox'], img_width, img_height)
                if yolo_box is None:
                    continue  # zero-area box: nothing useful to learn from
                x_center, y_center, width, height = yolo_box
                f.write(f'{cls_idx} {x_center} {y_center} {width} {height}\n')

# Images are decoded to tensors on load; convert_voc_to_yolo turns them back
# into PIL images before saving.
transform = ToTensor()

# Passing download=True makes torchvision unpack the ~2 GB archive again on
# every call, so only request it when the extracted data is missing. For
# year='2012' torchvision unpacks the archive under <root>/VOCdevkit/VOC2012.
voc_root = '/content/Dataset'
voc_extracted = os.path.isdir(os.path.join(voc_root, 'VOCdevkit', 'VOC2012'))

voc_train = VOCDetection(root=voc_root, year='2012', image_set='train', download=not voc_extracted, transform=transform)
# The train call above has already fetched the shared trainval archive.
voc_val = VOCDetection(root=voc_root, year='2012', image_set='val', download=False, transform=transform)

convert_voc_to_yolo(voc_train, '/content/Dataset/voc_yolo/train')
convert_voc_to_yolo(voc_val, '/content/Dataset/voc_yolo/val')

Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to /content/Dataset/VOCtrainval_11-May-2012.tar


100%|██████████| 1999639040/1999639040 [02:46<00:00, 12002031.34it/s]


Extracting /content/Dataset/VOCtrainval_11-May-2012.tar to /content/Dataset
Using downloaded and verified file: /content/Dataset/VOCtrainval_11-May-2012.tar
Extracting /content/Dataset/VOCtrainval_11-May-2012.tar to /content/Dataset


In [None]:
# Dataset description passed to model.train()/model.val() via the YAML file.
# 'train' and 'val' are given relative to 'path'.
data_config = {
    'path': '/content/Dataset/voc_yolo',
    'train': 'train/images',
    'val': 'val/images',
    # Reuse the list that produced the class ids in the label files so the
    # two can never disagree on class order.
    'names': list(class_names),
}

# Save the configuration to a YAML file
with open('/content/Dataset/voc_yolo.yaml', 'w') as f:
    yaml.dump(data_config, f)

In [None]:
# Load the YOLOv8 model
model = YOLO('yolov8s.pt')  # Load a YOLOv8 model pre-trained on COCO

# Measure Training Time: start the clock only after the weights are loaded so
# the checkpoint download/initialisation is not counted as training.
# perf_counter is monotonic, unlike the wall clock returned by time.time().
start_time = time.perf_counter()

# Train the model using the YAML configuration file
model.train(data='/content/Dataset/voc_yolo.yaml', epochs=5)

training_time = time.perf_counter() - start_time
print(f'Training Time: {training_time} seconds')

# Save the trained model
model_path = '/content/model/yolov8_trained_model.pt'
# Nothing else in the notebook creates this directory, so make sure it exists
# before writing the checkpoint into it.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 527MB/s]


Ultralytics YOLOv8.2.54 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (NVIDIA L4, 22700MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/content/Dataset/voc_yolo.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show

100%|██████████| 755k/755k [00:00<00:00, 114MB/s]


Overriding model.yaml nc=80 with nc=20

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytic

100%|██████████| 6.25M/6.25M [00:00<00:00, 303MB/s]
  return F.conv2d(input, weight, bias, self.stride,


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /content/Dataset/voc_yolo/train/labels... 5717 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5717/5717 [00:04<00:00, 1344.20it/s]


[34m[1mtrain: [0mNew cache created: /content/Dataset/voc_yolo/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
[34m[1mval: [0mScanning /content/Dataset/voc_yolo/val/labels... 5823 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5823/5823 [00:04<00:00, 1243.68it/s]


[34m[1mval: [0mNew cache created: /content/Dataset/voc_yolo/val/labels.cache
Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000417, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  return F.conv2d(input, weight, bias, self.stride,
        1/5      4.19G     0.9418      1.838      1.232         41        640: 100%|██████████| 358/358 [01:03<00:00,  5.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:35<00:00,  5.11it/s]


                   all       5823      15787      0.742      0.638      0.708       0.51

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5      4.19G     0.9734      1.352      1.255         26        640: 100%|██████████| 358/358 [00:58<00:00,  6.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:31<00:00,  5.80it/s]


                   all       5823      15787      0.703      0.585      0.647      0.444

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5      4.15G     0.9862      1.304      1.264         38        640: 100%|██████████| 358/358 [00:57<00:00,  6.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:31<00:00,  5.77it/s]


                   all       5823      15787      0.727      0.606      0.672      0.466

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5      4.28G     0.9463      1.203       1.25         34        640: 100%|██████████| 358/358 [00:56<00:00,  6.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:31<00:00,  5.77it/s]


                   all       5823      15787      0.759      0.641      0.718      0.514

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5      4.15G     0.8942      1.056      1.211         22        640: 100%|██████████| 358/358 [00:56<00:00,  6.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:31<00:00,  5.82it/s]


                   all       5823      15787      0.781      0.681      0.755      0.558

5 epochs completed in 0.129 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/train/weights/best.pt, 22.5MB

Validating runs/detect/train/weights/best.pt...
Ultralytics YOLOv8.2.54 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (NVIDIA L4, 22700MiB)
Model summary (fused): 168 layers, 11133324 parameters, 0 gradients, 28.5 GFLOPs


  return F.conv2d(input, weight, bias, self.stride,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:35<00:00,  5.19it/s]


                   all       5823      15787       0.78      0.681      0.755      0.558
             aeroplane        348        484       0.85      0.744      0.813      0.624
               bicycle        290        380      0.818      0.679      0.767      0.565
                  bird        374        629       0.78      0.628      0.725      0.515
                  boat        252        491      0.701      0.493      0.592      0.378
                bottle        369        733      0.776      0.516      0.633      0.437
                   bus        211        320      0.887      0.775      0.862      0.724
                   car        608       1173      0.852      0.698      0.802       0.59
                   cat        544        618      0.743      0.869      0.856      0.671
                 chair        642       1449      0.701      0.523      0.618      0.441
                   cow        154        347        0.8      0.686      0.797      0.598
           diningtabl

In [None]:
# Run the trained model over every image in the validation folder. The label
# files produced by save_txt=True are what the misclassification check reads.
predictions = model.predict(
    source='/content/Dataset/voc_yolo/val/images',
    save=True,
    save_txt=True,
)

def load_labels(label_path):
    """Read every ``.txt`` file directly inside ``label_path``.

    Args:
        label_path: Directory holding the label files.

    Returns:
        dict mapping each ``.txt`` file name (without directory) to the list
        of its lines as returned by ``readlines()``, i.e. with line endings
        kept. Files with any other extension are ignored.
    """
    def _read_lines(file_name):
        with open(os.path.join(label_path, file_name), 'r') as handle:
            return handle.readlines()

    return {
        file_name: _read_lines(file_name)
        for file_name in os.listdir(label_path)
        if file_name.endswith(".txt")
    }

# Minimum overlap for a prediction to count as "the same object" as a
# ground-truth box (0.5 is the conventional PASCAL VOC criterion).
MATCH_IOU_THRESHOLD = 0.5


def _parse_yolo_line(line):
    """Split one YOLO label line into ``(class_id, (xc, yc, w, h))``."""
    fields = line.split()
    return int(fields[0]), tuple(float(value) for value in fields[1:5])


def _box_iou(box_a, box_b):
    """Intersection-over-union of two ``(x_center, y_center, width, height)`` boxes."""
    a_x1, a_x2 = box_a[0] - box_a[2] / 2.0, box_a[0] + box_a[2] / 2.0
    a_y1, a_y2 = box_a[1] - box_a[3] / 2.0, box_a[1] + box_a[3] / 2.0
    b_x1, b_x2 = box_b[0] - box_b[2] / 2.0, box_b[0] + box_b[2] / 2.0
    b_y1, b_y2 = box_b[1] - box_b[3] / 2.0, box_b[1] + box_b[3] / 2.0

    inter_w = min(a_x2, b_x2) - max(a_x1, b_x1)
    inter_h = min(a_y2, b_y2) - max(a_y1, b_y1)
    if inter_w <= 0 or inter_h <= 0:
        return 0.0

    intersection = inter_w * inter_h
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - intersection
    return intersection / union if union > 0 else 0.0


def _match_boxes(gt_boxes, pred_boxes, iou_threshold):
    """Greedily pair ground-truth and predicted boxes by descending IoU.

    Each box is used at most once and pairs below ``iou_threshold`` are
    dropped. Returns a list of ``(gt_index, pred_index)`` tuples.
    """
    candidates = []
    for gt_index, (_, gt_box) in enumerate(gt_boxes):
        for pred_index, (_, pred_box) in enumerate(pred_boxes):
            overlap = _box_iou(gt_box, pred_box)
            if overlap >= iou_threshold:
                candidates.append((overlap, gt_index, pred_index))
    candidates.sort(reverse=True)

    used_gt, used_pred, pairs = set(), set(), []
    for _, gt_index, pred_index in candidates:
        if gt_index in used_gt or pred_index in used_pred:
            continue
        used_gt.add(gt_index)
        used_pred.add(pred_index)
        pairs.append((gt_index, pred_index))
    return pairs


ground_truth_labels = load_labels('/content/Dataset/voc_yolo/val/labels')
# Every predict() run gets a fresh, auto-numbered directory, so take the path
# from the predictor that just ran instead of hard-coding a run name.
predicted_labels = load_labels(os.path.join(str(model.predictor.save_dir), 'labels'))

total_predictions = 0
incorrect_predictions = 0

for label_file, gt_lines in ground_truth_labels.items():
    if label_file not in predicted_labels:
        continue

    gt_boxes = [_parse_yolo_line(line) for line in gt_lines if line.strip()]
    pred_boxes = [_parse_yolo_line(line) for line in predicted_labels[label_file] if line.strip()]

    # Line order in the two files is unrelated, so classes are only compared
    # for boxes that overlap enough to be the same object. Unmatched boxes are
    # missed detections / false positives, not misclassifications.
    for gt_index, pred_index in _match_boxes(gt_boxes, pred_boxes, MATCH_IOU_THRESHOLD):
        total_predictions += 1
        if gt_boxes[gt_index][0] != pred_boxes[pred_index][0]:
            incorrect_predictions += 1

# Debugging: Check the values of total_predictions and incorrect_predictions
print(f"Total predictions: {total_predictions}")
print(f"Incorrect predictions: {incorrect_predictions}")

if total_predictions > 0:
    misclassification_rate = incorrect_predictions / total_predictions
    print(f'Misclassification Rate: {misclassification_rate * 100:.2f}%')
else:
    print("No predictions to calculate misclassification rate.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
image 830/5823 /content/Dataset/voc_yolo/val/images/1744.jpg: 448x640 2 boats, 8.6ms
image 831/5823 /content/Dataset/voc_yolo/val/images/1745.jpg: 480x640 2 boats, 1 car, 1 person, 8.5ms
image 832/5823 /content/Dataset/voc_yolo/val/images/1746.jpg: 480x640 2 sheeps, 7.8ms
image 833/5823 /content/Dataset/voc_yolo/val/images/1747.jpg: 640x448 2 chairs, 1 diningtable, 1 dog, 8.8ms
image 834/5823 /content/Dataset/voc_yolo/val/images/1748.jpg: 480x640 2 cars, 1 person, 8.4ms
image 835/5823 /content/Dataset/voc_yolo/val/images/1749.jpg: 480x640 4 chairs, 1 diningtable, 2 persons, 7.9ms
image 836/5823 /content/Dataset/voc_yolo/val/images/175.jpg: 480x640 1 bird, 7.8ms
image 837/5823 /content/Dataset/voc_yolo/val/images/1750.jpg: 480x640 5 cars, 1 person, 8.7ms
image 838/5823 /content/Dataset/voc_yolo/val/images/1751.jpg: 480x640 1 train, 9.0ms
image 839/5823 /content/Dataset/voc_yolo/val/images/1752.jpg: 640x480 1 bicycle, 4 per

In [None]:
results = model.val(data='/content/Dataset/voc_yolo.yaml')

# Ultralytics reports results.speed in milliseconds per image; convert so the
# number agrees with the "seconds" unit printed below.
inference_time = results.speed['inference'] / 1000.0

# Print the inference time
print(f"Inference Time: {inference_time:.6f} seconds")

# Measure Memory Usage: resident set size of this Python process (host RAM).
process = psutil.Process(os.getpid())
memory_usage = process.memory_info().rss / (1024 * 1024)  # Convert to MB
print(f'Memory Usage: {memory_usage:.2f} MB')

# Measure Model Storage Consumption: size on disk of the checkpoint written
# by the training cell (model_path is defined there).
model_storage_consumption = os.path.getsize(model_path) / (1024 * 1024)  # Convert to MB
print(f'Model Storage Consumption: {model_storage_consumption:.2f} MB')

Ultralytics YOLOv8.2.54 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (NVIDIA L4, 22700MiB)


[34m[1mval: [0mScanning /content/Dataset/voc_yolo/val/labels.cache... 5823 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5823/5823 [00:00<?, ?it/s]
  self.pid = os.fork()
  return F.conv2d(input, weight, bias, self.stride,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 364/364 [00:45<00:00,  7.92it/s]


                   all       5823      15787      0.781      0.682      0.755      0.558
             aeroplane        348        484      0.855      0.745      0.813      0.625
               bicycle        290        380      0.821      0.679      0.768      0.566
                  bird        374        629      0.782      0.628      0.725      0.515
                  boat        252        491      0.706      0.494      0.595       0.38
                bottle        369        733      0.776      0.516      0.632      0.436
                   bus        211        320      0.889      0.777      0.861      0.725
                   car        608       1173      0.853      0.698      0.802       0.59
                   cat        544        618      0.742      0.869      0.856       0.67
                 chair        642       1449        0.7      0.523      0.617      0.442
                   cow        154        347      0.805      0.689      0.797      0.599
           diningtabl