In [1]:
pip install -U ultralytics

Note: you may need to restart the kernel to use updated packages.


In [8]:
!pip install -U pycocotools



Collecting pycocotools
  Using cached pycocotools-2.0.6.tar.gz (24 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (pyproject.toml): started
  Building wheel for pycocotools (pyproject.toml): finished with status 'done'
  Created wheel for pycocotools: filename=pycocotools-2.0.6-cp311-cp311-win_amd64.whl size=75971 sha256=9aed2d4663ca8976f9595e87523a910c2f3f8da86579716155974de13c74fd41
  Stored in directory: c:\users\jedrz\appdata\local\packages\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\localcache\local\pip\cache\wheels\ad\ca\ea\fb115e04c841c3f71fd369b7d9805a43a5193f4f9251bed0ec
Successfully built pycocotools
Ins

In [1]:
!pip install scikit-learn
import torch
from IPython.display import Image  # for displaying images
import os 
import random
import shutil
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET
from xml.dom import minidom
from tqdm import tqdm
from PIL import Image, ImageDraw
import numpy as np
import matplotlib.pyplot as plt

random.seed(108)

yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)



Using cache found in C:\Users\jedrz/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-6-16 Python-3.11.4 torch-2.0.1+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|█████████████████████████████████████████████████████████████████████████████| 14.1M/14.1M [00:11<00:00, 1.27MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [2]:
# Function to get the data from XML Annotation
def extract_info_from_xml(xml_file):
    root = ET.parse(xml_file).getroot()
    
    # Initialise the info dict 
    info_dict = {}
    info_dict['bboxes'] = []

    # Parse the XML Tree
    for elem in root:
        # Get the file name 
        if elem.tag == "filename":
            info_dict['filename'] = elem.text
            
        # Get the image size
        elif elem.tag == "size":
            image_size = []
            for subelem in elem:
                image_size.append(int(subelem.text))
            
            info_dict['image_size'] = tuple(image_size)
        
        # Get details of the bounding box 
        elif elem.tag == "object":
            bbox = {}
            for subelem in elem:
                if subelem.tag == "name":
                    bbox["class"] = subelem.text
                    
                elif subelem.tag == "bndbox":
                    for subsubelem in subelem:
                        bbox[subsubelem.tag] = int(float(subsubelem.text))         
            info_dict['bboxes'].append(bbox)
    
    return info_dict

In [3]:
print(extract_info_from_xml(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations\2007_000027.xml'))

{'bboxes': [{'class': 'person', 'xmin': 174, 'ymin': 101, 'xmax': 349, 'ymax': 351}], 'filename': '2007_000027.jpg', 'image_size': (486, 500, 3)}


In [4]:
# Dictionary that maps class names to IDs
class_name_to_id_mapping = {"person": 0,
                           "bird": 1,
                           "cat": 2,
                           "cow": 3,
                           "dog": 4,
                           "horse": 5,
                           "sheep": 6, 
                           "aeroplane": 7, 
                           "bicycle": 8,
                           "boat": 9,
                           "bus": 10,
                           "car": 11,
                           "motorbike": 12,
                           "train": 13,
                           "bottle": 14,
                           "chair": 15,
                           "diningtable": 16,
                           "pottedplant": 17,
                           "sofa": 18,
                           "tvmonitor": 19}

# Convert the info dict to the required yolo format and write it to disk
def convert_to_yolov5(info_dict):
    print_buffer = []
    
    # For each bounding box
    for b in info_dict["bboxes"]:
        try:
            class_id = class_name_to_id_mapping[b["class"]]
        except KeyError:
            print(class_id)
            print("Invalid Class. Must be one from ", class_name_to_id_mapping.keys())
            
        
        # Transform the bbox co-ordinates as per the format required by YOLO v5
        b_center_x = (b["xmin"] + b["xmax"]) / 2 
        b_center_y = (b["ymin"] + b["ymax"]) / 2
        b_width    = (b["xmax"] - b["xmin"])
        b_height   = (b["ymax"] - b["ymin"])
        
        # Normalise the co-ordinates by the dimensions of the image
        image_w, image_h, image_c = info_dict["image_size"]  
        b_center_x /= image_w 
        b_center_y /= image_h 
        b_width    /= image_w 
        b_height   /= image_h 
        
        #Write the bbox details to the file 
        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))
        
    # Name of the file which we have to save 
    save_file_name = os.path.join(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations', info_dict["filename"].replace("jpg", "txt"))
    
    # Save the annotation to disk
    print("\n".join(print_buffer), file= open(save_file_name, "w"))

In [5]:
# Get the annotations
annotations = [os.path.join(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations', x) for x in os.listdir(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations') if x[-3:] == "xml"]
annotations.sort()

# Convert and save the annotations
for ann in tqdm(annotations):
    info_dict = extract_info_from_xml(ann)
    convert_to_yolov5(info_dict)

annotations = [os.path.join(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations', x) for x in os.listdir(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations') if x[-3:] == "txt"]


100%|███████████████████████████████████████████████████████████████████████████| 17125/17125 [02:06<00:00, 135.61it/s]


In [6]:
import random
from PIL import Image, ImageDraw
random.seed(0)
print(annotations)
class_id_to_name_mapping = dict(zip(class_name_to_id_mapping.values(), class_name_to_id_mapping.keys()))

def plot_bounding_box(image, annotation_list):
    annotations = np.array(annotation_list)
    w, h = image.size
    
    plotted_image = ImageDraw.Draw(image)

    transformed_annotations = np.copy(annotations)
    transformed_annotations[:,[1,3]] = annotations[:,[1,3]] * w
    transformed_annotations[:,[2,4]] = annotations[:,[2,4]] * h 
    
    transformed_annotations[:,1] = transformed_annotations[:,1] - (transformed_annotations[:,3] / 2)
    transformed_annotations[:,2] = transformed_annotations[:,2] - (transformed_annotations[:,4] / 2)
    transformed_annotations[:,3] = transformed_annotations[:,1] + transformed_annotations[:,3]
    transformed_annotations[:,4] = transformed_annotations[:,2] + transformed_annotations[:,4]
    
    for ann in transformed_annotations:
        obj_cls, x0, y0, x1, y1 = ann
        plotted_image.rectangle(((x0,y0), (x1,y1)))
        
        plotted_image.text((x0, y0 - 10), class_id_to_name_mapping[(int(obj_cls))])
    
    plt.imshow(np.array(image))
    plt.show()

print(annotations)
# Get any random annotation file 
annotation_file = random.choice(annotations)

with open(annotation_file, "r") as file:
    annotation_list = file.read().split("\n")[:-1]
    annotation_list = [x.split(" ") for x in annotation_list]
    annotation_list = [[float(y) for y in x ] for x in annotation_list]

#Get the corresponding image file
image_file = annotation_file.replace("annotations", "images").replace("txt", "jpg")
print(image_file + "dupa")
image_file = r'' + image_file

assert os.path.exists(image_file)

#Load the image

image = Image.open(image_file)
plot_bounding_box(image, annotation_list)

    

#Plot the Bounding Box


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



UnidentifiedImageError: cannot identify image file 'C:\\Users\\jedrz\\Desktop\\UM-proj\\VOCdevkit\\VOC2012\\Annotations\\2011_002554.jpg'

In [None]:
# Read images and annotations
images = [os.path.join(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\JPEGImages', x) for x in os.listdir(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\JPEGImages')]
annotations = [os.path.join(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations', x) for x in os.listdir(r'C:\Users\jedrz\Desktop\UM-proj\VOCdevkit\VOC2012\Annotations') if x[-3:] == "txt"]

images.sort()
annotations.sort()

# Split the dataset into train-valid-test splits 
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size = 0.2, random_state = 1)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.5, random_state = 1)

In [None]:
!mkdir images\train images\val images\test annotations\train annotations\val annotations\test

In [None]:
#Utility function to move images 
def move_files_to_folder(list_of_files, destination_folder):
    for f in list_of_files:
        try:
            shutil.move(f, destination_folder)
        except:
            print(f)
            assert False

# Move the splits into their folders
move_files_to_folder(train_images, 'images/train')
move_files_to_folder(val_images, 'images/val/')
move_files_to_folder(test_images, 'images/test/')
move_files_to_folder(train_annotations, 'annotations/train/')
move_files_to_folder(val_annotations, 'annotations/val/')
move_files_to_folder(test_annotations, 'annotations/test/')

In [None]:
!git clone https://github.com/ultralytics/yolov5


Cloning into 'yolov5'...


In [14]:
#!python val.py --weights yolov5s.pt --data VOC.yaml --task val --name yolo_det
!python val.py --weights yolov5s.pt --data VOC.yaml --img 640

[34m[1mval: [0mdata=C:\Users\jedrz\Desktop\UM-proj\yolo_model_metrics\yolov5\data\VOC.yaml, weights=['yolov5s.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs\val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5  v7.0-183-g878d9c8 Python-3.11.4 torch-2.0.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients

Dataset not found , missing paths ['C:\\Users\\jedrz\\Desktop\\UM-proj\\yolo_model_metrics\\datasets\\VOC\\images\\test2007']
Downloading https://github.com/ultralytics/yolov5/releases/download/v1.0/VOCtest_06-Nov-2007.zip to C:\Users\jedrz\Desktop\UM-proj\yolo_model_metrics\datasets\VOC\images\VOCtest_06-Nov-2007.zip...
Downloading https://github.com/ultralytics/yolov5/releases/download/v1.0/VOCtrainval_11-May-2012.zip to C:\Users\jedrz\Desktop\UM-pr