In [16]:
import torch
from IPython.display import Image  # for displaying images
import os 
import random
import xml.etree.ElementTree as ET
from xml.dom import minidom
from tqdm import tqdm
from PIL import Image, ImageDraw
import numpy as np
import matplotlib.pyplot as plt

In [17]:
def extract_info_from_xml(xml_file):
    root = ET.parse(xml_file).getroot()
    
    # Initialise the info dict 
    info_dict = {}
    info_dict['bboxes'] = []

    # Parse the XML Tree
    for elem in root:
        # Get the file name 
        if elem.tag == "filename":
            info_dict['filename'] = elem.text
            
        # Get the image size
        elif elem.tag == "size":
            image_size = []
            for subelem in elem:
                image_size.append(int(subelem.text))
            
            info_dict['image_size'] = tuple(image_size)
        
        # Get details of the bounding box 
        elif elem.tag == "object":
            bbox = {}
            for subelem in elem:
                if subelem.tag == "name":
                    bbox["class"] = subelem.text
                    
                elif subelem.tag == "bndbox":
                    for subsubelem in subelem:
                        bbox[subsubelem.tag] = int(subsubelem.text)            
            info_dict['bboxes'].append(bbox)
    
    return info_dict

In [18]:
class_name_to_id_mapping = { "defeat": 0, "victory": 1 }

In [19]:
def convert_to_yolov5(info_dict):
    print_buffer = []
    
    # For each bounding box
    for b in info_dict["bboxes"]:
        try:
            class_id = class_name_to_id_mapping[b["class"]]
        except KeyError:
            print("Invalid Class. Must be one from ", class_name_to_id_mapping.keys())
        
        # Transform the bbox co-ordinates as per the format required by YOLO v5
        b_center_x = (b["xmin"] + b["xmax"]) / 2 
        b_center_y = (b["ymin"] + b["ymax"]) / 2
        b_width    = (b["xmax"] - b["xmin"])
        b_height   = (b["ymax"] - b["ymin"])
        
        # Normalise the co-ordinates by the dimensions of the image
        image_w, image_h, image_c = info_dict["image_size"]  
        b_center_x /= image_w 
        b_center_y /= image_h 
        b_width    /= image_w 
        b_height   /= image_h 
        
        #Write the bbox details to the file 
        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))
        
    # Name of the file which we have to save 
    save_file_name = os.path.join("./dataset/annotations", info_dict["filename"].replace("PNG", "txt"))
    
    # Save the annotation to disk
    print("\n".join(print_buffer), file= open(save_file_name, "w"))

In [20]:
annotations = [ os.path.join('./dataset/labels', x) for x in os.listdir('./dataset/labels') if x[-3:] == "xml" ]
annotations.sort()

for ann in tqdm(annotations):
    info_dict = extract_info_from_xml(ann)
    convert_to_yolov5(info_dict)

FileNotFoundError: [Errno 2] No such file or directory: './dataset/labels'

In [1]:
!mkdir ./dataset/images/train ./dataset/images/val ./dataset/images/test ./dataset/annotations/train ./dataset/annotations/val ./dataset/annotations/test

mkdir: ./dataset/annotations/train: File exists
mkdir: ./dataset/annotations/val: File exists
mkdir: ./dataset/annotations/test: File exists


In [5]:
from sklearn.model_selection import train_test_split
import shutil

def move_files_to_folder(list_of_files, destination_folder):
    for f in list_of_files:
        try:
            shutil.move(f, destination_folder)
        except:
            print(f)
            assert False

# Read images and annotations
images = [ os.path.join('./dataset/images', x) for x in os.listdir('./dataset/images') if x[-3:] == "PNG" ]
annotations = [os.path.join('./dataset/annotations', x) for x in os.listdir('./dataset/annotations') if x[-3:] == "txt" ]

images.sort()
annotations.sort()

# Split the dataset into train-valid-test splits 
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size = 0.2, random_state = 1)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.5, random_state = 1)

move_files_to_folder(train_images, './dataset/images/train')
move_files_to_folder(val_images, './dataset/images/val/')
move_files_to_folder(test_images, './dataset/images/test/')
move_files_to_folder(train_annotations, './dataset/annotations/train/')
move_files_to_folder(val_annotations, './dataset/annotations/val/')
move_files_to_folder(test_annotations, './dataset/annotations/test/')

In [6]:
!mv ./dataset/annotations ./dataset/labels

In [None]:
!python3 train.py --img 640 --cfg yolov5s.yaml --hyp hyp.scratch-low.yaml --batch 32 --epochs 100 --data batch.yaml --weights ' ' --workers 24 --name res

/usr/local/bin/python3: can't open file '/Users/admin/Documents/stats-image/train.py': [Errno 2] No such file or directory
