In [12]:
import os
from sklearn.model_selection import train_test_split
import yaml
import xml.etree.ElementTree as ET
import xml2yolo
import shutil

Filter the dataset annotations so that only objects with one of the four labels we are interested in (D00, D10, D20, D40) remain; images left without any such object are dropped.
The parsed dataset is saved to ./dataset

In [13]:
from shutil import copyfile


# Load machine-specific settings (dataset location, working directory).
with open("env.yaml", 'r') as stream:
    env_yaml = yaml.safe_load(stream)

path_rdd2020 = env_yaml["PATHRD2022"]
path_nor_images = path_rdd2020 + "/Norway/train/images"
path_nor_annotations = path_rdd2020 + "/Norway/train/annotations/xmls"

# Build the output paths with os.path.join so WDIR works with or without a
# trailing separator, and so the folders created below are exactly the
# folders the files are written to.
wdir = env_yaml["WDIR"]
path_dataset = os.path.join(wdir, "dataset")
path_parsed_annotations = os.path.join(path_dataset, "Annotations")
path_parsed_images = os.path.join(path_dataset, "JPEGImages")
labels = ["D00", "D10", "D20", "D40"]

images = os.listdir(path_nor_images)
annotations = os.listdir(path_nor_annotations)

try:
    # exist_ok=False is the "already built" guard: an existing dataset folder
    # is never overwritten.
    os.makedirs(path_dataset, exist_ok=False)
except FileExistsError:
    # Only this specific condition is reported with this message; any other
    # error (unreadable XML, bad path, ...) now surfaces with its traceback
    # instead of being mislabelled.
    print("Dataset folder already exists. Delete folder if you want to recreate it.")
else:
    os.makedirs(path_parsed_annotations, exist_ok=True)
    os.makedirs(path_parsed_images, exist_ok=True)

    available_images = set(images)

    for annotation in sorted(annotations):
        # Ignore stray non-XML entries in the annotations folder.
        if not annotation.lower().endswith(".xml"):
            continue

        tree = ET.parse(os.path.join(path_nor_annotations, annotation))
        root = tree.getroot()

        # findall() returns a list, so removing children while looping is safe.
        for obj in root.findall("object"):
            name = obj.find("name")
            if name is None or name.text not in labels:
                root.remove(obj)

        # Drop images that have no object of interest left.
        if root.find("object") is None:
            continue

        # splitext keeps file names that contain extra dots intact.
        jpeg_name = os.path.splitext(annotation)[0] + ".jpg"
        if jpeg_name not in available_images:
            # Keep annotations and images in sync: never write an annotation
            # whose image cannot be copied.
            print(f"Skipping {annotation}: image {jpeg_name} not found in {path_nor_images}")
            continue

        tree.write(os.path.join(path_parsed_annotations, annotation))
        copyfile(os.path.join(path_nor_images, jpeg_name), os.path.join(path_parsed_images, jpeg_name))


Dataset folder already exists. Delete folder if you want to recreate it.


Create a file with the paths to all the annotated images. 

In [None]:
# Write one path per line to all_xml_annotations.txt, one entry per parsed image.
# Only .jpg entries are listed: the JPEGImages folder later also receives the
# YOLO label files, and those must not end up in this list when the cell is
# re-run. Sorting makes the file content independent of directory order.
# NOTE(review): each entry is an image file name joined onto the annotations
# folder (so it ends in .jpg, not .xml) -- presumably xml2yolo maps it to the
# matching XML file; confirm against xml2yolo.py.
with open("./all_xml_annotations.txt", "w") as f:
    parsed_annotations = sorted(
        name for name in os.listdir(path_parsed_images)
        if name.lower().endswith(".jpg")
    )
    for annotation in parsed_annotations:
        f.write(os.path.join(path_parsed_annotations, annotation) + "\n")


Run xml2yolo.py to convert from our format to YOLO format. 
Here we use a snippet from one of the contestants in 2018.
See https://github.com/dweeptrivedi/road-damage-detection/blob/master/examples/xml2Yolo_sample.txt

This creates the folder `labels` under `dataset`.

In [None]:
# Convert the annotations listed in all_xml_annotations.txt to YOLO format.
# NOTE(review): the second argument ('labels.txt') is presumably the class-name
# file expected by xml2yolo -- confirm against xml2yolo.py.
xml2yolo.main('all_xml_annotations.txt', 'labels.txt')


In [None]:
# Copy every file from ./dataset/labels/ into ./dataset/JPEGImages/;
# dirs_exist_ok=True merges into the already existing image folder.
# Presumably this places each label file next to its image for training -- confirm.
# Note: both paths are relative to the current working directory, not to WDIR.
shutil.copytree('./dataset/labels/', './dataset/JPEGImages/', dirs_exist_ok=True)

Split our images into 80% training and 20% validation.
Write the file paths of the split images to files.

In [None]:
# Split into train/validation sets (80% / 20%) and write one image path per
# line to no-train.txt and no-val.txt in the working directory.

# JPEGImages also holds the copied label (.txt) files, so restrict the split
# to actual image files.
image_extensions = (".jpg", ".jpeg", ".png")
# Fixed seed so that Restart & Run All reproduces the same split.
split_seed = 42

# Sorted so the seeded split does not depend on directory listing order.
parsed_images = sorted(
    name for name in os.listdir(path_parsed_images)
    if name.lower().endswith(image_extensions)
)
ds_train, ds_val = train_test_split(parsed_images, test_size=0.2, random_state=split_seed)

for list_name, subset in (("no-train.txt", ds_train), ("no-val.txt", ds_val)):
    # os.path.join keeps this working whether or not WDIR ends with a separator.
    with open(os.path.join(wdir, list_name), "w") as f:
        f.writelines(os.path.join(path_parsed_images, t) + "\n" for t in subset)

Run yolov7 train.py

In [None]:
# Launch yolov7 training through the shell, starting from the yolov7.pt weights
# and using the dataset description in ./data.yaml.
# /path/to/yolov7 is a placeholder: replace it with the local yolov7 checkout.
!python /path/to/yolov7/train.py --workers 2 --device 0 --batch-size 7 --cache --data ./data.yaml --img 640 640 --weights yolov7.pt --name yolov7 --save_period 5

When training has finished, a new run folder has been created at yolov7/runs/exp (or a similarly named folder).
Run the command below on the test set, using the weights from that folder, to get predictions.

Use --nosave to skip saving the images

In [None]:
# Run yolov7 detection on the Norway test images with the trained weights.
# The /path/to/... entries are placeholders: replace them with the local
# yolov7 checkout, the RDD2022 dataset location and the weights file from training.
!python /path/to/yolov7/detect.py --source /path/to/RDD2022/RDD2022/RDD2022_all_countries/Norway/test/images --weights /path/to/yolov7/runs/exp/best.pt --img-size 640 640 --conf-thres 0.35 --augment --save-txt

This creates prediction files in yolov7/runs/detect/exp (the folder name may carry a run number, e.g. exp3).
Run convert_detect_format.py to parse the results into a submittable format.

This creates the output submission.csv with prediction boxes for each image in the test folder

In [None]:
import convert_detect_format

# Name of the detect.py run folder whose predictions are converted. yolov7
# numbers its run folders (exp, exp2, exp3, ...), so adjust this to the run
# that was just produced.
detect_run = "exp3"

y7 = env_yaml["YOLOV7PATH"]
test_images = path_rdd2020 + "/Norway/test/images"
# os.path.join works whether or not YOLOV7PATH ends with a separator.
detect_folder = os.path.join(y7, "runs", "detect", detect_run, "labels")

# Fail early with a clear message when detect_run points at the wrong run.
if not os.path.isdir(detect_folder):
    raise FileNotFoundError(
        f"Prediction folder not found: {detect_folder}. "
        "Set detect_run to the folder created by detect.py."
    )

# Parse the per-image prediction files into the submission format.
convert_detect_format.run(detect_folder, test_images)