## Config

In [None]:
from ultralytics import YOLO
import json
from collections import defaultdict
from pathlib import Path
import os
import numpy as np

from ultralytics.utils import TQDM
from ultralytics.utils.files import increment_path
from ultralytics.data.converter import merge_multi_segment
import zipfile
import os
import yaml
import shutil
import traceback




# Root directory that holds the source archive and receives the generated YOLO dataset.
# It ends with a trailing slash so it can be used directly as a plain string prefix when building paths.
data_path = "/vol/bitbucket/ajm223/SWE_GP/data/"

## Converting the Synth COCO zip file into a YOLO-compatible dataset

In [None]:
def _normalized_segment(segmentation, w, h):
    """
    Flatten the polygon(s) of one COCO annotation into a list of x, y values divided by the image size.

    Args:
        segmentation: Value of the annotation's "segmentation" field.
        w (int | float): Image width used to scale x coordinates.
        h (int | float): Image height used to scale y coordinates.

    Returns:
        (list[float]): Flat ``[x1, y1, x2, y2, ...]`` list, or an empty list when there is no polygon to use
            (``None``, an empty list, or a non-list value such as an RLE dict).
    """
    if not isinstance(segmentation, list) or not segmentation:
        return []
    if len(segmentation) > 1:
        # Several polygons for one object: let ultralytics join them into a single outline.
        points = np.concatenate(merge_multi_segment(segmentation), axis=0)
    else:
        points = np.array(segmentation[0]).reshape(-1, 2)
    return (points / np.array([w, h])).reshape(-1).tolist()


def convert_coco(
    labels_dir="../coco/annotations/",
    save_dir="coco_converted/",
    json_file=None,
    use_segments=False,
    use_keypoints=False,
):
    """
    Converts one COCO annotation JSON file into YOLO-format label files.

    ``save_dir`` is deleted if it already exists and is then recreated with empty ``labels/`` and ``images/``
    sub-directories. For every image that has at least one annotation, a ``<image name>.txt`` file is written to
    ``save_dir/labels``. Each line describes one object: the class (the raw COCO ``category_id``) followed by the
    normalized centre-x, centre-y, width, height box, or by the normalized polygon / keypoints when requested.

    Args:
        labels_dir (str, optional): Unused; kept only so existing calls that pass it keep working.
        save_dir (str, optional): Path to directory to save results to. Any existing content is removed.
        json_file (str | Path): Path to the COCO annotation JSON file to convert. Required.
        use_segments (bool, optional): Whether to write the segmentation polygon instead of the box. Annotations
            without a usable polygon fall back to their box.
        use_keypoints (bool, optional): Whether to write box + keypoints lines. Takes precedence over
            ``use_segments``; annotations without keypoints are skipped.

    Raises:
        TypeError: If ``json_file`` is None. Raised before ``save_dir`` is touched.

    Example:
        ```python
        convert_coco(save_dir="yoloDS/train", json_file="yoloDS/synth_train.json", use_segments=True)
        ```

    Output:
        Generates output files in the specified output directory.
    """
    # Fail before deleting anything: without an annotation file there is nothing to convert.
    if json_file is None:
        raise TypeError("convert_coco() requires json_file: the path of the COCO annotation file to convert")

    # Start from a clean dataset directory
    save_dir = Path(save_dir)
    if save_dir.exists():
        shutil.rmtree(save_dir)
    labels_out = save_dir / "labels"
    for sub_dir in (labels_out, save_dir / "images"):
        sub_dir.mkdir(parents=True, exist_ok=True)

    with open(json_file, encoding="utf-8") as fh:
        data = json.load(fh)

    # Index images by id and group annotations per image
    images = {img["id"]: img for img in data["images"]}
    img_to_anns = defaultdict(list)
    for ann in data["annotations"]:
        img_to_anns[ann["image_id"]].append(ann)

    # Write one labels file per annotated image
    for img_id, anns in TQDM(img_to_anns.items(), desc=f"Annotations {json_file}"):
        img = images[img_id]
        h, w, file_name = img["height"], img["width"], img["file_name"]

        seen_boxes = []  # used to drop exact duplicate (class, box) annotations
        rows = []  # one finished label line per kept annotation, so box/segment/keypoints can never misalign
        for ann in anns:
            if ann.get("iscrowd", False):
                continue
            # The COCO box format is [top left x, top left y, width, height]
            box = np.array(ann["bbox"], dtype=np.float64)
            box[:2] += box[2:] / 2  # xy top-left corner to center
            box[[0, 2]] /= w  # normalize x
            box[[1, 3]] /= h  # normalize y
            if box[2] <= 0 or box[3] <= 0:  # skip boxes with no width or no height
                continue

            cls = ann["category_id"]  # class
            if cls is None:
                # A line without a class cannot be written; report it and move on.
                print(f"cls is none for: {img_id}")
                continue

            box = [cls] + box.tolist()
            if box in seen_boxes:
                continue
            seen_boxes.append(box)

            if use_keypoints:
                kpts = ann.get("keypoints")
                if kpts is None:
                    continue  # nothing to write for a pose label without keypoints
                # Keypoints come as (x, y, visibility) triplets; only x and y are scaled.
                row = box + (np.array(kpts).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
            elif use_segments:
                segment = _normalized_segment(ann.get("segmentation"), w, h)
                row = [cls] + segment if segment else box
            else:
                row = box
            rows.append(row)

        # Write: label files are flat inside labels/, named after the image's base name.
        label_path = (labels_out / Path(file_name).name).with_suffix(".txt")
        with open(label_path, "a") as out:
            for row in rows:
                out.write(" ".join("%g" % value for value in row) + "\n")

In [None]:
new_ds_dir_name = "yoloDS6"

# Create the new dataset directory under the data root.
new_ds_dir = os.path.join(data_path, new_ds_dir_name)
os.makedirs(new_ds_dir, exist_ok=True)

# The archive <data_path>/<zip_name>.zip is unpacked into a temporary container inside the dataset directory.
zip_name = "synth"
zip_file = os.path.join(data_path, zip_name + ".zip")
unzip_container = os.path.join(new_ds_dir, zip_name + "_dir")
os.makedirs(unzip_container, exist_ok=True)

# Unzip the file
with zipfile.ZipFile(zip_file, "r") as zip_ref:
    zip_ref.extractall(unzip_container)

# Move every annotation JSON from the top level of the container into the dataset directory.
# os.replace overwrites a JSON left over from a previous run on every platform.
for entry in os.listdir(unzip_container):
    if entry.endswith(".json"):
        os.replace(os.path.join(unzip_container, entry), os.path.join(new_ds_dir, entry))

# Build the dataset YAML.
# NOTE(review): "path" is relative, so where it resolves depends on the ultralytics datasets directory
# setting - confirm it points at data_path.
data = {
    "path": new_ds_dir_name,
    "train": "../" + new_ds_dir_name + "/train",
    "val": "../" + new_ds_dir_name + "/val",
    "test": "../" + new_ds_dir_name + "/test",
}

with open(os.path.join(new_ds_dir, "synth_train.json"), "r") as f:
    js = json.load(f)

# Class names come from the training annotations, keyed by the same category ids the label files use.
data["names"] = {cat["id"]: cat["name"] for cat in js["categories"]}

yaml_file_path = f"{new_ds_dir}/{new_ds_dir_name}.yaml"
with open(yaml_file_path, "w") as file:
    yaml.safe_dump(data, file, default_flow_style=False, sort_keys=False)

# Create the YOLO labels for each split from its JSON; the synth dataset is the train set.
# labels_dir is not passed: convert_coco ignores it and reads only json_file.
convert_coco(save_dir=new_ds_dir + "/train", json_file=f"{new_ds_dir}/synth_train.json", use_segments=True)
convert_coco(save_dir=new_ds_dir + "/val", json_file=f"{new_ds_dir}/synth_val.json", use_segments=True)
# convert_coco(save_dir=new_ds_dir + "/test", json_file=f"{new_ds_dir}/test.json", use_segments=True)

# Move the images from the unzip container next to the labels they belong to.
# convert_coco leaves an empty images/ directory behind; remove whatever is there (empty or not) so the
# rename below does not depend on platform-specific "rename onto an existing directory" behaviour.
train_images_dir = os.path.join(new_ds_dir, "train", "images")
val_images_dir = os.path.join(new_ds_dir, "val", "images")
for extracted_name, target_dir in (
    ("synth_train_images", train_images_dir),
    ("synth_val_images", val_images_dir),
):
    if os.path.isdir(target_dir):
        shutil.rmtree(target_dir)
    os.rename(os.path.join(unzip_container, extracted_name), target_dir)

# The test set needs different handling since its images are not isolated in their own folder.
#with open(f"{new_ds_dir}/test.json", "r") as f:
    #js = json.load(f)
    #for img in js["images"]:
        #os.rename(unzip_container + "/images/" + img["file_name"].split("/")[1], new_ds_dir + "/test/images/" + img["file_name"].split("/")[1])


# remove the unzipped container
shutil.rmtree(unzip_container)

## Grid Search and Hyperparameter Tuning


In [None]:
from ultralytics import YOLO
from sklearn.model_selection import ParameterGrid
import torch
import os
import json

# Define parameter grid
# The triple-quoted block below is the full search space, kept for reference only: it is a bare string
# expression and has no effect at runtime.
"""
grid = {
    'batch': [24],
    'device': [0],
    'project': ["segment"],
    'seed': [42],
    'rect': [False, True],
    'cos_lr': [False, True],
    'amp': [False, True],
    'fraction': [0.8, 1.0],
    'lr0': [0.01, 0.001, 0.0001],
    'lrf': [0.01, 1, 0.1],
    'momentum': [0.937, 0.949, 0.95],
    'weight_decay': [0.0005, 0.01, 0.1],
    'warmup_epochs': [0, 3],
    'overlap_mask': [True],
    'dropout': [0.0, 0.1, 0.2],
    'val': [True],
    'plots': [True],
    'epochs': [300],
    'patience': [10, 20, 30]
}
"""
# Active grid: one value per key, i.e. a single training run.
# Every key is forwarded to model.train(), so each key must be a valid ultralytics training argument.
grid = {
    'batch': [24],
    'device': [0],
    'project': ["segment"],
    'seed': [42],
    'rect': [False],
    'cos_lr': [False],
    'amp': [False],
    'fraction': [1.0],
    'lr0': [0.01],
    'lrf': [0.1],
    'momentum': [0.937],
    'weight_decay': [0.0005],
    'warmup_epochs': [3],
    'overlap_mask': [True],
    'dropout': [0.0],
    'val': [True],
    'plots': [True],
    'epochs': [200],
    'patience': [10]
}

ts = 6  # dataset version: selects yoloDS<ts>/yoloDS<ts>.yaml

best_model = None
best_fitness = float('-inf')  # higher is better
best_params = None

save_dir = "/vol/bitbucket/ajm223/SWE_GP/runs"
os.makedirs(save_dir, exist_ok=True)  # model.save below needs the directory to exist
epoch = 0  # counts finished grid-search runs (not training epochs); numbers the saved model files

for params in ParameterGrid(grid):
    print(f"Training with parameters: {params}")
    model = YOLO('yolov8n-seg.pt')
    # Pass the whole combination through so that no grid entry (e.g. project, seed, patience) is
    # silently dropped in favour of the library default.
    results = model.train(
        data=f'/vol/bitbucket/ajm223/SWE_GP/data/yoloDS{ts}/yoloDS{ts}.yaml',
        imgsz=1000,
        **params,
    )

    # Save the model into the runs directory first, so a scoring problem cannot lose the trained weights.
    model.save(os.path.join(save_dir, f'model_epoch_{epoch + 1}.pt'))
    epoch += 1

    # train() returns the validation metrics object rather than a list of per-epoch losses, so runs are
    # ranked by its scalar `fitness` summary (the value ultralytics itself uses to pick best.pt).
    fitness = getattr(results, 'fitness', None)
    if fitness is None:
        print(f"No validation metrics returned for parameters: {params}; run not ranked")
        continue

    if fitness > best_fitness:
        best_fitness = fitness
        best_model = model
        best_params = params


if best_model is None:
    print("No run produced validation metrics; nothing to save.")
else:
    print(f"Best validation fitness: {best_fitness}")
    print(f"Best model parameters: {best_params}")

    # Save the best model
    best_model.save('best_model.pt')

    # Save the best model parameters
    with open("best_model_params.json", "w") as f:
        json.dump(best_params, f)
    
