# File used to create the datasets. 

Basically, to use Ultralytics's YOLO we need to add the datasets (divided into train/test/valid, with each one having two folders: images and labels) inside the folder indicated in the file usually found in the path $HOME/.config/Ultralytics/settings.yaml". Then, we also need a yaml defining the path of the dataset, the number of classes etc. in the root of this folder. 

The point of this notebook is to automate the process of: taking the dataset, adding it the wanted noise, using the wanted model to denoising, and the save this "reconstructed dataset" in this format (i.e. in the correct folder), and then create the correct yaml to describe it, so that we can use YOLO on this reconstructed folder.

See for example "african_wildlife.yaml" or "wildlife_bernoulli_0.1_v0.yaml".

In [5]:
import yaml
from torch.utils.data import DataLoader
from DataLoader import (
    AfricanWildlifeDataset,
    bernoulli_noise_transform,
    gaussian_noise_transform,
)
from DenoisingAE import DenoisingAE, DenoisingAEV1, DenoisingAEV2
from DenoisingResnet import DenoisingResnet
import torch
import gc
import torchvision
import yaml
import gin
import os
from itertools import product
import shutil
import numpy as np
import matplotlib.image
import cv2

checkpoints = {
    "gaussian": {
        0.1: {
            "v0": "denoising_checkpoints/gaussian/0.1/v0/best.ckpt",
            "v1": "denoising_checkpoints/gaussian/0.1/v1/best.ckpt",
            "v2": "denoising_checkpoints/gaussian/0.1/v2/best.ckpt",
        },
        0.2: {
            "v0": "denoising_checkpoints/gaussian/0.2/v0/best.ckpt",
            "v1": "denoising_checkpoints/gaussian/0.2/v1/best.ckpt",
            "v2": "denoising_checkpoints/gaussian/0.2/v2/best.ckpt",
        },
    },
    "bernoulli": {
        0.1: {
            "v0": "denoising_checkpoints/bernoulli/0.1/v0/best.ckpt",
            "v1": "denoising_checkpoints/bernoulli/0.1/v1/best.ckpt",
            "v2": "denoising_checkpoints/bernoulli/0.1/v2/best.ckpt",
        },
        0.3: {
            "v0": "denoising_checkpoints/bernoulli/0.3/v0/best.ckpt",
            "v1": "denoising_checkpoints/bernoulli/0.3/v1/best.ckpt",
            "v2": "denoising_checkpoints/bernoulli/0.3/v2/best.ckpt",
        },
    },
}
torch.cuda.empty_cache()
gc.collect()

gin.parse_config_file("config.cfg")

ParsedConfigFileIncludesAndImports(filename='config.cfg', imports=[], includes=[])

WARNING: if you have already yolo or you just download it, it won't find the datasets in this project unless you set the 'datasets_dir' param in $Home/.config/Ultralytics/settings.yaml correctly (to the datasets folder in this project). 

In [20]:
## You can do the above process by setting the following variable to True.
set_correct_dataset_folder = False

In [21]:
from pathlib import Path
import os

if set_correct_dataset_folder:
    y = None
    with open(f"{Path.home()}/.config/Ultralytics/settings.yaml", "r") as stream:
        this_dataset_folder = os.path.join(os.getcwd(), "datasets")
        try:
            y = yaml.safe_load(stream)
            print("Before: ", y)
            y["datasets_dir"] = this_dataset_folder
            print("After: ", y)

        except yaml.YAMLError as exc:
            print(exc)

    with open(f"{Path.home()}/.config/Ultralytics/settings.yaml", "w") as f:
        yaml.dump(y, f)

In [18]:
# Helper to copy the images in the correct folder
def copy_images(dataset, model, noise_type, noise):
    for index in range(len(dataset)):
        img, _ = dataset[index]
        shape = dataset.image_dimensions[index]
        original_name = dataset.list_dir[index]

        t = torchvision.transforms.Resize((shape[1], shape[2]))

        # Clean and returning to the original size
        cleanedv0 = t(model(img.to("cuda")).detach().cpu())
        # Transpose the axes to make it WHC and reconvert it to a [0,...,255] image
        normalized = np.transpose((cleanedv0 * 255).numpy(), [1, 2, 0]).astype(
            dtype=np.uint8
        )
        matplotlib.image.imsave(
            f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/{dataset.kind}/images/{original_name}",
            normalized,
        )


noises = {"gaussian": [0.1, 0.2], "bernoulli": [0.1, 0.3]}

# Create the datasets, for all noises and denoising models.
# NOTE: it won't work if the datasets are already present
for noise_type in ["gaussian", "bernoulli"]:
    for noise in noises[noise_type]:
        if noise_type == "gaussian":
            noise_transform = gaussian_noise_transform(0, noise, width=640)
        else:
            noise_transform = bernoulli_noise_transform(noise, width=640)
        modelv2 = DenoisingAEV2.load_from_checkpoint(
            checkpoints[noise_type][noise]["v2"]
        )
        modelv1 = DenoisingAEV1.load_from_checkpoint(
            checkpoints[noise_type][noise]["v1"]
        )
        modelv0 = DenoisingAE.load_from_checkpoint(checkpoints[noise_type][noise]["v0"])
        for model in [modelv0, modelv1, modelv2]:
            train_dataset = AfricanWildlifeDataset(
                kind="train", transform=noise_transform
            )
            val_dataset = AfricanWildlifeDataset(
                kind="valid", transform=noise_transform
            )
            test_dataset = AfricanWildlifeDataset(
                kind="test", transform=noise_transform
            )
            dataset_yaml = dict(
                {
                    "path": f"../datasets/wildlife_{noise_type}_{noise}_{model.kind}",
                    "train": "train/images",
                    "val": "valid/images",
                    "test": "test/images",
                    "names": {0: "buffalo", 1: "elephant", 2: "rhino", 3: "zebra"},
                }
            )
            with open(f"./wildlife_{noise_type}_{noise}_{model.kind}.yaml", "w") as f:
                yaml.dump(dataset_yaml, f)
            os.mkdir(f"datasets/wildlife_{noise_type}_{noise}_{model.kind}")

            os.mkdir(f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/train")
            os.mkdir(
                f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/train/images"
            )
            # we just copy the labels because we have kept the right image dimensions
            shutil.copytree(
                "datasets/wildlife/train/labels",
                f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/train/labels",
            )
            os.mkdir(f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/test")
            os.mkdir(f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/test/images")
            # we just copy the labels because we have kept the right image dimensions
            shutil.copytree(
                "datasets/wildlife/test/labels",
                f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/test/labels",
            )
            os.mkdir(f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/valid")
            os.mkdir(
                f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/valid/images"
            )
            # we just copy the labels because we have kept the right image dimensions
            shutil.copytree(
                "datasets/wildlife/valid/labels",
                f"datasets/wildlife_{noise_type}_{noise}_{model.kind}/valid/labels",
            )
            copy_images(train_dataset, model, noise_type, noise)
            copy_images(test_dataset, model, noise_type, noise)
            copy_images(val_dataset, model, noise_type, noise)

FileExistsError: [Errno 17] File exists: 'datasets/wildlife_gaussian_0.1_v0'