<a target="_blank" href="https://colab.research.google.com/drive/1kHdTl66zi2AmCGaBu5-9YcABOOPSc4Ji?usp=share_link">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
%%capture
! pip install pip==21.3.1
! pip install kaggle
! pip install clearml==1.9.3

In [2]:
# Attach Google Drive to the Colab VM under /content/drive.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import os
from fastai.vision.all import get_image_files
from PIL import Image
from tqdm import tqdm
from glob import iglob
from collections import Counter
import clearml
from clearml import Dataset

## Download the dataset from Kaggle

In [None]:
%%shell
# Install the Kaggle API key in ~/.kaggle.
# Expects kaggle.json to have been uploaded to the current working directory first.
mkdir -p ~/.kaggle
cp kaggle.json ~/.kaggle/
# Restrict the key file to owner read/write only.
chmod 600 ~/.kaggle/kaggle.json
# List the file to confirm it is in place with the expected permissions.
ls -l ~/.kaggle/kaggle.json

-rw------- 1 root root 68 Mar 15 19:43 /root/.kaggle/kaggle.json




In [None]:
%%shell
# Download the CityPersons dataset archive from Kaggle into the current directory.
# Requires the Kaggle API key to be installed in ~/.kaggle/kaggle.json.
kaggle datasets download -d muttahirulislam/citypersons-dataset-with-bg-image

Downloading citypersons-dataset-with-bg-image.zip to /content
100% 6.47G/6.48G [00:52<00:00, 131MB/s]
100% 6.48G/6.48G [00:52<00:00, 133MB/s]




In [None]:
# remove kaggle API key
! rm ~/.kaggle/kaggle.json

In [None]:
# Extract the downloaded archive into the current directory (-q: quiet).
# The following cells read the extracted files from /content/yolo_dir/yolo_dir.
! unzip -q citypersons-dataset-with-bg-image.zip

In [None]:
def resize(
    src_dir: str,
    out_dir: str,
    width: int = 512,
    convert_to_jpg: bool = True,
    optimize: bool = True,
    quality: int = 90,
    fn_to_lower: bool = True,
) -> None:
    """Resize every image in a directory to a given width, keeping the aspect ratio.

    Images are discovered with ``get_image_files`` and each one is written to
    ``out_dir`` under its own file name. Sub-directories of ``src_dir`` are not
    reproduced in ``out_dir``, so two source files with the same name end up at
    the same output path.

    Args:
        src_dir (str): Path to the source directory containing images.
        out_dir (str): Path to the destination directory for saving resized images.
            A new directory will be created if out_dir doesn't exist already.
        width (int, optional): Desired width of the resized image in pixels.
            Must be positive. Defaults to 512.
        convert_to_jpg (bool, optional): If true, convert the image to RGB and save it
            with a ``.jpg`` suffix. Defaults to True.
        optimize (bool, optional): If true, do an extra pass on the image to find a way
            to reduce its size as much as possible. Defaults to True.
        quality (int, optional): Quality used for storing the image. Defaults to 90.
        fn_to_lower (bool, optional): If true, convert the output filename (and its
            suffix) into lowercase. Defaults to True.

    Raises:
        ValueError: If ``width`` is not positive.
    """
    if width <= 0:
        raise ValueError(f"width must be a positive number of pixels, got {width}")

    os.makedirs(out_dir, exist_ok=True)
    fp_images = get_image_files(src_dir)
    for fp_img in tqdm(fp_images, total=len(fp_images)):
        # Use the stem rather than deleting the suffix text from the name: a plain
        # string replace would also remove the extension where it appears in the
        # middle of a name such as "a.png_copy.png".
        fn = fp_img.stem
        suffix = fp_img.suffix
        if fn_to_lower:
            fn = fn.lower()
            suffix = suffix.lower()

        # The context manager closes the source file once the resized copy exists,
        # so file handles do not accumulate over a large directory.
        with Image.open(fp_img) as img:
            _width, _height = img.size
            # The target width is used as-is (scaling the old width by a float ratio
            # can land one pixel short); only the height is derived from the ratio.
            _new_height = max(1, int(_height * width / _width))
            img_resized = img.resize((width, _new_height))

        if convert_to_jpg:
            # JPEG has no alpha channel or palette, hence the conversion to RGB.
            final_img = img_resized.convert("RGB")
            suffix = ".jpg"
        else:
            final_img = img_resized

        fp_output = os.path.join(out_dir, fn + suffix)
        final_img.save(fp=fp_output, optimize=optimize, quality=quality)

In [None]:
%%shell
# Build the output dataset skeleton under ./citypersons: for each split, create an
# empty images directory and copy the label directory over from the extracted archive.
mkdir -p citypersons/train/images
cp -r /content/yolo_dir/yolo_dir/train/labels citypersons/train/
mkdir -p citypersons/valid/images
cp -r /content/yolo_dir/yolo_dir/valid/labels citypersons/valid/



In [None]:
# Scale the train images to a width of 1024 px, keeping each file's format and name casing.
resize(
    "/content/yolo_dir/yolo_dir/train/images",
    "/content/citypersons/train/images",
    width=1024,
    fn_to_lower=False,
    convert_to_jpg=False,
)

100%|██████████| 2550/2550 [37:34<00:00,  1.13it/s]


In [None]:
# Scale the valid images to a width of 1024 px, keeping each file's format and name casing.
resize(
    "/content/yolo_dir/yolo_dir/valid/images",
    "/content/citypersons/valid/images",
    width=1024,
    fn_to_lower=False,
    convert_to_jpg=False,
)

100%|██████████| 451/451 [06:29<00:00,  1.16it/s]


## Custom YOLO dataset

### Images
Create a `dataset.yaml` file. The dataset config file defines 1) the dataset root directory `path` and the relative paths to the `train` / `val` / `test` image directories (or `*.txt` files with image paths), and 2) a class `names` dictionary.

### Labels
One `*.txt` file per image (if an image contains no objects, no `*.txt` file is required).
Each row is in `class x_center y_center width height` format. Box coordinates must be in **normalized xywh** format (from 0 to 1).

YOLO locates labels automatically for each image by replacing the last instance of `/images/` in each image path with `/labels/`

In [None]:
# train classes
# Count the class id of every row in the train label files and rewrite each file
# with leading/trailing whitespace removed from its rows.
classes = []
for fp in iglob("/content/citypersons/train/labels/*.txt"):
    lines = []
    with open(fp, 'r') as f:
        for line in f:
            row = line.strip()
            if not row:
                # A blank line holds no annotation: nothing to count or keep.
                continue
            # The class id is the first whitespace-separated field. Reading it this
            # way (instead of at a fixed character offset) gives the same counts
            # whether or not the row still has its leading whitespace, so the cell
            # can be re-run on files it has already cleaned.
            classes.append(row.split()[0])
            lines.append(row + '\n')
    with open(fp, 'w') as f:
        f.writelines(lines)

print(Counter(classes))

Counter({'1': 16526, '0': 4476})


In [None]:
# valid classes
# Count the class id of every row in the valid label files and rewrite each file
# with leading/trailing whitespace removed from its rows.
classes = []
for fp in iglob("/content/citypersons/valid/labels/*.txt"):
    lines = []
    with open(fp, 'r') as f:
        for line in f:
            row = line.strip()
            if not row:
                # A blank line holds no annotation: nothing to count or keep.
                continue
            # The class id is the first whitespace-separated field. Reading it this
            # way (instead of at a fixed character offset) gives the same counts
            # whether or not the row still has its leading whitespace, so the cell
            # can be re-run on files it has already cleaned.
            classes.append(row.split()[0])
            lines.append(row + '\n')
    with open(fp, 'w') as f:
        f.writelines(lines)

print(Counter(classes))

Counter({'1': 3157, '0': 1007})


In [None]:
# YOLO dataset config: dataset root, per-split image directories, and the
# class-id -> class-name mapping.
dataset_yaml = """path: /content/citypersons  # dataset root dir
train: train/images  # train images (relative to 'path')
val: valid/images  # val images (relative to 'path')
test:  # test images (optional)

# Classes
names:
  0: rider
  1: pedestrian
"""

# Store the config inside the dataset folder, next to the train/valid directories.
with open("citypersons/dataset.yaml", mode="w") as yaml_file:
    yaml_file.write(dataset_yaml)

In [None]:
%%shell
# Compress the dataset folder recursively (-r) at maximum compression (-9), quietly (-q).
zip -r -9 -q citypersons.zip citypersons/
# Copy the archive to Google Drive (requires the Drive mount at /content/drive).
cp citypersons.zip /content/drive/MyDrive/Reza/Projects/Pedestrian-Detection/data/



## Create a ClearML dataset for data versioning

In [6]:
# Authenticate this session with ClearML through its browser-based login.
clearml.browser_login()

<IPython.core.display.Javascript object>


🤖 ClearML connected successfully - let's build something! 🚀


In [9]:
# Create version 1.0 of the "CityPersons" dataset in the
# "Pedestrian-Detection-YOLOv8" ClearML project.
dataset = Dataset.create(
    dataset_name="CityPersons",
    dataset_project="Pedestrian-Detection-YOLOv8",
    dataset_version="1.0",
)
# Register everything under /content/citypersons with the dataset.
dataset.add_files(path="/content/citypersons")
# Upload the registered files to the ClearML file storage.
dataset.upload()
# Close this dataset version once the upload is done.
# NOTE(review): finalized ClearML datasets are presumably read-only afterwards - confirm.
dataset.finalize()

ClearML results page: https://app.clear.ml/projects/35a40981371241af84af2dff03e4bc36/experiments/20b6552b86804f9e97adcf335838d10d/output/log
ClearML dataset page: https://app.clear.ml/datasets/simple/35a40981371241af84af2dff03e4bc36/experiments/20b6552b86804f9e97adcf335838d10d
Generating SHA2 hash for 5943 files


100%|██████████| 5943/5943 [00:08<00:00, 736.75it/s]


Hash generation completed
Uploading dataset changes (764 files compressed to 512.6 MiB) to https://files.clear.ml
Uploading dataset changes (763 files compressed to 512.12 MiB) to https://files.clear.ml
Uploading dataset changes (2216 files compressed to 429.72 MiB) to https://files.clear.ml
Uploading dataset changes (2200 files compressed to 426.32 MiB) to https://files.clear.ml
File compression and upload completed: total size 1.84 GiB, 4 chunk(s) stored (average size 470.19 MiB)


True