# build_new_dataset.ipynb

In [None]:
# Notebook configuration: absolute paths used by the cells below.
# Folder that is searched for the source *.jpg images.
ORIGINAL_IMAGE_DIR = '/home/aubrey/Desktop/Guam07-training-set/rawdata'
# Root of the dataset being built; train/, val/, test/ and data.yaml are created here.
NEW_DATASET_DIR = '/home/aubrey/Desktop/Guam07-training-set/datasets/Guam07v4'
# NOTE(review): MODEL is not referenced by the cells visible here; the prediction
# cell loads a different, hardcoded weights path -- confirm which one is intended.
MODEL='/home/aubrey/Desktop/Guam07-training-set/datasets/Guam07v1/runs/detect/train5/weights/best.pt'

In [None]:
from ultralytics import YOLO
import numpy as np
from glob import iglob
import os
import subprocess

## Create data structure for the new dataset

In [None]:
# Build the empty dataset skeleton: one folder per split, a top-level data.yaml,
# and a classes.txt inside every split folder.

# Class names in index order. Both data.yaml and classes.txt are generated from
# this single list so the two files cannot drift apart.
class_names = ['zero', 'low', 'medium', 'high', 'fatal', 'vcut']

# create destination folders (exist_ok makes the cell safe to re-run)
for subset in ['train', 'val', 'test']:
    os.makedirs(f'{NEW_DATASET_DIR}/{subset}', exist_ok=True)

# save data.yaml in top level folder
# The dataset root must be given under the `path:` key; a bare path on the first
# line followed by `key: value` lines is not valid YAML.
names_block = '\n'.join(f'  {index}: {name}' for index, name in enumerate(class_names))
yaml = f'''path: {NEW_DATASET_DIR}
train: train
val: val
test: test
names:
{names_block}'''

with open(f'{NEW_DATASET_DIR}/data.yaml', 'w') as f:
    f.write(yaml)

# save classes.txt in train, val and test folders (one class name per line)
classes = '\n'.join(class_names)

for subset in ['train', 'val', 'test']:
    with open(f'{NEW_DATASET_DIR}/{subset}/classes.txt', 'w') as f:
        f.write(classes)

## Populate train, val and test folders with symlinks to images

In [None]:
def random_subset(subset_list, probability_list, generator=None):
    """Pick one entry of ``subset_list`` at random.

    Parameters
    ----------
    subset_list : sequence
        Candidate values (e.g. ``['train', 'val', 'test']``).
    probability_list : sequence of float
        Selection probability for each candidate, in the same order as
        ``subset_list``.
    generator : numpy.random.Generator, optional
        Random generator to draw from. When omitted, the notebook-level
        ``rng`` is used, so existing two-argument calls behave as before.

    Returns
    -------
    The element of ``subset_list`` selected by a single multinomial trial.
    """
    # Resolve the generator at call time; the notebook-level `rng` only has to
    # exist when no explicit generator is supplied.
    gen = rng if generator is None else generator
    # One trial yields a one-hot vector; the position of the 1 is the chosen index.
    one_hot = gen.multinomial(1, probability_list)
    return subset_list[int(np.argmax(one_hot))]
 
# Seeded generator so the train/val/test assignment is repeatable.
rng = np.random.default_rng(42)
subset_list = ['train', 'val', 'test']
probability_list = [0.8, 0.1, 0.1]

# iglob yields files in arbitrary filesystem order; sorting ensures the seeded
# sequence of draws is paired with the same images on every run.
for imagepath in sorted(iglob(f'{ORIGINAL_IMAGE_DIR}/*.jpg')):
    # Always draw, even for images skipped below, so the random sequence stays
    # aligned with the sorted file list.
    destination_folder = random_subset(subset_list, probability_list)
    filename = os.path.basename(imagepath)

    # Skip images that are already linked in any split: this keeps the cell
    # re-runnable and prevents one image from ending up in two splits.
    already_linked = any(
        os.path.lexists(f'{NEW_DATASET_DIR}/{subset}/{filename}')
        for subset in subset_list
    )
    if already_linked:
        continue

    # Create the link with an explicit file name. Unlike an unchecked `ln -s`
    # subprocess, a missing destination folder raises instead of being ignored.
    os.symlink(imagepath, f'{NEW_DATASET_DIR}/{destination_folder}/{filename}')

## Populate train, val and test folders with labels (*.txt files)

Reference: [YOLOv8 custom save directory path (Stack Overflow)](https://stackoverflow.com/questions/76100975/yolov8-custom-save-directory-path)


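You can change the directory where the results are saved (`save_dir`) by setting two arguments of `predict`: `project` and `name`. Note that if `project/name` already exists, Ultralytics appends a numeric suffix (e.g. `val2`) unless `exist_ok=True` is also passed.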
```
results = model.predict(source=xxx, save_txt = True, project="xxx", name="yyy")
```
such that:
```
save_dir=project/name
```


In [113]:
# Perform object detection on every image of one split using the model and
# save the detections as text label files (save_txt=True).
# NOTE(review): this weights path differs from the MODEL constant defined in the
# configuration cell -- confirm which weights are intended.
model = YOLO('/home/aubrey/Desktop/Guam07-training-set/code/runs/detect/imgsz9603/weights/best.pt')

# Split to label; change this value and re-run the cell for the other splits.
destination_folder = 'val'
results = model(
    source=f'{NEW_DATASET_DIR}/{destination_folder}/*.jpg',
    imgsz=960,
    save_txt=True,
    # Results are saved under project/name, i.e. inside the split folder itself.
    project=NEW_DATASET_DIR,
    name=destination_folder,
    # The split folder already exists (it holds the images). Without exist_ok
    # the output would be redirected to an incremented folder such as 'val2'.
    exist_ok=True,
    agnostic_nms=True,
    iou=0.5,
    )
# NOTE(review): without stream=True all Results objects are kept in memory (see
# the warning in this cell's output); kept as a list here because a later cell
# indexes results[0].

print('FINISHED')



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/2632 /home/aubrey/Desktop/Guam07-training-set/datasets/Guam07v4/val/IMG_20221115_111527.jpg: 544x960 (no detections), 29.4ms
image 2/2632 /home/aubrey/Desktop/Guam07-training-set/datasets/Guam07v4/val/IMG_20221115_111549.jpg: 544x960 (no detections), 28.9ms
image 3/2632 /home/aubrey/Desktop/Guam07-training-set/datasets/Guam07v4/val/IMG_20221115_111620.jpg: 544x960 (no detections), 28.5ms
image 4/2632 /home/aubrey/Desktop/Guam07-training-set/datasets/Guam07v4/val/IMG_20221115_111634.jpg: 544x960 (no detections), 28.6ms
image 5/2632 /home/aubrey/Desktop/Guam07

In [112]:
# Inspect the first element of `results` returned by the prediction cell.
results[0]

ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'zero', 1: 'low', 2: 'medium', 3: 'high', 4: 'fatal', 5: 'vcut'}
obb: None
orig_img: array([[[136, 107,  92],
        [153, 124, 109],
        [152, 124, 107],
        ...,
        [ 20,  20,  32],
        [ 18,  18,  30],
        [ 13,  13,  25]],

       [[107,  79,  62],
        [145, 117, 100],
        [161, 131, 112],
        ...,
        [ 18,  18,  30],
        [ 17,  17,  29],
        [ 14,  14,  26]],

       [[ 68,  38,  19],
        [124,  94,  75],
        [163, 132, 111],
        ...,
        [ 16,  16,  28],
        [ 16,  16,  28],
        [ 15,  15,  27]],

       ...,

       [[ 14,  27,  19],
        [ 16,  29,  21],
        [ 15,  28,  20],
        ...,
        [ 21,  19,  31],
        [ 21,  19,  31],
        [ 22,  20,  32]],

       [[ 20,  33,  25],
        [ 20,  33,  25],
        [ 17,  30,  22],
        ...,
        