### 1. Organize Data

In [None]:
import os
import random
import shutil

# Source (raw_*) and destination (images/labels) folders, all relative to the
# notebook's working directory.
base_dir = 'Dataset'
raw_images_dir = os.path.join(base_dir, 'raw_images')
raw_labels_dir = os.path.join(base_dir, 'raw_labels')
images_dir = os.path.join(base_dir, 'images')
labels_dir = os.path.join(base_dir, 'labels')

# Make sure the train/val target folders exist (no error if they already do).
for split in ['train', 'val']:
    os.makedirs(os.path.join(images_dir, split), exist_ok=True)
    os.makedirs(os.path.join(labels_dir, split), exist_ok=True)

# List all images. os.listdir() returns entries in arbitrary order, so the
# listing is sorted first; otherwise the seeded shuffle below would produce a
# different split on another machine or filesystem.
image_files = sorted(
    f for f in os.listdir(raw_images_dir)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)

# Seeded 80/20 train/val split.
random.seed(42)
random.shuffle(image_files)
split_idx = int(len(image_files) * 0.8)
train_files = image_files[:split_idx]
val_files = image_files[split_idx:]

def move_files(file_list, img_dst, lbl_dst):
    """Copy images and their label files into the given split folders.

    Despite the name, files are copied (not moved); the raw folders are left
    untouched. For every image name in ``file_list`` the image is read from
    ``raw_images_dir`` and the label ``<stem>.txt`` from ``raw_labels_dir``.
    An image is only copied when both files exist.

    Args:
        file_list: image file names (not paths) located in ``raw_images_dir``.
        img_dst: destination folder for images; created if missing.
        lbl_dst: destination folder for label files; created if missing.

    Returns:
        int: number of image/label pairs that were copied.
    """
    # Create the destinations here so the function also works when called
    # before (or without) the folder-setup step.
    os.makedirs(img_dst, exist_ok=True)
    os.makedirs(lbl_dst, exist_ok=True)

    copied = 0
    skipped = []
    for file in file_list:
        name, _ = os.path.splitext(file)
        label_name = name + '.txt'
        img_src = os.path.join(raw_images_dir, file)
        lbl_src = os.path.join(raw_labels_dir, label_name)

        if not (os.path.isfile(img_src) and os.path.isfile(lbl_src)):
            skipped.append(file)
            continue

        shutil.copy(img_src, os.path.join(img_dst, file))
        shutil.copy(lbl_src, os.path.join(lbl_dst, label_name))
        copied += 1

    # Report skipped files instead of dropping them silently, so the split
    # sizes printed elsewhere can be reconciled with what is on disk.
    if skipped:
        preview = ', '.join(skipped[:5])
        suffix = ', ...' if len(skipped) > 5 else ''
        print(f"Skipped {len(skipped)} file(s) without an image/label pair: {preview}{suffix}")

    return copied

# Copy each split into Dataset/images/<split> and Dataset/labels/<split>.
for split_name, split_files in (('train', train_files), ('val', val_files)):
    move_files(
        split_files,
        os.path.join(images_dir, split_name),
        os.path.join(labels_dir, split_name),
    )

# Sizes of the listed splits. These count listed images, so an image that
# move_files skipped for lack of a label file is still included here.
print(f"Total images: {len(image_files)}")
print(f"Train images: {len(train_files)}")
print(f"Val images: {len(val_files)}")

### 2. Create data.yaml file

In [None]:
import json
import os

# Absolute dataset root, derived from the same relative 'Dataset' folder the
# other cells use, so the config is not tied to one user's machine.
# Forward slashes keep the value free of backslash escapes on Windows.
dataset_root = os.path.abspath('Dataset').replace(os.sep, '/')

# json.dumps() emits a double-quoted scalar, which is also valid YAML and
# keeps paths containing '#' or ': ' from being misparsed.
data_yaml = f"""
path: {json.dumps(dataset_root, ensure_ascii=False)}
train: images/train
val: images/val

names:
    0: bubble
    1: narration
    2: ui
"""

# Written to the current working directory, which is where the training cell
# looks for "data.yaml".
with open("data.yaml", "w", encoding="utf-8") as f:
    f.write(data_yaml)

print ("Data.yaml saved")

### 3. Fix truncated images

In [None]:
from PIL import Image, ImageFile
import os

# Let Pillow decode files whose data ends early instead of raising, so they
# can be re-encoded by reload_and_save_images() below.
ImageFile.LOAD_TRUNCATED_IMAGES = True


def reload_and_save_images(folder_path):
    """Re-encode every JPEG/PNG in ``folder_path`` as RGB, overwriting it in place.

    Every file whose name ends in .jpg/.jpeg/.png (case-insensitive) is opened,
    converted to RGB and saved back to the same path. All matching files are
    rewritten, not only the truncated ones. Files that fail to load or save
    are reported and skipped.

    Args:
        folder_path: directory to scan (not recursive).
    """
    for filename in os.listdir(folder_path):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        path = os.path.join(folder_path, filename)
        try:
            # The context manager closes the source file handle before the
            # same path is overwritten below.
            with Image.open(path) as src:
                rgb = src.convert("RGB")  # forces a full decode into memory
            rgb.save(path)  # overwrite with the cleaned file
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not stop the
            # rest of the folder from being processed.
            print(f"Skipping {filename}: {e}")

# Clean both training splits in place: train first, then val.
for folder in ('Dataset/images/train', 'Dataset/images/val'):
    reload_and_save_images(folder)

### 4. Train YOLOv8 Model

In [None]:
from ultralytics import YOLO

# Start from the yolov8n.pt checkpoint and train on the dataset described in
# data.yaml.
model = YOLO('yolov8n.pt')

results = model.train(
    data="data.yaml",
    epochs=50,
    imgsz=640
    )

metrics = results.results_dict
print ("Training Complete")

# Ultralytics keys detection (box) metrics with a "(B)" suffix and lowercase
# precision/recall, e.g. "metrics/mAP50(B)". The un-suffixed names raise
# KeyError, which would hide the summary after a full training run.
reported_metrics = (
    ("mAP@0.5", "metrics/mAP50(B)"),
    ("mAP@0.5:0.95", "metrics/mAP50-95(B)"),
    ("Precision", "metrics/precision(B)"),
    ("Recall", "metrics/recall(B)"),
)
for label, key in reported_metrics:
    value = metrics.get(key)
    if value is None:
        # Don't crash the cell over a missing key; show what is available.
        print (f"Best {label}: n/a (key {key!r} not in {sorted(metrics)})")
    else:
        print (f"Best {label}: {value:.4f}")

### 5. Prediction test

| Using the best.pt weights from the train8 run (runs/detect/train8/weights/best.pt)

In [None]:
from ultralytics import YOLO
from PIL import Image, ImageFile
import os

# Let Pillow decode files whose data ends early instead of raising, so they
# can be re-encoded by reload_and_save_images() below.
ImageFile.LOAD_TRUNCATED_IMAGES = True


def reload_and_save_images(folder_path):
    """Re-encode every JPEG/PNG in ``folder_path`` as RGB, overwriting it in place.

    Every file whose name ends in .jpg/.jpeg/.png (case-insensitive) is opened,
    converted to RGB and saved back to the same path. All matching files are
    rewritten, not only the truncated ones. Files that fail to load or save
    are reported and skipped.

    Args:
        folder_path: directory to scan (not recursive).
    """
    for filename in os.listdir(folder_path):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        path = os.path.join(folder_path, filename)
        try:
            # The context manager closes the source file handle before the
            # same path is overwritten below.
            with Image.open(path) as src:
                rgb = src.convert("RGB")  # forces a full decode into memory
            rgb.save(path)  # overwrite with the cleaned file
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not stop the
            # rest of the folder from being processed.
            print(f"Skipping {filename}: {e}")

# Re-encode the test images in place before running inference on them.
test_dir = 'Dataset/test_set'
reload_and_save_images(test_dir)

# Load the best weights from the train8 run.
weights_path = 'runs/detect/train8/weights/best.pt'
model = YOLO(weights_path)

# Run prediction over the whole test folder; save=True asks Ultralytics to
# save the prediction outputs.
results = model.predict(test_dir, save=True)