### 1. Dataset creation

Since no suitable dataset existed, the idea was to create my own with Blender and to check whether a dataset could be generated automatically by taking pictures of assets in Blender. Therefore, multiple 3D models of Pokémon have been downloaded and a script called blender_script has been written to take multiple pictures of those Pokémon, creating the needed dataset.
The command to create the pictures is `blender --background --python "blender_script.py"` (run from the project root). Multiple datasets have been made (from dataset1 to dataset6), each with small modifications, to find the one that gives the best results.

As a proof of concept, only 9 Pokémon models have been downloaded: Abra, Chansey, Eevee, Jigglypuff, Pidgeotto, Pidgey, Psyduck, Rattata, Vulpix.
These Pokémon correspond to the species that can be encountered on Route 8 in the game "Pokémon: Let's go, Eevee".

### 2. Data augmentation and COCO to YOLO conversion

To ensure there was enough data and to train the model more efficiently, the datasets were subjected to data augmentation using albumentations. Three augmented versions of each image are attempted, but it may happen that no augmentation is triggered, in which case that version is omitted. The datasets, initially in COCO format, were then converted into YOLO datasets whose images are split 80/20 between the training and validation sets.


In [None]:
import os
import cv2
import json
import shutil
import yaml
import random
from tqdm import tqdm
from pathlib import Path
import albumentations as A

# Inputs: COCO-style annotation file and the directory holding the images.
COCO_ANNOTS = "datasets/dataset6-annotations.json"
IMAGES_DIR = "datasets/dataset6-images"
# Output roots of the plain YOLO dataset and of its augmented counterpart.
YOLO_OUT = "datasets/dataset6-neg_bg"
YOLO_OUT_AUG = "datasets/dataset6-neg_bg_aug"

# Number of augmented variants produced per training image, in addition to
# the unmodified copy that is always written as "_aug0".
NUM_AUGS_PER_IMAGE = 3
# Fraction of the images assigned to the training split; the rest is validation.
TRAIN_RATIO = 0.8
# Seed given to random.seed() before the train/validation shuffle.
SEED = 511

# Augmentation pipeline, applied in list order. Transforms without an explicit
# `p` use the library's default probability.
augmentations = [
    # Wrapper around two alternative noise models.
    A.OneOf([
        A.GaussNoise(),
        A.ISONoise()
    ]),
    # Drops 1 to 3 regions and fills them with a fixed colour.
    # NOTE(review): images are read with cv2.imread, so the fill tuple is
    # presumably interpreted in BGR channel order - confirm the intended colour.
    A.CoarseDropout(num_holes_range=(1, 3), fill=(123, 176, 84), p=0.25),
    A.RandomBrightnessContrast(),
    A.RandomGamma(),
    A.HueSaturationValue(),
    A.Blur(),
    A.MotionBlur(),
    A.ToGray(p=0.2),
    # Two independent geometric steps: shear, then down-scaling.
    A.Affine(shear=(-10, 10), p=0.3),
    A.Affine(scale=(0.5, 1), p=0.3),
    A.ElasticTransform(p=0.2)
]

# Pipeline used for images that carry annotations: boxes are given in COCO
# format ([x, y, width, height] in pixels) and their labels travel in the
# `category_ids` field.
# NOTE(review): min_visibility=0.05 presumably discards boxes that keep less
# than 5% of their area after a transform - confirm against the albumentations docs.
transform = A.Compose(
    augmentations,
    bbox_params=A.BboxParams(format='coco', label_fields=['category_ids'], min_visibility=0.05)
)

def coco_to_yolo(image_list, split_name, images_input_dir,
                images_output_dir, labels_output_dir,
                image_annots, category_mapping):
    """Copy the images of one split and write their YOLO label files.

    Args:
        image_list: COCO image records; each must provide 'id', 'file_name',
            'width' and 'height'.
        split_name: Human-readable split name, only used in progress output.
        images_input_dir: Directory searched recursively for the images, or
            the path of a single image file.
        images_output_dir: Existing directory receiving the image copies.
        labels_output_dir: Existing directory receiving one '.txt' per image.
        image_annots: Mapping image id -> list of COCO annotations, each with
            a 'bbox' ([x, y, w, h] in pixels) and a 'category_id'.
        category_mapping: Mapping COCO category id -> YOLO class index.

    Images without annotations get an empty label file. Images that cannot be
    found are skipped and reported once the split has been processed.
    """
    # Index the input tree once instead of walking it again for every image.
    # setdefault keeps the first path in os.walk order, which is the same file
    # a per-image walk would have picked.
    if os.path.isdir(images_input_dir):
        image_index = {}
        for root, _, files in os.walk(images_input_dir):
            for name in files:
                image_index.setdefault(name, os.path.join(root, name))
    else:
        # A single file was passed: it only matches a record with that name.
        image_index = {os.path.basename(images_input_dir): images_input_dir}

    missing = []
    for image_info in tqdm(image_list, desc=f"Converting {split_name} set"):
        image_id = image_info['id']
        image_filename = image_info['file_name']
        image_width = image_info['width']
        image_height = image_info['height']

        source_image_path = image_index.get(image_filename)
        if not source_image_path or not os.path.exists(source_image_path):
            missing.append(image_filename)
            continue

        dest_image_path = os.path.join(images_output_dir, image_filename)
        shutil.copy2(source_image_path, dest_image_path)

        label_filename = os.path.splitext(image_filename)[0] + ".txt"
        label_path = os.path.join(labels_output_dir, label_filename)

        yolo_annotations = []
        for ann in image_annots.get(image_id, []):
            x, y, w, h = ann['bbox']

            # COCO stores the top-left corner and size in pixels; YOLO expects
            # the box centre and size normalised by the image dimensions.
            center_x = (x + w/2) / image_width
            center_y = (y + h/2) / image_height
            norm_width = w / image_width
            norm_height = h / image_height

            yolo_class_id = category_mapping[ann['category_id']]
            yolo_annotations.append(
                f"{yolo_class_id} {center_x:.6f} {center_y:.6f} {norm_width:.6f} {norm_height:.6f}"
            )

        with open(label_path, 'w') as f:
            f.write('\n'.join(yolo_annotations))
            if yolo_annotations:
                f.write('\n')

    # Report after the loop so the message does not break the progress bar.
    if missing:
        print(f"Skipped {len(missing)} image(s) of the {split_name} set "
              f"not found in {images_input_dir}: {', '.join(missing)}")

def _index_images_by_name(images_dir):
    """Map every file name under images_dir to the first path os.walk yields for it."""
    if not os.path.isdir(images_dir):
        # A single file may be given instead of a directory.
        return {os.path.basename(images_dir): images_dir}
    index = {}
    for root, _, files in os.walk(images_dir):
        for name in files:
            index.setdefault(name, os.path.join(root, name))
    return index


def _write_yolo_label_file(label_path, bboxes, category_ids, img_width, img_height, category_mapping):
    """Write COCO [x, y, w, h] pixel boxes to label_path as normalised YOLO lines."""
    lines = []
    for (x, y, w, h), cat_id in zip(bboxes, category_ids):
        center_x = (x + w/2) / img_width
        center_y = (y + h/2) / img_height
        norm_width = w / img_width
        norm_height = h / img_height
        yolo_class_id = category_mapping[cat_id]
        lines.append(f"{yolo_class_id} {center_x:.6f} {center_y:.6f} {norm_width:.6f} {norm_height:.6f}")

    with open(label_path, 'w') as f:
        f.write('\n'.join(lines))
        if lines:
            f.write('\n')


def _write_dataset_yaml(yolo_dir, class_names):
    """Write the dataset.yaml describing the train/val layout rooted at yolo_dir."""
    yaml_content = {
        'path': yolo_dir,
        'train': 'images/train',
        'val': 'images/val',
        'nc': len(class_names),
        'names': class_names
    }
    with open(os.path.join(yolo_dir, 'dataset.yaml'), 'w') as f:
        yaml.dump(yaml_content, f, default_flow_style=False)


def data_aug_and_split(input_annot_file, input_images_dir, output_yolo_dir, output_yolo_aug_dir):
    """Split a COCO dataset into train/val and export it twice in YOLO layout.

    Two datasets are produced:
      * output_yolo_dir: the images as they are, converted with coco_to_yolo.
      * output_yolo_aug_dir: for every training image an unmodified copy
        ("_aug0") plus NUM_AUGS_PER_IMAGE augmented variants, all saved as
        PNG; the validation images are copied without augmentation.

    Args:
        input_annot_file: Path of the COCO annotation JSON ('images',
            'annotations' and 'categories' keys are read).
        input_images_dir: Directory searched recursively for the images, or
            the path of a single image file.
        output_yolo_dir: Root of the non-augmented YOLO dataset.
        output_yolo_aug_dir: Root of the augmented YOLO dataset.

    Training images that cannot be found or read are reported and skipped.
    A failure in one augmentation is reported and does not stop the others.
    """
    # Seeds Python's `random`, which drives the train/validation shuffle below.
    # NOTE(review): whether this also makes the albumentations output
    # reproducible depends on the installed albumentations version - confirm.
    random.seed(SEED)
    with open(input_annot_file, 'r') as f:
        coco_data = json.load(f)

    # YOLO class indices are the ranks of the COCO category ids.
    sorted_categories = sorted(coco_data['categories'], key=lambda c: c['id'])
    category_mapping = {category['id']: idx for idx, category in enumerate(sorted_categories)}
    class_names = [category['name'] for category in sorted_categories]

    # Split images into train and validation
    images_list = list(coco_data['images'])
    random.shuffle(images_list)
    train_count = int(len(images_list) * TRAIN_RATIO)
    train_images = images_list[:train_count]
    val_images = images_list[train_count:]

    image_id_to_annotations = {}
    for annot in coco_data['annotations']:
        image_id_to_annotations.setdefault(annot['image_id'], []).append(annot)

    for yolo_dir in [output_yolo_dir, output_yolo_aug_dir]:
        for kind in ("images", "labels"):
            for split in ("train", "val"):
                os.makedirs(os.path.join(yolo_dir, kind, split), exist_ok=True)

    # Process regular dataset
    print("Processing images without augmentation...")
    coco_to_yolo(train_images, "training regular", input_images_dir,
                os.path.join(output_yolo_dir, "images", "train"),
                os.path.join(output_yolo_dir, "labels", "train"),
                image_id_to_annotations, category_mapping)

    coco_to_yolo(val_images, "validation regular", input_images_dir,
                os.path.join(output_yolo_dir, "images", "val"),
                os.path.join(output_yolo_dir, "labels", "val"),
                image_id_to_annotations, category_mapping)

    _write_dataset_yaml(output_yolo_dir, class_names)

    # Augment training images
    print("Processing images with augmentation...")
    aug_images_dir = os.path.join(output_yolo_aug_dir, "images", "train")
    aug_labels_dir = os.path.join(output_yolo_aug_dir, "labels", "train")
    # Walk the input tree once rather than once per training image.
    image_index = _index_images_by_name(input_images_dir)
    # Box-free pipeline for background images, built once instead of once per
    # augmentation.
    bg_transform = A.Compose(augmentations)

    for image in tqdm(train_images, desc="Augmenting training images"):
        img_path = image_index.get(image['file_name'])
        if img_path is None:
            # Without this guard cv2.imread(None) would abort the whole run.
            print(f"Couldn't find {image['file_name']} in {input_images_dir}")
            continue

        img = cv2.imread(img_path)
        if img is None:
            print(f"Couldn't read {img_path}")
            continue

        annots = image_id_to_annotations.get(image['id'], [])
        bboxes = [ann['bbox'] for ann in annots]
        category_ids = [ann['category_id'] for ann in annots]
        stem = Path(image['file_name']).stem

        # Index 0 is the unmodified image; 1..NUM_AUGS_PER_IMAGE are augmented.
        for i in range(NUM_AUGS_PER_IMAGE + 1):
            try:
                if i == 0:
                    aug_img = img.copy()
                    aug_bboxes = bboxes
                    aug_categories = category_ids
                elif bboxes:
                    augmented = transform(image=img, bboxes=bboxes, category_ids=category_ids)
                    aug_img = augmented['image']
                    aug_bboxes = augmented['bboxes']
                    aug_categories = augmented['category_ids']
                else:
                    # Background image: nothing to track, only pixels change.
                    augmented = bg_transform(image=img)
                    aug_img = augmented['image']
                    aug_bboxes = []
                    aug_categories = []

                new_img_path = os.path.join(aug_images_dir, f"{stem}_aug{i}.png")
                # cv2.imwrite signals failure through its return value; raise
                # so no label file is left without its image.
                if not cv2.imwrite(new_img_path, aug_img):
                    raise OSError(f"could not write {new_img_path}")

                _write_yolo_label_file(
                    os.path.join(aug_labels_dir, f"{stem}_aug{i}.txt"),
                    aug_bboxes, aug_categories,
                    aug_img.shape[1], aug_img.shape[0],
                    category_mapping,
                )

            except Exception as e:
                # Best effort: report and carry on with the next variant.
                print(f"Error while Augmenting {img_path} aug{i}: {e}")

    print("Copying validation images to augmented dataset...")
    coco_to_yolo(val_images, "validation", input_images_dir,
                os.path.join(output_yolo_aug_dir, "images", "val"),
                os.path.join(output_yolo_aug_dir, "labels", "val"),
                image_id_to_annotations, category_mapping)

    _write_dataset_yaml(output_yolo_aug_dir, class_names)

# Process both datasets (regular and augmented). When an input is missing, say
# so instead of finishing silently without producing anything.
missing_inputs = [path for path in (COCO_ANNOTS, IMAGES_DIR) if not os.path.exists(path)]
if missing_inputs:
    print(f"Dataset processing skipped, missing input(s): {', '.join(missing_inputs)}")
else:
    print("Processing dataset...")
    data_aug_and_split(COCO_ANNOTS, IMAGES_DIR, YOLO_OUT, YOLO_OUT_AUG)
    print("Dataset processing complete!")

### 3. Check for GPU usage

Since training was too slow, it turned out that the CPU was in fact being used for training. These few lines make sure that the GPU is being used for training.

In [None]:
import torch

# Announce which device training will run on.
if not torch.cuda.is_available():
    print("CPU will be used for training.")
else:
    print(f"GPU {torch.cuda.get_device_name(0)} will be used for training.")

While training different models one after the other without restarting the computer, the computer tended to crash during training. These small functions should help prevent that by checking RAM usage and by emptying the cache.

In [None]:
import torch
import psutil
import gc

def check_memory():
    """Print total, available and used system RAM (in GB) as reported by psutil."""
    bytes_per_gb = 1024**3
    stats = psutil.virtual_memory()
    print(f"Total RAM: {stats.total / bytes_per_gb:.1f} GB")
    print(f"Available RAM: {stats.available / bytes_per_gb:.1f} GB")
    print(f"Used RAM: {stats.used / bytes_per_gb:.1f} GB ({stats.percent}%)")


def cleanup():
    """Run the garbage collector and, when CUDA is available, empty the CUDA cache."""
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()

# Print the current RAM usage, then collect garbage and empty the CUDA cache.
check_memory()
cleanup()

### 4. Install and train model

The model that has been chosen is yolo11n. The best-performing dataset has also been used to train yolo11s, and an attempt has been made to use yolo11m, but this model was too big for the resources I have.

In [None]:
from ultralytics import YOLO
import torch
import os

# Directory holding the pretrained checkpoints, and the project directory that
# receives the training runs.
IN_MODELS_DIR = "downloaded_models"
OUT_MODELS_DIR = "trained_models"

# Keyword arguments forwarded unchanged to model.train() by train_model().
config = {
    "imgsz": 640,
    "epochs": 50,
    "batch": 16,
    # Use CUDA device 0 when available, otherwise fall back to the CPU.
    "device": 0 if torch.cuda.is_available() else 'cpu',
    "save": True,
    # NOTE(review): presumably lets a run reuse an existing run directory of
    # the same name instead of creating a new one - confirm in the Ultralytics docs.
    "exist_ok": True,
    "verbose": True,
    "workers": 4,
    "cache": 'disk',
}

# Models to download
models_name = ["yolo11n", "yolo11s"]

# Download models into IN_MODELS_DIR (created once, before the loop).
os.makedirs(IN_MODELS_DIR, exist_ok=True)
for model_name in models_name:
    model_path = f"{IN_MODELS_DIR}/{model_name}.pt"
    if os.path.exists(model_path):
        # Re-running the cell should not fetch the same weights again.
        print(f"{model_name} already present in {IN_MODELS_DIR}/, skipping download")
        continue

    model = YOLO(model_name)
    model.save(model_path)
    print(f"Downloaded {model_name} in {IN_MODELS_DIR}/")

    # The checkpoint is not always placed in the working directory (it may
    # come from a weights cache), so only delete the local copy when it exists.
    downloaded_copy = f"{model_name}.pt"
    if os.path.exists(downloaded_copy):
        os.remove(downloaded_copy)

# Define the training function
def train_model(model_name, dataset_name, dataset_path):
    """Train one pretrained model on one dataset and print a short report.

    Args:
        model_name: Checkpoint name without extension; loaded from
            IN_MODELS_DIR/<model_name>.pt.
        dataset_name: Label used in the run name and in the printed report.
        dataset_path: Path of the dataset YAML passed to model.train(data=...).

    Returns:
        A dict with the keys 'model', 'dataset', 'project', 'results' and
        'status' (plus 'error' when training raised), or None when the
        checkpoint could not be loaded.

    Exceptions raised while loading or training are caught and reported, so a
    failed run does not interrupt the notebook.
    """
    print(f"Training {model_name}")
    try:
        model = YOLO(f"{IN_MODELS_DIR}/{model_name}.pt")
    except Exception as e:
        print(f"Failed to load {model_name}.pt: {e}")
        return None
    print(f"Loaded {model_name}.pt successfully.")

    print(f"Training on {dataset_name} dataset...")
    print(f"Dataset: {dataset_path}")

    project_name = OUT_MODELS_DIR
    run_name = f"{model_name}_{dataset_name}"

    # Filled in up front so the report below never meets a missing key,
    # whichever way training ends.
    summary = {
        "model": model_name,
        "dataset": dataset_name,
        "project": project_name,
        "results": None,
        "status": "Completed",
    }

    try:
        results = model.train(
            data=dataset_path,
            project=project_name,
            name=run_name,
            **config
        )
    except Exception as e:
        error_msg = str(e)
        print(f"Training failed for {model_name} on {dataset_name}")
        print(f"Error: {error_msg}")
        summary["status"] = f"Failed: {error_msg}"
        summary["error"] = error_msg
    else:
        summary["results"] = results
        if results is not None:
            print(f"{model_name} training completed on {dataset_name}")
        else:
            summary["status"] = "Completed (no results returned)"
            print("Training completed but no results returned")

    print(f"\n{model_name} using {dataset_name}:")
    print(f"   Model: {summary['model']}")
    print(f"   Dataset: {summary['dataset']}")
    print(f"   Project: {summary['project']}")
    print(f"   Status: {summary['status']}")
    print(f"   Error: {summary.get('error', 'No error')}")

    metrics = getattr(summary["results"], "results_dict", None)
    if metrics is not None:
        print("   Key Metrics:")
        for key, value in metrics.items():
            if isinstance(value, (int, float)):
                print(f"      {key}: {value:.4f}")

    if "error" in summary:
        print("\nTraining did not complete.\n")
    else:
        print("\nTraining completed!\n")
    return summary

##### Model training

The yolo11n model has been retrained on several different datasets, in search of the most suitable one.

In [None]:
# Print RAM usage, train yolo11n on the dataset6-neg_bg_aug dataset, then
# collect garbage and empty the CUDA cache.
check_memory()
train_model("yolo11n", "dataset6-neg_bg_aug", "datasets/dataset6-neg_bg_aug/dataset.yaml")
cleanup()

The best-performing dataset on yolo11n has been used to retrain the bigger yolo11s model, to examine the difference in performance compared with the yolo11n model.

In [None]:
# Print RAM usage, train yolo11s on the dataset6bis-neg_bg_aug dataset, then
# collect garbage and empty the CUDA cache.
check_memory()
train_model("yolo11s", "dataset6bis-neg_bg_aug", "datasets/dataset6bis-neg_bg_aug/dataset.yaml")
cleanup()

The resulting model, called `dataset6bis-neg_bg_aug`, has been reused on 50 manually labeled images from the game "Pokémon: Let's go, Eevee" to create a final model called `Pokédetector.pt`, powerful enough to detect Pokémon efficiently in that game. This final model can be used to create a database for training an AI model from scratch.

### 5. `Pokédetector` usage for Dataset Creation

In [None]:
import os
from glob import glob
from ultralytics import YOLO
import cv2

# Directory of the images to label, and the project directory given to predict().
SRC_DIR = "final_dataset/original_images"
OUT_PROJECT = "final_dataset"
# Run name given to predict(name=...).
# NOTE(review): it is empty here - confirm which sub-directory of OUT_PROJECT
# Ultralytics writes to in that case.
OUT_RUN_NAME = ""

# Values passed to predict() as `iou` and `conf` respectively.
IOU_THRESHOLD = 0.4
CONF_THRESHOLD = 0.7

os.makedirs(OUT_PROJECT, exist_ok=True)

# Detector used to label the images.
model = YOLO("trained_models/last_model/pokedetector.pt")

def get_image_dimensions(image_path):
    """Return the (width, height) in pixels of the image at image_path.

    Raises:
        ValueError: if cv2.imread cannot load the file.
    """
    loaded = cv2.imread(image_path)
    if loaded is not None:
        rows, cols = loaded.shape[:2]
        return cols, rows
    raise ValueError(f"Cannot open image: {image_path}")

# Run the detector on every file of SRC_DIR at the image's native resolution,
# saving the annotated image and the predicted labels (save_txt) per image.
image_files = sorted(glob(os.path.join(SRC_DIR, "*.*")))
for img_path in image_files:
    try:
        w, h = get_image_dimensions(img_path)
    except ValueError as err:
        # "*.*" also matches non-image files; one unreadable file must not
        # stop the remaining predictions.
        print(f"Skipping {img_path}: {err}")
        continue

    model.predict(
        source=img_path,
        # Ultralytics expects a tuple imgsz as (height, width); passing
        # (width, height) shrinks non-square images before inference.
        imgsz=(h, w),
        conf=CONF_THRESHOLD,
        iou=IOU_THRESHOLD,
        project=OUT_PROJECT,
        name=OUT_RUN_NAME,
        exist_ok=True,
        save=True,
        save_txt=True,
        verbose=False
    )