In [1]:
# Install required packages
!pip install ultralytics pyyaml split-folders --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━

In [2]:
import os
import xml.etree.ElementTree as ET
import shutil
import splitfolders
from IPython.display import FileLink
from ultralytics import YOLO
import yaml

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# Define class mapping
CLASS_MAPPING = {
    'Horse': 0,
    'Serpent': 1,
    'Bird': 2,
    'Boar': 3,
    'Monkey': 4,
    'Ox': 5,
    'Ram': 6,
    'Tiger': 7,
    'Dog': 8,
    'Dragon': 9,
    'Hare': 10,
    'Rat': 11
}

# Create YOLO directory structure
dataset_dir = '/kaggle/working/dataset'
os.makedirs(f'{dataset_dir}/images', exist_ok=True)
os.makedirs(f'{dataset_dir}/labels', exist_ok=True)

In [4]:
def convert_xml_to_yolo(xml_path, output_dir):
    """Convert PASCAL VOC XML to YOLO format txt files"""
    tree = ET.parse(xml_path)
    root = tree.getroot()
    
    # Get image dimensions
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    
    # Create label file
    txt_file = os.path.join(output_dir, os.path.splitext(os.path.basename(xml_path))[0] + '.txt')
    
    with open(txt_file, 'w') as f:
        for obj in root.iter('object'):
            class_name = obj.find('name').text
            if class_name not in CLASS_MAPPING:
                continue  # Skip unknown classes
                
            cls = CLASS_MAPPING[class_name]
            
            # Get bounding box coordinates
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            
            # Convert to YOLO format (center x, center y, width, height)
            x_center = ((xmin + xmax) / 2) / width
            y_center = ((ymin + ymax) / 2) / height
            w = (xmax - xmin) / width
            h = (ymax - ymin) / height
            
            f.write(f"{cls} {x_center} {y_center} {w} {h}\n")

In [5]:
# Process all images and annotations
input_dir = '/kaggle/input/naruto-shity-data/Images'

for filename in os.listdir(input_dir):
    if filename.endswith('.xml'):
        xml_path = os.path.join(input_dir, filename)
        img_name = os.path.splitext(filename)[0] + '.jpg'
        img_path = os.path.join(input_dir, img_name)
        
        # Copy image if exists
        if os.path.exists(img_path):
            shutil.copy(img_path, f'{dataset_dir}/images/{img_name}')
            # Convert and save label
            convert_xml_to_yolo(xml_path, f'{dataset_dir}/labels')

In [6]:
# Split dataset (80% train, 20% validation)
splitfolders.ratio(
    dataset_dir,
    output="/kaggle/working/split_dataset",
    seed=42,
    ratio=(0.8, 0.2),
    group_prefix=None,
    move=False
)

Copying files: 232 files [00:00, 5934.13 files/s]


In [10]:
# Create dataset.yaml
yaml_content = """
path: /kaggle/working/split_dataset
train: train/images
val: val/images
names:
  0: Horse
  1: Serpent
  2: Bird
  3: Boar
  4: Monkey
  5: Ox
  6: Ram
  7: Tiger
  8: Dog
  9: Dragon
  10: Hare
  11: Rat
"""

with open('/kaggle/working/dataset.yaml', 'w') as f:
    f.write(yaml_content.strip())

In [8]:
# Initialize YOLOv8 model (using larger model for better multi-class performance)
model = YOLO('yolov8m.pt')  # Using medium version for better accuracy

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████| 49.7M/49.7M [00:02<00:00, 24.5MB/s]


In [12]:
# Train the model with adjusted parameters for multi-class
results = model.train(
    data='/kaggle/working/dataset.yaml',
    epochs=10,  # Increased epochs for better convergence
    batch=16,
    imgsz=640,
    device=0,
    project='/kaggle/working/results',
    name='naruto_signs',
    optimizer='AdamW',  # Better for multi-class
    lr0=0.001,
    patience=20,  # Increased patience
    weight_decay=0.0005,
    hsv_h=0.015,  # Color augmentation
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10,  # Rotation augmentation
    flipud=0.5,  # Vertical flip
    fliplr=0.5   # Horizontal flip
)

Ultralytics 8.3.118 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/kaggle/working/dataset.yaml, epochs=10, time=None, patience=20, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=8, project=/kaggle/working/results, name=naruto_signs3, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels

[34m[1mtrain: [0mScanning /kaggle/working/split_dataset/train/labels.cache... 92 images, 0 backgrounds, 0 corrupt: 100%|██████████| 92/92 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1159.5±356.4 MB/s, size: 81.2 KB)


[34m[1mval: [0mScanning /kaggle/working/split_dataset/val/labels.cache... 24 images, 0 backgrounds, 0 corrupt: 100%|██████████| 24/24 [00:00<?, ?it/s]


Plotting labels to /kaggle/working/results/naruto_signs3/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1m/kaggle/working/results/naruto_signs3[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      12.1G      1.343      2.753      1.698         12        640: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.26it/s]

                   all         24         24      0.354      0.636      0.358      0.164






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10        12G      1.316      2.626      1.616         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.59it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.45it/s]

                   all         24         24      0.576      0.379      0.413      0.212






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      12.2G      1.446       2.05      1.713         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.52it/s]

                   all         24         24      0.589      0.459        0.5      0.295






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      12.5G      1.292      1.655      1.522         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.54it/s]

                   all         24         24      0.574      0.591      0.617      0.418






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      12.5G      1.167      1.416      1.475         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.54it/s]

                   all         24         24      0.491       0.58      0.591      0.396






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      12.5G      1.137      1.433       1.42         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.52it/s]

                   all         24         24      0.775      0.536      0.625      0.363






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      12.5G        1.1      1.006      1.303         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.57it/s]

                   all         24         24      0.552      0.455      0.529      0.299






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      12.5G      1.141      1.154      1.403         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.55it/s]

                   all         24         24      0.751      0.781      0.974      0.678






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      12.5G      1.098     0.9321      1.353         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.56it/s]

                   all         24         24       0.83      0.868      0.995      0.716






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      12.2G     0.9681     0.9012      1.281         12        640: 100%|██████████| 6/6 [00:03<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.56it/s]

                   all         24         24      0.848      0.876      0.995      0.724






10 epochs completed in 0.015 hours.
Optimizer stripped from /kaggle/working/results/naruto_signs3/weights/last.pt, 52.0MB
Optimizer stripped from /kaggle/working/results/naruto_signs3/weights/best.pt, 52.0MB

Validating /kaggle/working/results/naruto_signs3/weights/best.pt...
Ultralytics 8.3.118 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
Model summary (fused): 92 layers, 25,846,708 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.32it/s]


                   all         24         24      0.849      0.876      0.995      0.724
                 Horse          1          1      0.763          1      0.995      0.895
               Serpent          2          2          1          0      0.995      0.483
                  Bird          2          2      0.548          1      0.995      0.821
                  Boar          4          4      0.932          1      0.995      0.555
                Monkey          1          1      0.895          1      0.995      0.796
                    Ox          2          2          1      0.641      0.995      0.799
                   Ram          3          3      0.488          1      0.995      0.553
                 Tiger          3          3      0.955          1      0.995      0.713
                   Dog          2          2      0.897          1      0.995      0.722
                Dragon          3          3      0.953          1      0.995      0.831
                   Ra

  xa[xa < 0] = -1
  xa[xa < 0] = -1


Speed: 0.1ms preprocess, 8.8ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1m/kaggle/working/results/naruto_signs3[0m


In [14]:
# Export best model
best_model = '/kaggle/working/results/naruto_signs3/weights/best.pt'
shutil.copy(best_model, '/kaggle/working/best_naruto_signs.pt')

# Create downloadable link
FileLink('/kaggle/working/best_naruto_signs.pt')

In [16]:
# Verification test with class labels
test_model = YOLO('/kaggle/working/best_naruto_signs.pt')

# Get class names from YAML
with open('/kaggle/working/dataset.yaml') as f:
    data = yaml.safe_load(f)
    class_names = data['names']

results = test_model.predict(
    source='/kaggle/working/split_dataset/val/images',
    save=True,
    conf=0.5,
    line_thickness=2,
    show_labels=True,
    show_conf=True
)

# Display class distribution in validation set
print("\nClass distribution in validation set:")
for i, name in class_names.items():
    count = len([r for r in results if int(r.boxes.cls[0]) == int(i)])
    print(f"{name}: {count} detections")


image 1/24 /kaggle/working/split_dataset/val/images/Bird.20fa2833-da6c-11ec-a6e5-9cfce8f20ad4.jpg: 480x640 1 Bird, 42.6ms
image 2/24 /kaggle/working/split_dataset/val/images/Bird.23fac499-da6c-11ec-9117-9cfce8f20ad4.jpg: 480x640 1 Bird, 13.9ms
image 3/24 /kaggle/working/split_dataset/val/images/Boar.3ca7088d-dff3-11ec-ae62-9cfce8f20ad4.jpg: 480x640 1 Boar, 13.8ms
image 4/24 /kaggle/working/split_dataset/val/images/Boar.3f14da29-dff3-11ec-a1ed-9cfce8f20ad4.jpg: 480x640 1 Boar, 1 Tiger, 13.9ms
image 5/24 /kaggle/working/split_dataset/val/images/Boar.404b946f-dff3-11ec-a9dd-9cfce8f20ad4.jpg: 480x640 1 Boar, 1 Tiger, 13.9ms
image 6/24 /kaggle/working/split_dataset/val/images/Boar.43f0a86b-dff3-11ec-af36-9cfce8f20ad4.jpg: 480x640 1 Boar, 13.6ms
image 7/24 /kaggle/working/split_dataset/val/images/Dog.295189b0-dff6-11ec-a1a6-9cfce8f20ad4.jpg: 480x640 1 Dog, 13.5ms
image 8/24 /kaggle/working/split_dataset/val/images/Dog.2a8792fc-dff6-11ec-aaa2-9cfce8f20ad4.jpg: 480x640 1 Dog, 13.5ms
image 9/2