In [6]:
import os
import shutil
import random
import numpy as np
import torch
from pathlib import Path
from sklearn.model_selection import train_test_split
from torchvision import  transforms
from ultralytics import YOLO
import matplotlib.pyplot as plt
import pandas as pd
from glob import glob
from PIL import Image
import yaml
import argparse

In [23]:
# Load the YOLOv8-nano weights file; the cell output below shows it being
# downloaded to 'yolov8n.pt' when it is not already present locally.
model = YOLO("yolov8n.pt")

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 1.3MB/s 4.9s 4.9s<0.4s7s8s


In [24]:
def split_dataset(data_path, train_pct=0.8, seed=None):
    """
    Split images and labels from a source directory into train/validation sets.

    Files are copied (never moved) into ``data/train`` and ``data/validation``
    under the current working directory, each with ``images`` and ``labels``
    subfolders. The function is safe to re-run: when an image is assigned to
    one split, any copy of it (and of its label) left in the other split by a
    previous run is deleted, so an image never ends up in both sets.

    Args:
        data_path (str): Path to folder containing 'images' and 'labels' subfolders.
        train_pct (float): Ratio of data to be used for training (0.01 to 0.99).
        seed (int | None): Seed for the shuffle. With an integer the split is
            reproducible; with None (default) the global ``random`` state is used.

    Returns:
        None. Problems are reported with a printed message and an early return.
    """
    # Only files with these (case-insensitive) extensions are treated as images,
    # so stray files such as Thumbs.db or .DS_Store are not copied as samples.
    image_exts = {'.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png',
                  '.tif', '.tiff', '.webp', '.pfm', '.heic'}

    # 1. Validation Logic
    if not os.path.isdir(data_path):
        print(f"Error: {data_path} is not a valid directory.")
        return

    if not (0.01 <= train_pct <= 0.99):
        print("Error: train_pct must be between 0.01 and 0.99.")
        return

    # 2. Setup Paths
    input_image_path = os.path.join(data_path, 'images')
    input_label_path = os.path.join(data_path, 'labels')

    if not os.path.isdir(input_image_path):
        print(f"Error: {input_image_path} is not a valid directory.")
        return

    cwd = os.getcwd()
    dirs = {
        'train_img': os.path.join(cwd, 'data/train/images'),
        'train_txt': os.path.join(cwd, 'data/train/labels'),
        'val_img':   os.path.join(cwd, 'data/validation/images'),
        'val_txt':   os.path.join(cwd, 'data/validation/labels')
    }

    # 3. Gather Files
    # Sorted first so that a given seed yields the same split regardless of the
    # order in which the filesystem happens to list the files.
    img_file_list = sorted(
        f for f in Path(input_image_path).rglob('*')
        if f.is_file() and f.suffix.lower() in image_exts
    )

    file_num = len(img_file_list)
    if file_num == 0:
        print(f"Error: no image files found in {input_image_path}.")
        return

    train_num = int(file_num * train_pct)

    # 4. Create Folders
    for path in dirs.values():
        os.makedirs(path, exist_ok=True)

    print(f"Total images: {file_num}")
    print(f"Splitting: {train_num} train | {file_num - train_num} validation")

    # 5. Process Split
    # Shuffle once, then slice by index: the first train_num files go to train.
    if seed is None:
        random.shuffle(img_file_list)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(img_file_list)

    for i, img_path in enumerate(img_file_list):
        # Determine destination (and the opposite split) based on current index
        if i < train_num:
            dest_img, dest_txt = dirs['train_img'], dirs['train_txt']
            other_img, other_txt = dirs['val_img'], dirs['val_txt']
        else:
            dest_img, dest_txt = dirs['val_img'], dirs['val_txt']
            other_img, other_txt = dirs['train_img'], dirs['train_txt']

        # Setup filenames
        img_fn = img_path.name
        txt_fn = img_path.stem + '.txt'
        txt_path = os.path.join(input_label_path, txt_fn)

        # Drop copies left in the opposite split by an earlier run; otherwise
        # the same sample would be used for both training and validation.
        for stale in (os.path.join(other_img, img_fn),
                      os.path.join(other_txt, txt_fn)):
            if os.path.isfile(stale):
                os.remove(stale)

        # Copy Image
        shutil.copy(img_path, os.path.join(dest_img, img_fn))

        # Copy Label (if exists)
        if os.path.exists(txt_path):
            shutil.copy(txt_path, os.path.join(dest_txt, txt_fn))

    print("Dataset split complete.")


In [11]:

# Split the labelled data in the 'Label_data' folder using the default
# train_pct of 0.8; the copies land under ./data relative to the working directory.
split_dataset("Label_data")

Total images: 77
Splitting: 61 train | 16 validation
Dataset split complete.


In [25]:
def create_data_yaml(path_to_classes_txt, path_to_data_yaml):
    """
    Build the dataset config YAML from a classes.txt label map.

    Each non-blank line of the label map becomes one class name, in file
    order. The written config points at the 'data' folder with
    'train/images' and 'validation/images' as the train/val image folders.

    Args:
        path_to_classes_txt (str): Path to the label map, one class name per line.
        path_to_data_yaml (str): Path of the YAML config file to write.

    Returns:
        None. If the label map is missing or contains no class names, a
        message is printed and no config file is written.
    """
    if not os.path.exists(path_to_classes_txt):
        print(f'classes.txt file not found! Please create a classes.txt labelmap and move it to {path_to_classes_txt}')
        return

    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding; 'utf-8-sig' additionally drops a leading byte-order mark so it
    # cannot end up inside the first class name.
    with open(path_to_classes_txt, 'r', encoding='utf-8-sig') as f:
        classes = [line.strip() for line in f if line.strip()]
    number_of_classes = len(classes)

    if number_of_classes == 0:
        # A config with zero classes is unusable, so report it here rather than
        # writing one.
        print(f'No class names found in {path_to_classes_txt}; config file not written.')
        return

    # Create data dictionary
    data = {
        'path': 'data',
        'train': 'train/images',
        'val': 'validation/images',
        'nc': number_of_classes,
        'names': classes
    }

    # Write data to YAML file (key order kept as declared above)
    with open(path_to_data_yaml, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, sort_keys=False)
    print(f'Created config file at {path_to_data_yaml}')

# Label map lives inside the labelled-data folder; the generated config is
# written as 'data.yaml' relative to the working directory.
path_to_classes_txt = 'Label_data/classes.txt'
path_to_data_yaml = 'data.yaml'

# Generate the config file from the label map.
create_data_yaml(path_to_classes_txt, path_to_data_yaml)


Created config file at data.yaml


In [26]:
# Train `model` on the dataset described by data.yaml and keep the returned
# object in `results`.
results = model.train(
    data='data.yaml',
    epochs=50,
    imgsz=224,
    patience=10,
    # Use CUDA device 0 when available, otherwise fall back to the CPU.
    device=0 if torch.cuda.is_available() else 'cpu',
    workers=2,
    weight_decay=0.005,
    save=True,
    verbose=True,
)

Ultralytics 8.3.240  Python-3.10.19 torch-2.9.1+cpu CPU (12th Gen Intel Core i5-12450H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=10, perspective=0.0, plots=True, pose=12.0

In [None]:
# Reload the best.pt weights saved by the training run.
# The path is assembled with pathlib so it resolves on Linux/macOS as well as
# Windows; the previous raw string used backslashes, which are only path
# separators on Windows.
# NOTE(review): assumes the run directory is runs/detect/train. The training
# log shows name=train with exist_ok=False, so a repeated training run may be
# saved under a different folder name; confirm before loading.
model = YOLO(str(Path('runs') / 'detect' / 'train' / 'weights' / 'best.pt'))

In [35]:
# Load the per-epoch training log written by the training run.
# The path is assembled with pathlib so it resolves on Linux/macOS as well as
# Windows; the previous raw string used backslashes, which are only path
# separators on Windows.
# NOTE(review): assumes the run directory is runs/detect/train; confirm it
# matches the run you want to inspect.
df = pd.read_csv(Path('runs') / 'detect' / 'train' / 'results.csv')
df.head()

Unnamed: 0,epoch,time,train/box_loss,train/cls_loss,train/dfl_loss,metrics/precision(B),metrics/recall(B),metrics/mAP50(B),metrics/mAP50-95(B),val/box_loss,val/cls_loss,val/dfl_loss,lr/pg0,lr/pg1,lr/pg2
0,1,10.2913,1.39382,3.46988,1.36725,0.01084,1.0,0.14739,0.10179,0.54724,3.31793,0.96287,5e-05,5e-05,5e-05
1,2,13.9701,0.95433,3.24497,1.15683,0.00909,1.0,0.56127,0.4065,0.8261,2.99463,0.93178,0.000114,0.000114,0.000114
2,3,26.0865,0.71763,2.85956,1.02757,0.00798,1.0,0.67244,0.45457,0.84129,2.6817,0.84365,0.000176,0.000176,0.000176
3,4,37.5081,0.67593,2.16536,0.95206,0.00728,1.0,0.71334,0.52245,0.88497,2.37895,0.86233,0.000235,0.000235,0.000235
4,5,41.1901,0.65866,1.69944,0.93933,0.00721,1.0,0.68062,0.55252,0.75573,2.3657,0.85485,0.000292,0.000292,0.000292
