In [2]:
import os, glob, shutil, tqdm
from sklearn.model_selection import train_test_split

# Paths
base_dir = r'C:\Users\aswin\Downloads\Yolo_test_dataset'
img_dir = os.path.join(base_dir, 'TsignDet Test Database', 'test_image')
ann_dir = os.path.join(base_dir, 'TsignDet Test Database Annotation', 'lable')

output_dir = r'C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset'
img_out_dir = os.path.join(output_dir, 'images')
label_out_dir = os.path.join(output_dir, 'labels')

# Step 1: Collect all valid image-label pairs
images = glob.glob(os.path.join(img_dir, '*.jpg'))
pairs = [(img, os.path.join(ann_dir, os.path.basename(img).replace('.jpg', '.txt')))
         for img in images if os.path.exists(os.path.join(ann_dir, os.path.basename(img).replace('.jpg', '.txt')))]

# Step 2: Split into train/val/test
train, temp = train_test_split(pairs, test_size=0.3, random_state=42)
val, test = train_test_split(temp, test_size=1/3, random_state=42)

splits = {'train': train, 'val': val, 'test': test}

# Step 3: Conversion function (polygon to YOLO box)
def convert_polygon_to_yolo(poly_line, img_w=640, img_h=480):
    nums = list(map(int, poly_line.strip().split(',')))
    xs = nums[::2]
    ys = nums[1::2]
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    x_c = (x_min + x_max) / 2 / img_w
    y_c = (y_min + y_max) / 2 / img_h
    w   = (x_max - x_min) / img_w
    h   = (y_max - y_min) / img_h
    return f"0 {x_c:.6f} {y_c:.6f} {w:.6f} {h:.6f}"

# Step 4: Copy + Convert
for split, data in splits.items():
    img_dst = os.path.join(img_out_dir, split)
    lbl_dst = os.path.join(label_out_dir, split)
    os.makedirs(img_dst, exist_ok=True)
    os.makedirs(lbl_dst, exist_ok=True)

    for img_path, ann_path in tqdm.tqdm(data, desc=f'Preparing {split}'):
        fname = os.path.basename(img_path)
        shutil.copy(img_path, os.path.join(img_dst, fname))

        with open(ann_path, 'r') as f:
            lines = f.readlines()

        yolo_lines = []
        for line in lines:
            if ',' in line:
                try:
                    yolo_lines.append(convert_polygon_to_yolo(line))
                except Exception as e:
                    print(f"⚠️ Skipping bad line in {ann_path}: {line.strip()}")

        with open(os.path.join(lbl_dst, fname.replace('.jpg', '.txt')), 'w') as out_f:
            out_f.write('\n'.join(yolo_lines))

print('✅ Dataset conversion complete for YOLOv8 (1-class detection only).')


Preparing train: 100%|██████████| 426/426 [00:00<00:00, 437.28it/s]
Preparing val: 100%|██████████| 122/122 [00:00<00:00, 454.22it/s]
Preparing test: 100%|██████████| 61/61 [00:00<00:00, 440.65it/s]


✅ Dataset conversion complete for YOLOv8 (1-class detection only).


In [10]:
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.train(data=r'C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\tsdd.yaml', epochs=10, imgsz=640)

Ultralytics 8.3.161  Python-3.12.3 torch-2.7.1+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\tsdd.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train10, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, 

[34m[1mtrain: [0mScanning C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\labels\train... 1403 images, 468 backgrounds, 44 corrupt: 100%|██████████| 1871/1871 [00:04<00:00, 455.85it/s]

[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd (34).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     3.2086      3.2854      3.4383      3.2854      3.7391      3.2635]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd (36).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.9742      1.6073]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd_1000.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.4719      1.3844]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd_1003.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.7984      1.1323]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd_1011.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     1.




[34m[1mtrain: [0mNew cache created: C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\labels\train.cache
[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 432.239.3 MB/s, size: 137.7 KB)


[34m[1mval: [0mScanning C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\labels\val... 122 images, 0 backgrounds, 2 corrupt: 100%|██████████| 122/122 [00:00<00:00, 410.35it/s]

[34m[1mval: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\val\tsd (33).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     3.2008      3.6531]
[34m[1mval: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\val\tsd (35).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.7891      4.0146      3.0109      3.9875      3.2133      4.0583]
[34m[1mval: [0mNew cache created: C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\labels\val.cache





Plotting labels to runs\detect\train10\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train10[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      2.355      4.021      1.591          1        640: 100%|██████████| 115/115 [13:10<00:00,  6.87s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:13<00:00,  3.28s/it]

                   all        120        203      0.379      0.327      0.261     0.0801






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      2.418      2.986      1.601          2        640: 100%|██████████| 115/115 [12:01<00:00,  6.27s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:11<00:00,  2.89s/it]

                   all        120        203      0.327       0.35      0.247     0.0862






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      2.362      2.571        1.6          5        640: 100%|██████████| 115/115 [11:41<00:00,  6.10s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:10<00:00,  2.65s/it]

                   all        120        203      0.447      0.378      0.318      0.109






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G      2.341      2.396      1.595          3        640: 100%|██████████| 115/115 [11:41<00:00,  6.10s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:10<00:00,  2.69s/it]

                   all        120        203      0.445       0.41      0.334      0.119






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      2.294      2.185      1.565          4        640: 100%|██████████| 115/115 [11:36<00:00,  6.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:11<00:00,  2.86s/it]

                   all        120        203      0.442      0.399      0.362      0.124






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G      2.268      2.156      1.557          5        640: 100%|██████████| 115/115 [11:43<00:00,  6.11s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:10<00:00,  2.71s/it]

                   all        120        203      0.513      0.404      0.384      0.148






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G      2.235      2.057      1.538          7        640: 100%|██████████| 115/115 [11:44<00:00,  6.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:10<00:00,  2.69s/it]

                   all        120        203      0.484      0.414      0.388      0.156






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G      2.236      1.996      1.504          4        640: 100%|██████████| 115/115 [15:45<00:00,  8.22s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:19<00:00,  4.97s/it]

                   all        120        203      0.431      0.458      0.414      0.156






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      2.188      1.962      1.478          4        640: 100%|██████████| 115/115 [19:49<00:00, 10.34s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:19<00:00,  4.83s/it]

                   all        120        203      0.459      0.468      0.462      0.173






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      2.188      1.916      1.479          2        640: 100%|██████████| 115/115 [18:32<00:00,  9.67s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:19<00:00,  4.98s/it]

                   all        120        203      0.486      0.452      0.441      0.169






10 epochs completed in 2.336 hours.
Optimizer stripped from runs\detect\train10\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train10\weights\best.pt, 6.2MB

Validating runs\detect\train10\weights\best.pt...
Ultralytics 8.3.161  Python-3.12.3 torch-2.7.1+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:18<00:00,  4.62s/it]


                   all        120        203       0.46      0.468      0.462      0.173
Speed: 3.4ms preprocess, 129.4ms inference, 0.0ms loss, 1.9ms postprocess per image
Results saved to [1mruns\detect\train10[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000001DEB9A03560>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.0480

In [16]:
from ultralytics import YOLO

# Load your finished model as the starting point
model = YOLO(r"C:\Users\aswin\jupyter_codes\runs\detect\train10\weights\last.pt")  # or use best.pt

# Start new training from that point
model.train(data=r'C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\tsdd.yaml', epochs=5)  # This means 5 NEW epochs

Ultralytics 8.3.161  Python-3.12.3 torch-2.7.1+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\tsdd.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=5, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=C:\Users\aswin\jupyter_codes\runs\detect\train10\weights\last.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train12, nbs=64, nms=False, opset=None, optimize=Fal

[34m[1mtrain: [0mScanning C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\labels\train.cache... 1403 images, 468 backgrounds, 44 corrupt: 100%|██████████| 1871/1871 [00:00<?, ?it/s]

[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd (34).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     3.2086      3.2854      3.4383      3.2854      3.7391      3.2635]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd (36).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.9742      1.6073]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd_1000.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.4719      1.3844]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd_1003.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.7984      1.1323]
[34m[1mtrain: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\train\tsd_1011.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     1.


[34m[1mval: [0mScanning C:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\labels\val.cache... 122 images, 0 backgrounds, 2 corrupt: 100%|██████████| 122/122 [00:00<?, ?it/s]

[34m[1mval: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\val\tsd (33).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     3.2008      3.6531]
[34m[1mval: [0mC:\Users\aswin\jupyter_codes\tsdd_yolo_dataset\images\val\tsd (35).jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     2.7891      4.0146      3.0109      3.9875      3.2133      4.0583]
Plotting labels to runs\detect\train12\labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train12[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5         0G      2.269      2.027      1.478          4        640: 100%|██████████| 115/115 [19:39<00:00, 10.25s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:14<00:00,  3.52s/it]

                   all        120        203      0.503      0.424      0.457       0.17






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5         0G      2.217      1.994      1.465          3        640: 100%|██████████| 115/115 [19:10<00:00, 10.00s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:17<00:00,  4.48s/it]

                   all        120        203      0.393      0.433       0.31      0.106






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5         0G      2.176      1.958      1.499         48        640:   5%|▌         | 6/115 [00:56<16:59,  9.36s/it]


KeyboardInterrupt: 