In [5]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install the ultralytics library for YOLOv8
!pip install ultralytics -q

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
import json
import os
from tqdm import tqdm

# Path to the dataset folder in Google Drive
# TODO: Update this path to the actual location of your dataset in Google Drive
dataset_base_path = '/content/drive/MyDrive/dataset'

# --- 1. Load Class Names from meta.json ---
meta_path = os.path.join(dataset_base_path, 'meta.json')
with open(meta_path, 'r') as f:
    meta_data = json.load(f)
class_names = [cls['title'] for cls in meta_data['classes']]
class_map = {name: i for i, name in enumerate(class_names)}

print(f"✅ Found classes: {class_map}")

# --- 2. Conversion Function ---
def convert_to_yolo(data, img_width, img_height, class_map):
    yolo_annotations = []
    for obj in data['objects']:
        class_title = obj['classTitle']
        if class_title not in class_map:
            continue

        class_id = class_map[class_title]

        # Extract coordinates from "points" -> "exterior"
        # It's a bounding box: [[x1, y1], [x2, y2]]
        x1, y1 = obj['points']['exterior'][0]
        x2, y2 = obj['points']['exterior'][1]

        # Calculate bounding box center, width, and height
        box_width = x2 - x1
        box_height = y2 - y1
        x_center = x1 + box_width / 2
        y_center = y1 + box_height / 2

        # Normalize coordinates
        x_center_norm = x_center / img_width
        y_center_norm = y_center / img_height
        width_norm = box_width / img_width
        height_norm = box_height / img_height

        yolo_annotations.append(f"{class_id} {x_center_norm} {y_center_norm} {width_norm} {height_norm}")
    return "\n".join(yolo_annotations)

# --- 3. Process all dataset splits (train, val, test) ---
for split in ['train', 'val', 'test']:
    ann_dir = os.path.join(dataset_base_path, split, 'ann')
    label_dir = os.path.join(dataset_base_path, split, 'labels')
    os.makedirs(label_dir, exist_ok=True)

    print(f"\nProcessing '{split}' split...")
    # Check if the annotation directory exists before listing files
    if not os.path.exists(ann_dir):
        print(f"Warning: Annotation directory not found for split '{split}': {ann_dir}")
        continue # Skip to the next split if the directory doesn't exist

    for ann_file in tqdm(os.listdir(ann_dir)):
        if ann_file.endswith('.json'):
            json_path = os.path.join(ann_dir, ann_file)
            with open(json_path, 'r') as f:
                annotation_data = json.load(f)

            # Get image dimensions
            img_width = annotation_data['size']['width']
            img_height = annotation_data['size']['height']

            # Convert to YOLO format
            yolo_content = convert_to_yolo(annotation_data, img_width, img_height, class_map)

            # Save the new .txt label file
            label_file_name = os.path.splitext(ann_file)[0] + '.txt'
            label_path = os.path.join(label_dir, label_file_name)
            with open(label_path, 'w') as f:
                f.write(yolo_content)

print("\n✅ Conversion to YOLO format complete!")

✅ Found classes: {'broken': 0, 'insulator': 1, 'pollution-flashover': 2}

Processing 'train' split...


100%|██████████| 219/219 [00:04<00:00, 52.33it/s]



Processing 'val' split...


100%|██████████| 30/30 [00:31<00:00,  1.05s/it]



Processing 'test' split...


100%|██████████| 44/44 [01:06<00:00,  1.52s/it]


✅ Conversion to YOLO format complete!





In [7]:
import yaml
import os

# Create the YAML configuration dictionary
data_yaml = {
    'train': os.path.join(dataset_base_path, 'train', 'img'),
    'val': os.path.join(dataset_base_path, 'val', 'img'),
    'test': os.path.join(dataset_base_path, 'test', 'img'),
    'nc': len(class_names),
    'names': class_names
}

# Write the dictionary to a YAML file
with open('/content/insulator_dataset.yaml', 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

print("✅ insulator_dataset.yaml created successfully.")
print("\nYAML file content:")
!cat /content/insulator_dataset.yaml

✅ insulator_dataset.yaml created successfully.

YAML file content:
names:
- broken
- insulator
- pollution-flashover
nc: 3
test: /content/drive/MyDrive/dataset/test/img
train: /content/drive/MyDrive/dataset/train/img
val: /content/drive/MyDrive/dataset/val/img


In [None]:
from ultralytics import YOLO

# Load a pretrained YOLOv8 model
model = YOLO('yolov8n.pt')

# Train the model
results = model.train(
    data='/content/insulator_dataset.yaml',
    imgsz=640,
    epochs=10,
    batch=16,
    name='insulator_yolov8'
)

Ultralytics 8.3.169 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/insulator_dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=insulator_yolov82, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=Tru

[34m[1mtrain: [0mScanning /content/drive/MyDrive/dataset/train/img.cache... 0 images, 219 backgrounds, 0 corrupt: 100%|██████████| 219/219 [00:00<?, ?it/s]



[34m[1mtrain: [0mScanning /content/drive/MyDrive/dataset/train/img.cache... 0 images, 219 backgrounds, 0 corrupt: 100%|██████████| 219/219 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.3±0.1 ms, read: 305.4±89.5 MB/s, size: 1399.0 KB)



[34m[1mval: [0mScanning /content/drive/MyDrive/dataset/val/img.cache... 0 images, 30 backgrounds, 0 corrupt: 100%|██████████| 30/30 [00:00<?, ?it/s]

Plotting labels to runs/detect/insulator_yolov82/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/insulator_yolov82[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



       1/10         0G          0      120.7          0          0        640: 100%|██████████| 14/14 [03:33<00:00, 15.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:56<00:00, 56.23s/it]

                   all         30          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G          0        115          0          0        640:   7%|▋         | 1/14 [00:14<03:02, 14.08s/it]

In [None]:
from ultralytics import YOLO

# The path to the best weights saved during training
best_model_path = '/content/runs/detect/insulator_yolov8/weights/best.pt'

# Load the trained model
model = YOLO(best_model_path)

# Run validation on the test set
metrics = model.val(split='test')

print("\nValidation Metrics:")
print(f"mAP50-95: {metrics.box.map:.4f}")
print(f"mAP50: {metrics.box.map50:.4f}")

In [None]:
from IPython.display import Image

# Display the results.png file which contains charts for mAP, precision, recall, and loss
Image(filename='/content/runs/detect/insulator_yolov8/results.png', width=800)

In [None]:
from IPython.display import Image

# Display the confusion matrix
Image(filename='/content/runs/detect/insulator_yolov8/confusion_matrix.png', width=600)

In [None]:
from google.colab.patches import cv2_imshow
import cv2

# Path to a test image
test_image_path = '/content/dataset/test/img/des-2d-045-090-p2_1052.png' # You can change this to any image

# Run inference
results = model(test_image_path)

# Plot the results
res_plotted = results[0].plot()

# Show the image with detections
cv2_imshow(res_plotted)