In [48]:
# Hide every CUDA device from this process so the training below runs on CPU.
# This has to execute before any library initialises CUDA.
import os
os.environ.update({"CUDA_VISIBLE_DEVICES": ""})
print("GPU disabled; training will run on CPU.")


GPU disabled; training will run on CPU.


In [49]:
!pip install -q ultralytics pycocotools pyyaml


# Imports

In [50]:
import json, shutil
from pathlib import Path
import yaml

# Configuration

In [52]:
import json, shutil
from pathlib import Path
import yaml

# Classes to keep from COCO; the list order is reused for `names` in data.yaml
TARGET_CLASSES = [
    "person","bicycle","car","motorcycle","bus",
    "truck","dog","cat","traffic light","stop sign"
]
# Maximum number of images taken for each split
TRAIN_COUNT = 1000
VAL_COUNT   = 200

# Paths
COCO_BASE = Path("/kaggle/input/coco-2017-dataset/coco2017")
WORK_DIR  = Path("/kaggle/working/coco_subset")
# parents=True so a missing parent folder is created as well, consistent with
# how the sub-folders are created later in the notebook
WORK_DIR.mkdir(parents=True, exist_ok=True)
print("Working directory:", WORK_DIR)


Working directory: /kaggle/working/coco_subset


# Create Subset Folders

In [53]:
# Sub-folders of WORK_DIR: one for the annotation JSONs, one per image split
subset_dirs = [WORK_DIR/name for name in ["annotations","train2017","val2017"]]
for folder in subset_dirs:
    folder.mkdir(parents=True, exist_ok=True)
print("Created:", [str(folder) for folder in subset_dirs])


Created: ['/kaggle/working/coco_subset/annotations', '/kaggle/working/coco_subset/train2017', '/kaggle/working/coco_subset/val2017']


# Build & Save COCO Subset Annotations

In [54]:
# Load full annotations; the with-blocks close each file once it is parsed
with open(COCO_BASE/"annotations"/"instances_train2017.json") as f:
    coco_train = json.load(f)
with open(COCO_BASE/"annotations"/"instances_val2017.json") as f:
    coco_val = json.load(f)

# Map names → COCO IDs
name2id  = {c["name"]:c["id"] for c in coco_train["categories"]}
# Fail with the offending names rather than a bare KeyError on the first one
missing = [n for n in TARGET_CLASSES if n not in name2id]
if missing:
    raise KeyError(f"TARGET_CLASSES not present in the COCO categories: {missing}")
keep_ids = [name2id[n] for n in TARGET_CLASSES]

def make_subset(coco, count, category_ids=None):
    """Build a COCO-style subset restricted to a set of categories.

    Parameters
    ----------
    coco : dict
        Parsed COCO instances data; its "images", "annotations" and
        "categories" lists are read.
    count : int
        Upper bound on the number of images kept.
    category_ids : iterable of int, optional
        Category ids to keep. When omitted, the notebook-level ``keep_ids``
        is used.

    Returns
    -------
    dict
        ``{"images": ..., "annotations": ..., "categories": ...}`` holding the
        selected images, the kept-category annotations on those images, and
        the kept category records taken from ``coco`` itself.
    """
    # Sets give constant-time membership tests; the lists scanned below can be long
    wanted = set(keep_ids if category_ids is None else category_ids)
    anns = [a for a in coco["annotations"] if a["category_id"] in wanted]
    # Sort before slicing so the chosen images do not depend on set iteration order
    img_ids = set(sorted({a["image_id"] for a in anns})[:count])
    imgs = [im for im in coco["images"] if im["id"] in img_ids]
    anns = [a for a in anns if a["image_id"] in img_ids]
    cats = [c for c in coco["categories"] if c["id"] in wanted]
    return {"images":imgs, "annotations":anns, "categories":cats}

sub_train = make_subset(coco_train, TRAIN_COUNT)
sub_val   = make_subset(coco_val,   VAL_COUNT)

# Write subset JSONs; each with-block flushes and closes its file before the
# paths are reported below
for subset, filename in [(sub_train, "train.json"), (sub_val, "val.json")]:
    with open(WORK_DIR/"annotations"/filename, "w") as f:
        json.dump(subset, f)

print("Subset JSONs written:",
      (WORK_DIR/"annotations"/"train.json"),
      (WORK_DIR/"annotations"/"val.json"))


Subset JSONs written: /kaggle/working/coco_subset/annotations/train.json /kaggle/working/coco_subset/annotations/val.json


# Copy all subset images

In [55]:
def _yolo_seg_lines(anns, width, height, class_index):
    """Convert COCO polygon annotations of one image into YOLO segmentation label lines.

    Each returned line is "<class> x1 y1 x2 y2 ..." with coordinates divided by
    the image size and clipped to [0, 1]. Annotations flagged ``iscrowd`` or
    whose segmentation is not a polygon list are skipped. Every polygon
    becomes its own line, so an object stored as several polygons is written
    as several instances (a simplification).
    """
    lines = []
    for ann in anns:
        polygons = ann.get("segmentation")
        if ann.get("iscrowd", 0) or not isinstance(polygons, list):
            continue
        cls = class_index[ann["category_id"]]
        for poly in polygons:
            if len(poly) < 6:  # a polygon needs at least three (x, y) points
                continue
            coords = []
            for i in range(0, len(poly) - 1, 2):
                x = min(max(poly[i] / width, 0.0), 1.0)
                y = min(max(poly[i + 1] / height, 0.0), 1.0)
                coords.append(f"{x:.6f} {y:.6f}")
            lines.append(f"{cls} " + " ".join(coords))
    return lines


# Copy each subset image and write its YOLO label file beside it. The subset
# JSONs are COCO-format and are not what the YOLO trainer reads.
# NOTE(review): Ultralytics is expected to look for "<image stem>.txt" next to
# the image when the image path has no "/images/" component — confirm against
# the installed version.
for split, subset in [("train2017", sub_train), ("val2017", sub_val)]:
    src = COCO_BASE/split
    dst = WORK_DIR/split
    # COCO category id → position in TARGET_CLASSES (the order used for `names`)
    class_index = {c["id"]: TARGET_CLASSES.index(c["name"]) for c in subset["categories"]}
    anns_by_image = {}
    for ann in subset["annotations"]:
        anns_by_image.setdefault(ann["image_id"], []).append(ann)
    for im in subset["images"]:
        shutil.copy(src/im["file_name"], dst/im["file_name"])
        label_lines = _yolo_seg_lines(
            anns_by_image.get(im["id"], []), im["width"], im["height"], class_index
        )
        label_path = (dst/im["file_name"]).with_suffix(".txt")
        label_path.write_text("".join(line + "\n" for line in label_lines))
print(f"Copied {len(sub_train['images'])} train and {len(sub_val['images'])} val images")


Copied 1000 train and 200 val images


# Create data.yaml for YOLOv8

In [56]:
# Dataset description passed to model.train(data="data.yaml"): image folders,
# number of classes and class names
data_yaml = {
    'train': str(WORK_DIR/"train2017"),
    'val':   str(WORK_DIR/"val2017"),
    'nc':    len(TARGET_CLASSES),
    'names': TARGET_CLASSES
}
with open("data.yaml","w") as f:
    yaml.dump(data_yaml, f)
print("data.yaml:")
# read_text() opens and closes the file, unlike a bare open(...).read()
print(Path("data.yaml").read_text())


data.yaml:
names:
- person
- bicycle
- car
- motorcycle
- bus
- truck
- dog
- cat
- traffic light
- stop sign
nc: 10
train: /kaggle/working/coco_subset/train2017
val: /kaggle/working/coco_subset/val2017



# Fine-Tune YOLOv8-Seg Nano

In [None]:
from ultralytics import YOLO

# Load COCO-pretrained segmentation nano
model = YOLO("yolov8n-seg.pt")

# Train for 10 epochs on the CPU. The device is named explicitly so the run
# stays on CPU even if the CUDA_VISIBLE_DEVICES cell was skipped or was
# executed after CUDA had already been initialised.
model.train(
    data="data.yaml",
    epochs=10,
    imgsz=640,
    batch=16,
    device="cpu",
    name="custom_yolov8n_seg",
    project="runs/train"
)


# Save Best Weights

In [None]:
from pathlib import Path
import shutil

# Re-running training can leave several run folders (custom_yolov8n_seg,
# custom_yolov8n_seg2, ...), so take the most recently written best.pt
# instead of assuming the un-suffixed folder is the current run.
candidates = sorted(
    Path("runs/train").glob("custom_yolov8n_seg*/weights/best.pt"),
    key=lambda p: p.stat().st_mtime,
)
if not candidates:
    raise FileNotFoundError(
        "No best.pt found under runs/train/custom_yolov8n_seg*/weights; "
        "run the training cell first."
    )
best = candidates[-1]
out  = Path("yolov8n_seg_custom.pt")
shutil.copy(best, out)
print(f"✅ Fine-tuned weights saved to {out}")
