In [None]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Colab Cell 1 — adjust this path to where your dataset lives on Drive
DRIVE_PATH = "/content/drive/MyDrive/archive"   # folder
LOCAL_ROOT = "/content/urban_yolo"

import os, shutil
print("Drive path:", DRIVE_PATH)
print("Local root:", LOCAL_ROOT)

# If local folder already exists, remove it
if os.path.exists(LOCAL_ROOT):
    print("Local folder exists — removing to ensure clean copy")
    shutil.rmtree(LOCAL_ROOT)

# Decide whether it's a zip or a folder in Drive
if os.path.isfile(DRIVE_PATH) and DRIVE_PATH.lower().endswith((".zip", ".tar.gz", ".tgz")):
    print("Detected archive file. Copying and will unzip...")
    shutil.copy(DRIVE_PATH, "/content/")
    archive_name = os.path.basename(DRIVE_PATH)
    print("Copied archive:", archive_name)
    if archive_name.endswith(".zip"):
        !unzip -q /content/{archive_name} -d {LOCAL_ROOT}
    else:
        !tar -xzf /content/{archive_name} -C {LOCAL_ROOT}
    print("Unpacked archive to", LOCAL_ROOT)
else:
    print("Assuming DRIVE_PATH is a folder. Copying folder contents...")
    src = DRIVE_PATH
    if os.path.exists(src):
        shutil.copytree(src, LOCAL_ROOT)   # LOCAL_ROOT doesn’t exist anymore
        print("Copied folder to", LOCAL_ROOT)
    else:
        raise FileNotFoundError(f"Drive path not found: {src}")

# final listing
print("\nTop-level content:")
!ls -la {LOCAL_ROOT} | head -n 40


Drive path: /content/drive/MyDrive/archive
Local root: /content/urban_yolo
Local folder exists — removing to ensure clean copy
Assuming DRIVE_PATH is a folder. Copying folder contents...
Copied folder to /content/urban_yolo

Top-level content:
total 48
drwx------ 11 root root 4096 Sep 17 21:40 .
drwxr-xr-x  1 root root 4096 Sep 18 01:13 ..
-rw-------  1 root root  510 Sep 17 20:24 config.yaml
drwx------  3 root root 4096 Sep 17 21:40 Damaged concrete structures
drwx------  3 root root 4096 Sep 17 21:40 DamagedElectricalPoles
drwx------  3 root root 4096 Sep 17 21:40 DamagedRoadSigns
drwx------  3 root root 4096 Sep 17 21:40 DeadAnimalsPollution
drwx------  3 root root 4096 Sep 17 21:40 FallenTrees
drwx------  3 root root 4096 Sep 17 21:40 Garbage
drwx------  3 root root 4096 Sep 17 21:40 Graffitti
drwx------  3 root root 4096 Sep 17 21:40 IllegalParking
drwx------  3 root root 4096 Sep 17 21:40 Potholes and RoadCracks


In [None]:
# Colab cell 1 — flatten folders and make consistent labels
import os, shutil, glob, re, sys
ROOT = "/content/urban_yolo"
IMAGES_DIR = os.path.join(ROOT, "images")
LABELS_DIR = os.path.join(ROOT, "labels")
os.makedirs(IMAGES_DIR, exist_ok=True)
os.makedirs(LABELS_DIR, exist_ok=True)

# detect class folders (exclude images/ labels/ urban.yaml config etc.)
exclude = {"images","labels"}
all_entries = sorted([d for d in os.listdir(ROOT) if os.path.isdir(os.path.join(ROOT,d)) and d not in exclude])
print("Detected class folders (will be used as class names):")
for i,name in enumerate(all_entries):
    print(f"  {i:02d}: {name}")

# create mapping folder_name -> id (deterministic by sorted order)
class_names = all_entries.copy()
cls_to_id = {name: i for i,name in enumerate(class_names)}
print("\nMapping (class -> id):")
print(cls_to_id)

# Helper to rewrite label files: assumes each label line: class x_center y_center w h
def remap_label_file(src_lbl_path, dst_lbl_path, new_class_id):
    try:
        with open(src_lbl_path, "r") as f:
            lines = [l.strip() for l in f if l.strip()]
    except FileNotFoundError:
        lines = []
    out_lines = []
    for line in lines:
        parts = line.split()
        if len(parts) >= 5:
            # replace class id only, keep coords
            out = " ".join([str(new_class_id)] + parts[1:5])
            out_lines.append(out)
        else:
            # malformed line: keep as-is but warn
            out_lines.append(line)
    with open(dst_lbl_path, "w") as f:
        for L in out_lines:
            f.write(L + "\n")

# iterate folders and copy images + labels into flat structure
copied_images = 0
copied_labels = 0
no_label_count = 0
for cls in class_names:
    folder = os.path.join(ROOT, cls)
    files = sorted(os.listdir(folder))
    for fname in files:
        if fname.lower().endswith((".jpg",".jpeg",".png")):
            src_img = os.path.join(folder, fname)
            dst_img = os.path.join(IMAGES_DIR, fname)
            # ensure unique destination filename (if duplicates exist, prefix with class)
            if os.path.exists(dst_img):
                base, ext = os.path.splitext(fname)
                dst_img = os.path.join(IMAGES_DIR, f"{cls.replace(' ','')}{base}{ext}")
            shutil.copy2(src_img, dst_img)
            copied_images += 1

            # label handling: look for same-basename .txt in same class folder
            base = os.path.splitext(fname)[0]
            src_lbl = os.path.join(folder, base + ".txt")
            dst_lbl = os.path.join(LABELS_DIR, os.path.splitext(os.path.basename(dst_img))[0] + ".txt")
            if os.path.exists(src_lbl):
                remap_label_file(src_lbl, dst_lbl, cls_to_id[cls])
                copied_labels += 1
            else:
                # create empty label file so YOLO recognizes no objects in that image
                open(dst_lbl, "w").close()
                no_label_count += 1

print(f"\nDone. Images copied: {copied_images}, labels present and remapped: {copied_labels}, images with NO label (empty .txt): {no_label_count}")
print("Images dir:",IMAGES_DIR)
print("Labels dir:",LABELS_DIR)

Detected class folders (will be used as class names):
  00: Damaged concrete structures
  01: DamagedElectricalPoles
  02: DamagedRoadSigns
  03: DeadAnimalsPollution
  04: FallenTrees
  05: Garbage
  06: Graffitti
  07: IllegalParking
  08: Potholes and RoadCracks

Mapping (class -> id):
{'Damaged concrete structures': 0, 'DamagedElectricalPoles': 1, 'DamagedRoadSigns': 2, 'DeadAnimalsPollution': 3, 'FallenTrees': 4, 'Garbage': 5, 'Graffitti': 6, 'IllegalParking': 7, 'Potholes and RoadCracks': 8}

Done. Images copied: 0, labels present and remapped: 0, images with NO label (empty .txt): 0
Images dir: /content/urban_yolo/images
Labels dir: /content/urban_yolo/labels


In [None]:
# Colab cell 2 — create urban.yaml from class_names
import yaml, os
ROOT = "/content/urban_yolo"
yaml_path = os.path.join(ROOT, "urban.yaml")

# build names dict with numeric keys (0..n-1)
names = {i: name.replace(" ", "_") for i,name in enumerate(class_names)}
data_yaml = {
    "path": ROOT,
    "train": "images/train" if os.path.isdir(os.path.join(ROOT,"images","train")) else "images",
    "val": "images/val" if os.path.isdir(os.path.join(ROOT,"images","val")) else "images",
    "test": "images/test" if os.path.isdir(os.path.join(ROOT,"images","test")) else "images",
    "nc": len(names),
    "names": names
}
with open(yaml_path, "w") as f:
    yaml.dump(data_yaml, f)
print("Created", yaml_path)
print("nc:", data_yaml["nc"])
print("names:", data_yaml["names"])

Created /content/urban_yolo/urban.yaml
nc: 9
names: {0: 'Damaged_concrete_structures', 1: 'DamagedElectricalPoles', 2: 'DamagedRoadSigns', 3: 'DeadAnimalsPollution', 4: 'FallenTrees', 5: 'Garbage', 6: 'Graffitti', 7: 'IllegalParking', 8: 'Potholes_and_RoadCracks'}


In [None]:
# Colab cell 3 — split images/ + labels/ into train/val/test (moves files)
import os, random, shutil, glob
ROOT = "/content/urban_yolo"
IMG = os.path.join(ROOT, "images")
LBL = os.path.join(ROOT, "labels")

# Only split if images/train doesn't already exist
if not os.path.isdir(os.path.join(IMG, "train")):
    imgs = [f for f in os.listdir(IMG) if f.lower().endswith(('.jpg','.jpeg','.png'))]
    print("Total flat images to split:", len(imgs))
    random.seed(42)
    random.shuffle(imgs)
    n = len(imgs)
    t1 = int(0.8*n); t2 = int(0.9*n)
    splits = {"train": imgs[:t1], "val": imgs[t1:t2], "test": imgs[t2:]}
    for split, files in splits.items():
        os.makedirs(os.path.join(IMG, split), exist_ok=True)
        os.makedirs(os.path.join(LBL, split), exist_ok=True)
        for fname in files:
            shutil.move(os.path.join(IMG, fname), os.path.join(IMG, split, fname))
            lbl = os.path.splitext(fname)[0] + ".txt"
            src_lbl = os.path.join(LBL, lbl)
            dst_lbl = os.path.join(LBL, split, lbl)
            if os.path.exists(src_lbl):
                shutil.move(src_lbl, dst_lbl)
            else:
                open(dst_lbl,"w").close()
    print("Split complete. Sizes:", {k: len(v) for k,v in splits.items()})
else:
    print("images/train already exists — skipping split.")

Total flat images to split: 0
Split complete. Sizes: {'train': 0, 'val': 0, 'test': 0}


In [None]:
# Colab cell 4 — count & inspect
import os, glob, json
ROOT = "/content/urban_yolo"
for part in ["train","val","test"]:
    imgp = os.path.join(ROOT,"images",part) if os.path.isdir(os.path.join(ROOT,"images",part)) else os.path.join(ROOT,"images")
    lblp = os.path.join(ROOT,"labels",part) if os.path.isdir(os.path.join(ROOT,"labels",part)) else os.path.join(ROOT,"labels")
    imgs = glob.glob(os.path.join(imgp,"."))
    lbls = glob.glob(os.path.join(lblp,"*.txt"))
    print(f"{part.upper():5}: images={len(imgs):5}, labels={len(lbls):5}, img-folder={imgp}")
# print a few sample label files
import random
sample_lbls = glob.glob(os.path.join(ROOT,"labels","","*.txt"), recursive=True)[:10]
print("\nSample label files (first 10):")
for s in sample_lbls[:10]:
    print("==", s)
    print(open(s).read().strip()[:400])
    print("----")
print("\nIf label lines look like: '2 0.5123 0.412 0.23 0.15' that's good (class x_center y_center w h normalized).")

TRAIN: images=    1, labels=    0, img-folder=/content/urban_yolo/images/train
VAL  : images=    1, labels=    0, img-folder=/content/urban_yolo/images/val
TEST : images=    1, labels=    0, img-folder=/content/urban_yolo/images/test

Sample label files (first 10):

If label lines look like: '2 0.5123 0.412 0.23 0.15' that's good (class x_center y_center w h normalized).


In [None]:
# Cell: delete top-level folders with zero images (run in Colab)
import os, shutil

ROOT = "/content/urban_yolo"
img_exts = (".jpg",".jpeg",".png",".bmp",".tif",".tiff",".webp")
removed = []
kept = []

for entry in sorted(os.listdir(ROOT)):
    path = os.path.join(ROOT, entry)
    if not os.path.isdir(path):
        continue
    # count images recursively
    img_count = 0
    for r, _, files in os.walk(path):
        for f in files:
            if f.lower().endswith(img_exts):
                img_count += 1
    if img_count == 0:
        try:
            shutil.rmtree(path)
            removed.append(entry)
            print(f"Removed (no images): {entry}")
        except Exception as e:
            print(f"Failed to remove {entry}: {e}")
    else:
        kept.append((entry, img_count))
        print(f"Kept: {entry}  — images: {img_count}")

print("\nSummary:")
print("Removed:", removed)
print("Kept:", kept)

# final top-level listing
print("\nFinal top-level content:")
for name in sorted(os.listdir(ROOT)):
    p = os.path.join(ROOT, name)
    flag = "DIR" if os.path.isdir(p) else "FILE"
    print(f"{flag}\t{name}")


Kept: Damaged concrete structures  — images: 9315
Kept: DamagedElectricalPoles  — images: 8112
Kept: Garbage  — images: 300
Kept: Potholes and RoadCracks  — images: 104

Summary:
Removed: []
Kept: [('Damaged concrete structures', 9315), ('DamagedElectricalPoles', 8112), ('Garbage', 300), ('Potholes and RoadCracks', 104)]

Final top-level content:
DIR	Damaged concrete structures
DIR	DamagedElectricalPoles
DIR	Garbage
DIR	Potholes and RoadCracks
FILE	config.yaml
FILE	urban.yaml


In [None]:
# Cell 1 — inspect structure & annotations
import os, glob
ROOT = "/content/urban_yolo"
img_exts = (".jpg",".jpeg",".png",".bmp",".tif",".tiff",".webp")
ann_exts = (".txt",".xml",".json")

summary = {}
for cls in sorted(os.listdir(ROOT)):
    p = os.path.join(ROOT, cls)
    if not os.path.isdir(p):
        continue
    summary[cls] = {"images":0, "annotations":0, "has_splits": False, "split_names": []}
    for root, dirs, files in os.walk(p):
        for f in files:
            lf = f.lower()
            if lf.endswith(img_exts):
                summary[cls]["images"] += 1
            if lf.endswith(ann_exts):
                summary[cls]["annotations"] += 1
        # detect presence of train/val/test folders directly under class
        rel = os.path.relpath(root, p)
        if rel in ("train","val","test"):
            if "has_splits" in summary[cls]:
                summary[cls]["has_splits"] = True
                summary[cls]["split_names"].append(rel)

# Print concise report
for cls, info in summary.items():
    print(f"{cls:<30} images: {info['images']:6}   anns: {info['annotations']:6}   splits: {info['has_splits']} {info['split_names']}")


Damaged concrete structures    images:   9315   anns:   8554   splits: False []
DamagedElectricalPoles         images:   8112   anns:   8112   splits: False []
Garbage                        images:    300   anns:      0   splits: False []
Potholes and RoadCracks        images:    104   anns:      0   splits: False []


In [None]:
# Cell 2 — reorganize dataset automatically
import os, glob, shutil, random
random.seed(42)

SRC = "/content/urban_yolo"
YOLO_DST = "/content/urban_yolo_prepared"
IMGCLS_DST = "/content/urban_yolo_imageclass"
img_exts = (".jpg",".jpeg",".png",".bmp",".tif",".tiff",".webp")
ann_exts = (".txt",".xml",".json")

# helper to ensure dir
def mk(d): os.makedirs(d, exist_ok=True)

# detect if ANY annotation files exist anywhere
any_anns = False
for root, _, files in os.walk(SRC):
    for f in files:
        if f.lower().endswith(ann_exts):
            any_anns = True
            break
    if any_anns: break

print("Annotations found anywhere?:", any_anns)

if any_anns:
    print("Preparing YOLO-style dataset at:", YOLO_DST)
    # create structure
    for split in ("train","val","test"):
        mk(os.path.join(YOLO_DST, "images", split))
        mk(os.path.join(YOLO_DST, "labels", split))
    # For each class, try to find images; if pre-split exists (train/val/test inside class) keep them, else split 80/10/10
    for cls in sorted(os.listdir(SRC)):
        cls_path = os.path.join(SRC, cls)
        if not os.path.isdir(cls_path): continue
        # check if class contains train/val/test
        splits_present = [d for d in ("train","val","test") if os.path.isdir(os.path.join(cls_path, d))]
        if splits_present:
            # iterate through each split folder and copy images+anns
            for sp in splits_present:
                # likely nested under images/ subfolder
                img_dirs = []
                candidate = os.path.join(cls_path, sp, "images")
                if os.path.isdir(candidate):
                    img_dirs.append(candidate)
                else:
                    img_dirs.append(os.path.join(cls_path, sp))
                for idir in img_dirs:
                    if not os.path.isdir(idir): continue
                    for f in os.listdir(idir):
                        if f.lower().endswith(img_exts):
                            src_img = os.path.join(idir, f)
                            shutil.copy(src_img, os.path.join(YOLO_DST, "images", sp, f))
                            # try copy matching annotations by basename
                            base = os.path.splitext(f)[0]
                            for aext in ann_exts:
                                src_ann = os.path.join(os.path.dirname(idir), "labels", base + aext)
                                if os.path.exists(src_ann):
                                    shutil.copy(src_ann, os.path.join(YOLO_DST, "labels", sp, base + aext))
                                else:
                                    # check same folder as images
                                    src_ann2 = os.path.join(idir, base + aext)
                                    if os.path.exists(src_ann2):
                                        shutil.copy(src_ann2, os.path.join(YOLO_DST, "labels", sp, base + aext))
        else:
            # no pre-splits: gather all images and split 80/10/10
            imgs = []
            for root, _, files in os.walk(cls_path):
                for f in files:
                    if f.lower().endswith(img_exts):
                        imgs.append(os.path.join(root, f))
            if not imgs:
                continue
            random.shuffle(imgs)
            n = len(imgs)
            n_train = int(n*0.8)
            n_val = int(n*0.1)
            train_imgs = imgs[:n_train]
            val_imgs = imgs[n_train:n_train+n_val]
            test_imgs = imgs[n_train+n_val:]
            for sp, group in [("train", train_imgs), ("val", val_imgs), ("test", test_imgs)]:
                for src_img in group:
                    fname = os.path.basename(src_img)
                    shutil.copy(src_img, os.path.join(YOLO_DST, "images", sp, fname))
                    base = os.path.splitext(fname)[0]
                    # try to find annotation file near the image
                    found_ann = False
                    for aext in ann_exts:
                        cand = os.path.join(os.path.dirname(src_img), base + aext)
                        if os.path.exists(cand):
                            shutil.copy(cand, os.path.join(YOLO_DST, "labels", sp, base + aext))
                            found_ann = True
                            break
    print("YOLO-style dataset prepared. Preview counts:")
    for sp in ("train","val","test"):
        imgs = len(list(glob.glob(os.path.join(YOLO_DST,"images",sp,"*"))))
        anns = len(list(glob.glob(os.path.join(YOLO_DST,"labels",sp,"*"))))
        print(f"{sp}: images={imgs}, annotations={anns}")

else:
    print("No annotations found — preparing ImageFolder classification dataset at:", IMGCLS_DST)
    for split in ("train","val","test"):
        for cls in sorted(os.listdir(SRC)):
            cls_path = os.path.join(SRC, cls)
            if not os.path.isdir(cls_path): continue
            # gather images recursively
            imgs = []
            for root, _, files in os.walk(cls_path):
                for f in files:
                    if f.lower().endswith(img_exts):
                        imgs.append(os.path.join(root, f))
            if not imgs:
                continue
            random.shuffle(imgs)
            n = len(imgs)
            n_train = int(n*0.8)
            n_val = int(n*0.1)
            train_imgs = imgs[:n_train]
            val_imgs = imgs[n_train:n_train+n_val]
            test_imgs = imgs[n_train+n_val:]
            groups = {"train":train_imgs,"val":val_imgs,"test":test_imgs}
            out_dir = os.path.join(IMGCLS_DST, split, cls)
            os.makedirs(out_dir, exist_ok=True)
            for sp, group in groups.items():
                out_dir_sp = os.path.join(IMGCLS_DST, sp, cls)
                os.makedirs(out_dir_sp, exist_ok=True)
                for src_img in group:
                    shutil.copy(src_img, os.path.join(out_dir_sp, os.path.basename(src_img)))
    # print counts
    for split in ("train","val","test"):
        tot = sum(len(files) for _,_,files in os.walk(os.path.join(IMGCLS_DST,split)))
        print(f"{split}: total images = {tot}")

print("Done. Check the new folder(s) and tell me which format you prefer to train with.")


Annotations found anywhere?: True
Preparing YOLO-style dataset at: /content/urban_yolo_prepared
YOLO-style dataset prepared. Preview counts:
train: images=14264, annotations=0
val: images=1782, annotations=0
test: images=1785, annotations=0
Done. Check the new folder(s) and tell me which format you prefer to train with.


In [None]:
# Cell 3 — generate dataset.yaml for YOLO (run if /content/urban_yolo_prepared exists)
import os, yaml, glob
PREP = "/content/urban_yolo_prepared"
if not os.path.isdir(PREP):
    print("YOLO prepared folder not found:", PREP)
else:
    # gather classes by scanning train images parent class names — we can't infer class names directly from images,
    # so we'll read class names from the original SRC top-level folders that were kept (best-effort)
    SRC = "/content/urban_yolo"
    classes = [d for d in sorted(os.listdir(SRC)) if os.path.isdir(os.path.join(SRC,d))]
    nc = len(classes)
    data = {
        "train": os.path.join(PREP, "images", "train"),
        "val":   os.path.join(PREP, "images", "val"),
        "test":  os.path.join(PREP, "images", "test"),
        "nc": nc,
        "names": classes
    }
    yaml_path = "/content/urban_yolo_prepared/dataset.yaml"
    with open(yaml_path, "w") as f:
        yaml.dump(data, f)
    print("Wrote dataset.yaml ->", yaml_path)
    print(data)


Wrote dataset.yaml -> /content/urban_yolo_prepared/dataset.yaml
{'train': '/content/urban_yolo_prepared/images/train', 'val': '/content/urban_yolo_prepared/images/val', 'test': '/content/urban_yolo_prepared/images/test', 'nc': 4, 'names': ['Damaged concrete structures', 'DamagedElectricalPoles', 'Garbage', 'Potholes and RoadCracks']}


In [None]:
# run in a Colab cell (bash)
!pip install -q ultralytics
!yolo detect train data=/content/urban_yolo_prepared/dataset.yaml model=yolov8n.pt epochs=25 imgsz=640


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hCreating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 66.1MB/s 0.1s
Ultralytics 8.3.201 🚀 Python-3.12.11 torch-2.8.0+cu126 CPU (AMD EPYC 7B12)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/urban_yolo_prepared/dataset.yaml, degrees=0.0, deter