In [None]:
# Install (or upgrade to the latest release of) the ultralytics package, which
# provides the YOLO class imported later in this notebook.
!pip install -U ultralytics

In [None]:
import os
import shutil
from pathlib import Path
import random



# Source folders: label .txt files and the images they are matched against
# (by file stem) in the pairing step.
INPUT_LABELS = Path("/kaggle/input/labels/labels")
INPUT_IMAGES = Path("/kaggle/input/vehicles-images/Images")

# Destination for the filtered copy: images/ and labels/ live side by side
# under a single base directory.
OUTPUT_BASE = Path("/kaggle/working/filtered_dataset")
OUTPUT_IMAGES, OUTPUT_LABELS = (
    OUTPUT_BASE.joinpath(subdir) for subdir in ("images", "labels")
)



In [None]:
# ────────────────────────────────────────────────
# Step 1: Pair each label file with its image
# ────────────────────────────────────────────────
# Extensions are probed in this order; the first file that exists wins.
IMAGE_EXTS = (".jpg", ".jpeg", ".png")

labeled_pairs = []

# Path.glob() yields entries in arbitrary, filesystem-dependent order. Sorting
# gives the pair list a stable order, which a seeded shuffle of this list needs
# in order to produce the same train/val split on every run.
for txt_path in sorted(INPUT_LABELS.glob("*.txt")):
    stem = txt_path.stem
    for ext in IMAGE_EXTS:
        img_path = INPUT_IMAGES / f"{stem}{ext}"
        if img_path.exists():
            labeled_pairs.append((img_path, txt_path))
            break  # keep a single image per label file

print(f"Found {len(labeled_pairs)} images with matching labels")

# Fail fast: every later step needs at least one (image, label) pair.
if not labeled_pairs:
    raise ValueError("No image-label pairs found! Check folder names/paths.")


In [None]:
# ────────────────────────────────────────────────
# Step 2: Random shuffle + split into train / val
# ────────────────────────────────────────────────
TRAIN_RATIO = 0.8

# Shuffle a copy rather than `labeled_pairs` itself. An in-place shuffle makes
# this cell non-idempotent: re-running it would shuffle the already-shuffled
# list and produce a different split even with the same seed.
shuffled_pairs = list(labeled_pairs)
random.seed(42)  # for reproducibility
random.shuffle(shuffled_pairs)

# int() truncates, so the validation split receives the remainder.
split_idx = int(len(shuffled_pairs) * TRAIN_RATIO)

train_pairs = shuffled_pairs[:split_idx]
val_pairs   = shuffled_pairs[split_idx:]

print(f"→ Train: {len(train_pairs)} images")
print(f"→ Val:   {len(val_pairs)} images")

# With very few pairs one side of the split can come out empty; say so here
# instead of letting a later step fail with a less obvious error.
if not train_pairs or not val_pairs:
    print("Warning: one of the splits is empty - add more labeled images or adjust TRAIN_RATIO.")



In [None]:
# ────────────────────────────────────────────────
# Step 3: Create folders and copy only labeled files
# ────────────────────────────────────────────────

def safe_copy(src, dst):
    """Copy ``src`` to ``dst``; print a warning instead of raising if ``src`` is missing.

    Args:
        src: Path of the file to copy.
        dst: Destination path (file or directory), passed to ``shutil.copy``.
    """
    if src.exists():
        shutil.copy(src, dst)
    else:
        print(f"Warning: Source not found → {src}")


for split_name, split_pairs in (("train", train_pairs), ("val", val_pairs)):
    split_img_dir = OUTPUT_IMAGES / split_name
    split_lbl_dir = OUTPUT_LABELS / split_name

    for split_dir in (split_img_dir, split_lbl_dir):
        # Start from an empty folder: files left over from an earlier run with a
        # different split would otherwise stay behind, so the same image could
        # end up in both train/ and val/.
        if split_dir.exists():
            shutil.rmtree(split_dir)
        split_dir.mkdir(parents=True, exist_ok=True)

    for img_src, txt_src in split_pairs:
        safe_copy(img_src, split_img_dir / img_src.name)
        safe_copy(txt_src, split_lbl_dir / txt_src.name)

print("Filtering and copying completed.")

In [None]:
import yaml

# Class names in class-id order: a name's position in this list is its id.
class_names = [
    'Auto Rickshaw',
    'Cycle Rickshaw',
    'CNG/Tempo',
    'Bus',
    'Jeep/SUV',
    'Microbus',
    'Minibus',
    'Motorcycle',
    'Truck',
    'Private Sedan Car',
    'Trailer'
]

dataset_yaml_content = {
    'path': str(OUTPUT_BASE),          # /kaggle/working/filtered_dataset
    'train': 'images/train',           # relative to 'path'
    'val': 'images/val',               # relative to 'path'
    'nc': len(class_names),            # derived, so it cannot drift from 'names'
    'names': class_names,
}

yaml_file = "/kaggle/working/data.yaml"

# sort_keys=False keeps the keys in the order written above.
with open(yaml_file, 'w') as f:
    yaml.dump(dataset_yaml_content, f, default_flow_style=False, sort_keys=False)

# Echo the file back as a check; the context manager closes the handle
# instead of leaving it open until garbage collection.
print("Created data.yaml:")
with open(yaml_file) as f:
    print(f.read())

In [None]:
import os
import cv2
import matplotlib.pyplot as plt

# Train split of the filtered dataset: each image has a same-stem .txt file
# in the labels folder.
LABELS_DIR = "/kaggle/working/filtered_dataset/labels/train"
IMAGES_DIR = "/kaggle/working/filtered_dataset/images/train"

# Display names, indexed by the integer class id read from a label line.
CLASS_NAMES = [
    "Auto Rickshaw", "Cycle Rickshaw", "CNG/Tempo", "Bus",
    "Jeep/SUV", "Microbus", "Minibus", "Motorcycle",
    "Truck", "Private Sedan Car", "Trailer",
]

def visualize_any_sample_hd():
    """Show the first training image (by file name) with its labelled boxes drawn on it.

    Reads one image from ``IMAGES_DIR`` and the same-stem ``.txt`` file from
    ``LABELS_DIR``. Each label line is parsed as
    ``class x_center y_center width height``; the four coordinates are
    fractions of the image size (they are scaled by width/height below).

    Raises:
        FileNotFoundError: if ``IMAGES_DIR`` holds no .jpg/.jpeg/.png file, or
            the matching label file does not exist.
        ValueError: if the chosen image cannot be decoded.
    """
    image_exts = (".jpg", ".jpeg", ".png")

    # Sort for a deterministic pick (os.listdir order is arbitrary) and match
    # the extension case-insensitively so .jpeg / .JPG files are not skipped.
    candidates = sorted(
        name for name in os.listdir(IMAGES_DIR) if name.lower().endswith(image_exts)
    )
    if not candidates:
        raise FileNotFoundError(f"No image files found in {IMAGES_DIR}")
    image_name = candidates[0]

    img_path = os.path.join(IMAGES_DIR, image_name)
    label_path = os.path.join(LABELS_DIR, os.path.splitext(image_name)[0] + ".txt")

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # matplotlib expects RGB
    h, w = img.shape[:2]

    with open(label_path, "r") as f:
        for line_no, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing empty lines
            if len(fields) != 5:
                print(f"Warning: skipping malformed label line {line_no} in {label_path}")
                continue

            cls = int(float(fields[0]))
            x, y, bw, bh = map(float, fields[1:])

            # Centre/size fractions -> pixel corner coordinates.
            x1 = int((x - bw / 2) * w)
            y1 = int((y - bh / 2) * h)
            x2 = int((x + bw / 2) * w)
            y2 = int((y + bh / 2) * h)

            # Fall back to the raw id when it is outside CLASS_NAMES (a negative
            # id would otherwise silently index from the end of the list).
            label = CLASS_NAMES[cls] if 0 <= cls < len(CLASS_NAMES) else str(cls)

            # thicker box + anti-aliasing
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3, cv2.LINE_AA)
            cv2.putText(
                img,
                label,
                (x1, max(y1 - 8, 20)),  # keep the text inside the top edge
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (255, 0, 0),
                2,
                cv2.LINE_AA
            )

    # High DPI rendering
    plt.figure(figsize=(12, 12), dpi=200)
    plt.imshow(img, interpolation="nearest")
    plt.axis("off")
    plt.show()


# Draw one training image with its label boxes as a visual check of the labels.
visualize_any_sample_hd()


In [None]:
from ultralytics import YOLO

# Checkpoint to start from (or yolo26s.pt if you want slightly better accuracy).
model = YOLO("yolo26n.pt")

# All training options in one place; they are forwarded unchanged to model.train().
train_settings = {
    # Data and schedule
    "data": "/kaggle/working/data.yaml",
    "epochs": 100,        # smaller dataset → 60–120 epochs is usually enough
    "imgsz": 640,
    "batch": 12,          # lower if you get OOM error
    "device": 0,
    "patience": 25,
    "save_period": 10,
    # Optimisation
    "optimizer": "AdamW",
    "lr0": 0.001,
    "amp": True,
    # Augmentation
    "mosaic": 1.0,
    "mixup": 0.15,
    "hsv_h": 0.015,
    "hsv_s": 0.7,
    "hsv_v": 0.4,
    "degrees": 8.0,
    "translate": 0.1,
    "scale": 0.6,
    "shear": 2.0,
    "flipud": 0.4,
    "fliplr": 0.5,
}

results = model.train(**train_settings)

# Inference

In [None]:
from ultralytics import YOLO
# Load best.pt from the most recent training run. Ultralytics auto-increments
# the run folder (train, train2, train3, ...) whenever training is re-run, so a
# hard-coded ".../train/weights/best.pt" can silently load stale weights.
from pathlib import Path  # local import so this cell also works on its own

_detect_runs_dir = Path('/kaggle/working/runs/detect')
_best_checkpoints = sorted(
    _detect_runs_dir.glob('train*/weights/best.pt'),
    key=lambda ckpt: ckpt.stat().st_mtime,  # oldest → newest
)
if not _best_checkpoints:
    raise FileNotFoundError(
        f"No train*/weights/best.pt found under {_detect_runs_dir}; run the training cell first."
    )

best_weights = _best_checkpoints[-1]
print(f"Loading weights: {best_weights}")
model = YOLO(str(best_weights))


In [None]:
import os
from pathlib import Path

val_images_dir = Path('/kaggle/working/filtered_dataset/images/val')

# Accept the same extensions the dataset was assembled with (.jpg/.jpeg/.png)
# and compare case-insensitively; matching only '*.jpg' and '*.png' understates
# the count whenever .jpeg or upper-case files are present. Sorted so the list
# order is stable.
val_image_exts = {".jpg", ".jpeg", ".png"}
image_paths = sorted(
    entry for entry in val_images_dir.glob('*')
    if entry.suffix.lower() in val_image_exts
)

print(f"Found {len(image_paths)} validation images.")


In [None]:
# Run inference on the whole validation folder and save the results under
# <project>/<name>, i.e. /kaggle/working/runs/detect/val_pred/results.
results = model.predict(
    source=str(val_images_dir),
    conf=0.1,
    imgsz=640,
    save=True,
    save_txt=True,
    project='/kaggle/working/runs/detect/val_pred',
    name='results',
    exist_ok=True,
)


In [None]:
import os
import cv2
import matplotlib.pyplot as plt

pred_dir = "/kaggle/working/runs/detect/val_pred/results"
image_exts = (".jpg", ".jpeg", ".png")

# Show every saved prediction image in file-name order; entries whose name
# does not end in an image extension are ignored.
for img_name in sorted(os.listdir(pred_dir)):
    if img_name.lower().endswith(image_exts):
        bgr_image = cv2.imread(os.path.join(pred_dir, img_name))

        if bgr_image is not None:
            # OpenCV loads BGR; matplotlib displays RGB.
            rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

            plt.figure(figsize=(6, 6))
            plt.imshow(rgb_image)
            plt.axis("off")
            plt.title(img_name)
            plt.show()
        else:
            # cv2.imread returned None: report the file and move on.
            print(f"⚠️ Could not read: {img_name}")
