In [None]:
!pip install -U ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.221-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.221-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.17-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.221 ultralytics-thop-2.0.17


In [None]:
from ultralytics import YOLO
import os
import torch
import yaml
from google.colab import drive
import torch.nn as nn


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
dataset_path = '/content/drive/MyDrive/Pothole'
data_yaml = os.path.join(dataset_path, 'data.yaml')

# Verify dataset structure
if not os.path.exists(dataset_path):
    raise ValueError(f"Dataset path not found: {dataset_path}")

# Check subfolders
for split in ['train', 'val']:
    img_dir = os.path.join(dataset_path, f'{split}/images')
    label_dir = os.path.join(dataset_path, f'{split}/labels')
    if os.path.exists(img_dir):
        img_count = len([f for f in os.listdir(img_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
        print(f"{split}/images: {img_count} files")
    if os.path.exists(label_dir):
        label_count = len([f for f in os.listdir(label_dir) if f.endswith('.txt')])
        print(f"{split}/labels: {label_count} files")

# Load YAML config (create if missing)
if os.path.exists(data_yaml):
    with open(data_yaml, 'r') as f:
        data_config = yaml.safe_load(f)
    print(f"Dataset path: {dataset_path}")
    print(f"Classes: {data_config.get('names', 'N/A')}")
    print(f"Number of classes: {data_config.get('nc', 'N/A')}")
else:
    # Create basic YAML if missing
    data_config = {
        'path': dataset_path,
        'train': 'train/images',
        'val': 'val/images',
        'nc': 1,
        'names': ['pothole']
    }
    with open(data_yaml, 'w') as f:
        yaml.dump(data_config, f)
    print("Created basic data.yaml. Update paths if needed.")

if not os.path.exists(os.path.join(dataset_path, 'train/images')):
    print("Warning: train/images not found. Ensure proper YOLO dataset structure.")

train/images: 1581 files
train/labels: 1581 files
Dataset path: /content/drive/MyDrive/Pothole
Classes: ['pothole']
Number of classes: 1


In [None]:
model = YOLO('yolo11n.pt')

# Access the internal model layers (DetectionModel contains nn.ModuleList of layers)
backbone = model.model.model  # Sequential-like container of modules
total_layers = len(backbone)
print(f"Total layers in YOLOv11n: {total_layers}")

# Print layer structure for verification
print("\nLayer structure:")
for i, layer in enumerate(backbone):
    layer_name = type(layer).__name__[:30]
    num_params = sum(p.numel() for p in layer.parameters())
    print(f"Layer {i:2d}: {layer_name:<30} ({num_params:>8,} params)")

# IMPROVED STRATEGY: Freeze only early backbone (layers 0-9)
# This keeps basic feature extraction fixed while allowing adaptation
print("\n" + "="*60)
print("FREEZING STRATEGY: Early backbone only (layers 0-9)")
print("="*60)

# Freeze only first 10 layers (Conv stems + early C3k2 blocks)
for i in range(10):
    for param in backbone[i].parameters():
        param.requires_grad = False
    print(f"[FROZEN] Layer {i}: {type(backbone[i]).__name__}")

# Explicitly unfreeze remaining layers (10 onwards: SPPF, neck, head)
print(f"\n{'='*60}")
print(f"UNFREEZING: Layers 10-{total_layers-1} (SPPF + Neck + Head)")
print("="*60)
for i in range(10, total_layers):
    for param in backbone[i].parameters():
        param.requires_grad = True
    num_params = sum(p.numel() for p in backbone[i].parameters())
    print(f"[TRAINABLE] Layer {i}: {type(backbone[i]).__name__:<30} ({num_params:>8,} params)")

# Calculate and display trainable parameters
trainable_params = sum(p.numel() for p in model.model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.model.parameters())
trainable_pct = 100 * trainable_params / total_params

print(f"\n{'='*60}")
print("PARAMETER SUMMARY")
print("="*60)
print(f"Total parameters:     {total_params:>12,}")
print(f"Frozen parameters:    {total_params - trainable_params:>12,}")
print(f"Trainable parameters: {trainable_params:>12,} ({trainable_pct:.2f}%)")
print("="*60)

# Verify layer-by-layer trainability
print("\n" + "="*60)
print("LAYER-BY-LAYER TRAINABILITY CHECK")
print("="*60)
for i, layer in enumerate(backbone):
    trainable = any(p.requires_grad for p in layer.parameters())
    status = "TRAINABLE" if trainable else "FROZEN"
    layer_params = sum(p.numel() for p in layer.parameters())
    print(f"Layer {i:2d}: {status:<12} {type(layer).__name__:<30} ({layer_params:>8,} params)")

print(f"\nModel ready for training with {trainable_pct:.1f}% trainable parameters")

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt': 100% ━━━━━━━━━━━━ 5.4MB 91.1MB/s 0.1s
Total layers in YOLOv11n: 24

Layer structure:
Layer  0: Conv                           (     464 params)
Layer  1: Conv                           (   4,672 params)
Layer  2: C3k2                           (   6,640 params)
Layer  3: Conv                           (  36,992 params)
Layer  4: C3k2                           (  26,080 params)
Layer  5: Conv                           ( 147,712 params)
Layer  6: C3k2                           (  87,040 params)
Layer  7: Conv                           ( 295,424 params)
Layer  8: C3k2                           ( 346,112 params)
Layer  9: SPPF                           ( 164,608 params)
Layer 10: C2PSA                          ( 249,728 params)
Layer 11: Upsample                       (       0 params)
Layer 12: Concat                         (       0 params)
Layer 13: C3k2                           (

In [None]:
# Head-only training with manual unfreezing + freeze reinforcement
train_results = model.train(
    data='/content/drive/MyDrive/Pothole/data.yaml',

    # Training duration - increase for convergence
    epochs=75,  # More epochs with early stopping
    patience=30,  # Stop if no improvement for 30 epochs

    # Batch & image settings
    batch=32,  # Smaller batch = more stable gradients with small dataset
    imgsz=640,

    # Optimizer settings - CRITICAL FIX
    optimizer='SGD',  # SGD often works better than AdamW for YOLO fine-tuning
    lr0=0.01,  # Higher initial LR for SGD
    lrf=0.1,  # Decay to 10% of initial (0.001 final)
    momentum=0.937,
    weight_decay=0.0005,

    # Warmup
    warmup_epochs=3,  # Longer warmup for stability
    warmup_momentum=0.8,
    warmup_bias_lr=0.1,

    # Loss weights - TUNED FOR POTHOLE DETECTION
    box=7.5,  # Keep high for precise localization
    cls=0.5,  # Single class, lower weight
    dfl=1.5,  # Distribution focal loss for bbox refinement

    # SOLUTION 3: Aggressive augmentation for small datasets
    hsv_h=0.02,  # Slight color jitter (lighting variations)
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10.0,  # ADD rotation (roads can be tilted in dashcam footage)
    translate=0.2,  # Increase translation (potholes at various positions)
    scale=0.9,  # More scale variation (0.5 → range 0.1x-1.0x)
    shear=5.0,  # ADD shear for perspective distortion
    perspective=0.0005,  # ADD subtle perspective warp
    flipud=0.0,
    fliplr=0.5,

    # CRITICAL: Multi-image augmentation
    mosaic=1.0,  # Keep mosaic (combines 4 images)
    mixup=0.15,  # ADD mixup (blends images, improves generalization)
    copy_paste=0.1,  # ADD copy-paste (duplicate potholes across images)

    # Close mosaic in last epochs for better bbox accuracy
    close_mosaic=20,  # Disable mosaic in last 20 epochs

    # Training settings
    device=0,
    workers=8,
    seed=42,
    amp=True,  # Mixed precision

    # Don't freeze layers via config (we manually did it above)
    freeze=None,

    # Validation & saving
    val=True,
    save=True,
    save_period=10,
    plots=True,

    # Output
    project='/content/drive/MyDrive/Pothole/',
    name='pothole_improved_v2',
    exist_ok=True
)

# Training summary
print("\nHead-only training completed!")
best_path = '/content/drive/MyDrive/Pothole/weights/best.pt'
print(f"Best model saved: {best_path}")
if hasattr(train_results, 'results_dict'):
    print(f"Final mAP50-95: {train_results.results_dict.get('metrics/mAP50-95(B)', 0):.4f}")
    print(f"Final mAP50: {train_results.results_dict.get('metrics/mAP50(B)', 0):.4f}")
else:
    print("Training results available in /content/drive/MyDrive/Pothole/weights")


Ultralytics 8.3.221 🚀 Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=20, cls=0.5, compile=False, conf=None, copy_paste=0.1, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/Pothole/data.yaml, degrees=10.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=75, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.02, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.1, mask_ratio=4, max_det=300, mixup=0.15, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=pothole_improved_v2, nbs=64, nms=False, opset=None, optimize=False, optimizer=SGD, overlap_mask=True, patience=30, pe

In [None]:
# Load the best head-trained model
head_model = YOLO('/content/drive/MyDrive/Pothole/pothole_head_yolo11/weights/best.pt')

# Run validation on val set
val_results = head_model.val(
    data=data_yaml,
    batch=16,
    imgsz=640,
    device=0,
    conf=0.25,                # Confidence threshold for evaluation
    iou=0.7,                  # NMS IoU threshold
    plots=True,               # Generate validation plots (confusion matrix, PR curve)
    save_json=True,           # Save results JSON
    save_hybrid=False,
    verbose=True,
    max_det=300,              # Max detections per image
    half=False                # Full precision for eval
)

print("High mAP50 (>0.7) indicates good head adaptation; low recall may need more data/augmentations.")


Ultralytics 8.3.221 🚀 Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
YOLO11n summary (fused): 100 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.5±0.2 ms, read: 69.7±44.7 MB/s, size: 133.3 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/Pothole/valid/labels.cache... 395 images, 0 backgrounds, 1 corrupt: 100% ━━━━━━━━━━━━ 396/396 554.2Kit/s 0.0s
[34m[1mval: [0m/content/drive/MyDrive/Pothole/valid/images/pothole_1870.jpg: ignoring corrupt image/label: invalid image format GIF. Supported formats are:
images: {'tif', 'heic', 'png', 'jpeg', 'dng', 'pfm', 'mpo', 'jpg', 'webp', 'bmp', 'tiff'}
videos: {'asf', 'wmv', 'mp4', 'mpg', 'mpeg', 'ts', 'm4v', 'avi', 'mkv', 'gif', 'webm', 'mov'}
[34m[1mval: [0m/content/drive/MyDrive/Pothole/valid/images/pothole_975.jpg: corrupt JPEG restored and saved
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 25/25 2.5it

In [None]:
# Load head-trained model for inference
head_model = YOLO('/content/drive/MyDrive/Pothole/pothole_head_yolo11/weights/best.pt')

# # Test on a sample image (update path to your val set)
# sample_image = '/content/drive/MyDrive/Pothole/val/images/sample_pothole.jpg'  # Replace with actual filename
# if os.path.exists(sample_image):
#     img_results = head_model(
#         sample_image,
#         conf=0.25,      # Lower conf to detect small potholes
#         iou=0.7,        # NMS IoU
#         device=0,       # GPU
#         save=True,      # Save annotated image
#         show=True,      # Display in Colab (if possible)
#         save_txt=True,  # Save labels in YOLO format
#         save_conf=True, # Show confidence scores
#         line_width=3,   # Thicker boxes for visibility
#         max_det=100,    # Limit detections
#         augment=False   # No TTA for speed
#     )
#     num_detections = len(img_results[0].boxes) if img_results[0].boxes is not None else 0
#     print(f"Image inference: {num_detections} potholes detected")
#     print(f"Results saved to: {img_results[0].save_dir}")

#     # Print detection details
#     if num_detections > 0:
#         boxes = img_results[0].boxes
#         for i, box in enumerate(boxes):
#             cls = int(box.cls[0])
#             conf = float(box.conf[0])
#             xyxy = box.xyxy[0].tolist()
#             print(f"  Pothole {i+1}: conf={conf:.2f}, bbox=({xyxy[0]:.0f}, {xyxy[1]:.0f}, {xyxy[2]:.0f}, {xyxy[3]:.0f})")
# else:
#     print(f"Sample image not found: {sample_image}")
#     print("Choose a .jpg/.png from val/images/ folder.")

# Optional: Video inference (slower, ~1-5 FPS on T4)
video_path = '/content/drive/MyDrive/Pothole/sample_video.mp4'
if os.path.exists(video_path):
    print(f"\nRunning video inference on: {video_path}")
    vid_results = head_model(
        video_path,
        conf=0.25,
        iou=0.7,
        save=True,
        project='/content/drive/MyDrive/Pothole/',
        name='inference',
        exist_ok=True,
        vid_stride=1,  # Process every frame
        line_width=2
    )
    print(f"Video saved: /content/drive/MyDrive/Pothole/inference/")
    print("Processed frames with pothole detections overlaid.")



Running video inference on: /content/drive/MyDrive/Pothole/sample_video.mp4

inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/375) /content/drive/MyDrive/Pothole/sample_video.mp4: 384x640 11 potholes, 78.9ms
video 1/1 (frame 2/375) /content/drive/MyDrive/Pothole/sample_video.mp4: 384x640 13 potholes, 16.2ms
video 1/1 (frame 3/375) /content/drive/MyDrive/Pothole/sample_video.mp4: 384x640 12 potholes, 11.0ms
video 1/1 (frame 4/375) /content/drive/MyDrive/Pothole/sample_video.mp4: 384x640 15 poth