  # 1. Dataset and Environment Preparation


In [1]:
# Mount Google Drive so files stored there are reachable from this Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Create directory structure (run only once)
!mkdir -p "/content/dataset"
!mkdir -p "/content/dataset/train"
!mkdir -p "/content/dataset/val"
!mkdir -p "/content/dataset/test"
!mkdir -p "/content/dataset/train/images"
!mkdir -p "/content/dataset/train/labels"
!mkdir -p "/content/dataset/val/images"
!mkdir -p "/content/dataset/val/labels"
!mkdir -p "/content/dataset/test/images"
!mkdir -p "/content/dataset/test/labels"

# 2. Install Required Packages


In [3]:
!pip install torch torchvision torchaudio
!pip install ultralytics opencv-python numpy pandas matplotlib tqdm
!pip install scikit-learn

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [4]:
!pip install torch torchvision torchaudio
!pip install ultralytics opencv-python numpy pandas matplotlib tqdm
!pip install scikit-learn



# 4. Prepare Dataset for YOLOv8



In [None]:
import os
import random
from sklearn.model_selection import train_test_split
import cv2
import numpy as np

# Paths to your dataset
positive_path = "/content/drive/MyDrive/Concrete Crack Images for Classification/Positive/Positive"
negative_path = "/content/drive/MyDrive/Concrete Crack Images for Classification/Negative/Negative"

# Number of images drawn from each class for faster experimentation
SUBSET_SIZE = 2000


def _list_images(folder):
    """Return the sorted full paths of the .jpg/.png files directly inside `folder`."""
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # sampling below select the same files on every run.
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.lower().endswith(('.jpg', '.png'))
    )


# Get all image paths
positive_images = _list_images(positive_path)
negative_images = _list_images(negative_path)

# Work on a fixed-size random subset of each class. The size is capped at the
# number of available files because random.sample raises ValueError otherwise.
random.seed(42)
positive_images = random.sample(positive_images, min(SUBSET_SIZE, len(positive_images)))
negative_images = random.sample(negative_images, min(SUBSET_SIZE, len(negative_images)))

# Split into train/val/test (70/20/10): hold out 30 %, then give one third of
# the held-out part to test and the rest to validation.
pos_train, pos_temp = train_test_split(positive_images, test_size=0.3, random_state=42)
pos_val, pos_test = train_test_split(pos_temp, test_size=1/3, random_state=42)

neg_train, neg_temp = train_test_split(negative_images, test_size=0.3, random_state=42)
neg_val, neg_test = train_test_split(neg_temp, test_size=1/3, random_state=42)

def create_yolo_annotation(image_path, has_crack, output_dir, label_format="segment"):
    """Write a dummy YOLO label file for one image.

    The label file is named after the image (same base name, ``.txt`` extension)
    and is placed in ``output_dir``. Nothing is written when the image cannot be
    read by OpenCV.

    Args:
        image_path: Path of the image the label belongs to.
        has_crack: True writes one class-0 annotation covering the whole image;
            False writes an empty file (no objects).
        output_dir: Existing directory that receives the ``.txt`` file.
        label_format: ``"segment"`` (default) writes a four-corner polygon
            ``class x1 y1 x2 y2 x3 y3 x4 y4``, which is the row layout a
            segmentation model such as ``yolov8n-seg`` is trained on.
            ``"bbox"`` writes the previous ``class x_center y_center width height`` row.

    Raises:
        ValueError: If ``label_format`` is neither ``"segment"`` nor ``"bbox"``.
    """
    if label_format not in ("segment", "bbox"):
        raise ValueError(f"Unsupported label_format: {label_format!r}")

    # Only used as a readability check: unreadable images get no label file.
    img = cv2.imread(image_path)
    if img is None:
        return

    # Create annotation file path
    base_name = os.path.splitext(os.path.basename(image_path))[0]
    txt_path = os.path.join(output_dir, f"{base_name}.txt")

    if not has_crack:
        # Negative images: an empty label file
        annotation = ""
    elif label_format == "segment":
        # Normalised polygon through the four image corners (full-image mask)
        corners = [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]
        annotation = "0 " + " ".join(str(value) for value in corners) + "\n"
    else:
        # YOLO box format: class x_center y_center width height (normalized)
        x_center = 0.5
        y_center = 0.5
        box_width = 1.0
        box_height = 1.0
        annotation = f"0 {x_center} {y_center} {box_width} {box_height}"

    with open(txt_path, 'w') as f:
        f.write(annotation)

import shutil


def _populate_split(image_paths, has_crack, split_name, name_prefix):
    """Copy images into /content/dataset/<split_name>/images and write their labels.

    Args:
        image_paths: Source image paths to copy.
        has_crack: Passed to create_yolo_annotation for every image.
        split_name: "train", "val" or "test".
        name_prefix: Prepended to each destination file name.
    """
    images_dir = os.path.join("/content/dataset", split_name, "images")
    labels_dir = os.path.join("/content/dataset", split_name, "labels")

    for src in image_paths:
        # Positive and negative images land in the same folder, so a class
        # prefix keeps a positive and a negative file that share a base name
        # from overwriting each other (and each other's label file).
        dest = os.path.join(images_dir, name_prefix + os.path.basename(src))
        try:
            # In-process copy: avoids spawning one shell per image
            shutil.copy(src, dest)
        except OSError as err:
            print(f"Skipping {src}: {err}")
            continue

        # The label is generated from the copied file so that its name matches
        # the destination image name.
        create_yolo_annotation(dest, has_crack, labels_dir)

        # create_yolo_annotation writes nothing for unreadable images; drop the
        # copy as well so no image is left without a label file.
        label_path = os.path.join(labels_dir, os.path.splitext(os.path.basename(dest))[0] + ".txt")
        if not os.path.exists(label_path):
            os.remove(dest)


# Process the training, validation and test sets
for split_name, pos_paths, neg_paths in (
    ("train", pos_train, neg_train),
    ("val", pos_val, neg_val),
    ("test", pos_test, neg_test),
):
    _populate_split(pos_paths, True, split_name, "pos_")
    _populate_split(neg_paths, False, split_name, "neg_")

^C
^C
^C
^C
^C


# 5. Create YOLOv8 Dataset Configuration


In [None]:
# Dataset description file: root folder, the image folder of each split
# (relative to the root) and the class-id -> name mapping.
yaml_content = """
path: /content/dataset
train: train/images
val: val/images
test: test/images

names:
  0: crack
"""

dataset_yaml_path = "/content/dataset/dataset.yaml"
with open(dataset_yaml_path, "w") as f:
    # end="" so exactly the text above is written, with nothing appended
    print(yaml_content, end="", file=f)

# 6. Train YOLOv8 Model


In [None]:
from ultralytics import YOLO

# Start from the pretrained nano segmentation checkpoint (nano version for faster training)
model = YOLO("yolov8n-seg.pt")

# Training settings, collected in one place so they are easy to tweak
train_settings = dict(
    data="/content/dataset/dataset.yaml",
    epochs=50,
    imgsz=640,
    batch=16,
    name="yolov8_crack_detection",
)

# Train the model
results = model.train(**train_settings)

# 7. Evaluate YOLOv8 Model


In [None]:
# Evaluate on test set
metrics = model.val(
    data="/content/dataset/dataset.yaml",
    split="test"
)

# Show some predictions
results = model.predict(
    source="/content/dataset/test/images",
    save=True,
    conf=0.5
)

# Display one of the predicted images
from IPython.display import Image

# Ultralytics numbers its output folders (predict, predict2, ...) on repeated
# runs, so prefer the folder reported by this run's results over a fixed path.
# NOTE(review): relies on the Results objects exposing `save_dir`; the original
# hard-coded folder is kept as the fallback.
predict_dir = "/content/runs/segment/predict"
if results and getattr(results[0], "save_dir", None):
    predict_dir = str(results[0].save_dir)

# os.listdir order is arbitrary; sort so the displayed image is the same on every run
predicted_files = sorted(
    name for name in os.listdir(predict_dir)
    if name.lower().endswith(('.jpg', '.png'))
)
if not predicted_files:
    raise FileNotFoundError(f"No predicted images found in {predict_dir}")

Image(filename=os.path.join(predict_dir, predicted_files[-1]))

# 8. Implement Mask R-CNN with ResNeXt Backbone


In [None]:
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torch.utils.data import Dataset, DataLoader
import numpy as np
import cv2
import os

# Custom dataset class
class CrackDataset(Dataset):
    """Image-level crack dataset exposed as Mask R-CNN style (image, target) pairs.

    An image counts as positive when a non-empty ``<name>.txt`` file exists in
    ``label_dir``. Positive images get a single instance of class 1 whose box and
    mask cover the whole image; negative images get zero instances.

    Args:
        image_dir: Folder containing the ``.jpg`` / ``.png`` images.
        label_dir: Folder containing the label files.
        transforms: Optional callable applied to the RGB image array before it
            is returned. When omitted the image is returned as the numpy array
            produced by OpenCV (height x width x 3, RGB order).
    """

    def __init__(self, image_dir, label_dir, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transforms = transforms
        # Sorted so that index -> image is the same on every run
        # (os.listdir returns entries in arbitrary order).
        self.images = sorted(
            img for img in os.listdir(image_dir) if img.endswith(('.jpg', '.png'))
        )

    def __getitem__(self, idx):
        """Return ``(image, target)`` for item ``idx``.

        ``target`` is a dict with ``boxes`` (float32, N x 4 as x1, y1, x2, y2),
        ``labels`` (int64, N) and ``masks`` (uint8, N x H x W), where N is 1 for
        positive images and 0 for negative ones.

        Raises:
            OSError: If the image file cannot be read.
        """
        img_path = os.path.join(self.image_dir, self.images[idx])
        label_path = os.path.join(self.label_dir, os.path.splitext(self.images[idx])[0] + ".txt")

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise OSError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        height, width = img.shape[:2]

        # Check if it's a positive sample (has crack)
        has_crack = os.path.exists(label_path) and os.path.getsize(label_path) > 0

        if has_crack:
            # For positive images, one instance covering the full image
            masks = torch.ones((1, height, width), dtype=torch.uint8)
            boxes = torch.as_tensor([[0, 0, width, height]], dtype=torch.float32)
            labels = torch.ones((1,), dtype=torch.int64)
        else:
            # For negative images, zero instances: the mask tensor must have the
            # same leading dimension (0) as boxes and labels.
            masks = torch.zeros((0, height, width), dtype=torch.uint8)
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks
        }

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Number of images found in ``image_dir``."""
        return len(self.images)

# Create model with ResNeXt backbone
# NOTE: this rebinds the global name `model`, which the YOLOv8 training cell
# above used for the YOLO model.
backbone = resnet_fpn_backbone('resnext50_32x4d', pretrained=True)
model = MaskRCNN(backbone, num_classes=2)  # Background + crack

# The training loop calls `.to(device)` on every image, so the dataset must
# yield tensors. ToTensor turns the height x width x 3 uint8 arrays produced by
# the dataset into float 3 x height x width tensors scaled to [0, 1].
to_tensor = torchvision.transforms.ToTensor()

# Create datasets
train_dataset = CrackDataset(
    "/content/dataset/train/images",
    "/content/dataset/train/labels",
    transforms=to_tensor
)
val_dataset = CrackDataset(
    "/content/dataset/val/images",
    "/content/dataset/val/labels",
    transforms=to_tensor
)


def collate_detection_batch(batch):
    """Turn a list of (image, target) pairs into (images, targets) tuples.

    Images may differ in size and targets are dicts, so they are kept as tuples
    instead of being stacked into a single tensor.
    """
    return tuple(zip(*batch))


# Create data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_detection_batch
)
val_loader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=2,
    collate_fn=collate_detection_batch
)

# Move model to GPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Set up optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for images, targets in train_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of individual loss terms
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    # Report the mean over the epoch rather than only the last batch; the
    # max() guard keeps an empty loader from dividing by zero.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}")

# 9. Compare Results

In [None]:
# Function to calculate IoU
def calculate_iou(mask1, mask2):
    """Intersection-over-union of two masks.

    Any non-zero element counts as foreground. When both masks are completely
    empty the union is zero; the masks agree perfectly in that case, so 1.0 is
    returned instead of dividing by zero (which would yield NaN).

    Args:
        mask1: Array-like mask.
        mask2: Array-like mask with a shape compatible with ``mask1``.

    Returns:
        IoU as a float in [0, 1].
    """
    union = np.sum(np.logical_or(mask1, mask2))
    if union == 0:
        return 1.0
    intersection = np.sum(np.logical_and(mask1, mask2))
    return float(intersection / union)

# Evaluate both models on test set
def evaluate_model(model, model_type="yolo"):
    """Compute mean and minimum mask IoU on a subset of the test images.

    The ground truth is image-level: an all-ones mask when the image has a
    non-empty label file, an all-zeros mask otherwise. Only the first predicted
    instance mask is compared; when the model predicts nothing the prediction is
    an all-zeros mask. Unreadable images are skipped.

    Args:
        model: Model to evaluate. For ``model_type == "yolo"`` it must provide
            ``predict(path)``; otherwise it is called as a Mask R-CNN style
            detection model and is switched to evaluation mode (it is left in
            that mode afterwards).
        model_type: ``"yolo"`` or any other value for Mask R-CNN.

    Returns:
        Tuple ``(mean_iou, min_iou)``.

    Raises:
        ValueError: If no test image could be evaluated.
    """
    import random

    images_dir = "/content/dataset/test/images"
    labels_dir = "/content/dataset/test/labels"

    # Sort first (os.listdir order is arbitrary), then draw a seeded sample so the
    # subset is both reproducible and not biased towards one file-name range.
    test_images = sorted(img for img in os.listdir(images_dir) if img.endswith(('.jpg', '.png')))
    subset = random.Random(42).sample(test_images, min(100, len(test_images)))  # subset for speed

    if model_type != "yolo":
        # In training mode the detection model expects targets and returns
        # losses; predictions are only produced in evaluation mode.
        model.eval()

    ious = []
    for img_name in subset:
        img_path = os.path.join(images_dir, img_name)
        label_path = os.path.join(labels_dir, os.path.splitext(img_name)[0] + ".txt")

        img = cv2.imread(img_path)
        if img is None:
            continue
        height, width = img.shape[:2]

        # Ground truth
        has_crack = os.path.exists(label_path) and os.path.getsize(label_path) > 0
        gt_mask = np.ones((height, width), dtype=np.uint8) if has_crack else np.zeros((height, width), dtype=np.uint8)

        # Prediction (stays all-zero when the model detects nothing)
        pred_mask = np.zeros((height, width), dtype=np.uint8)
        if model_type == "yolo":
            results = model.predict(img_path)
            masks = results[0].masks  # None when there are no detections
            if masks is not None and len(masks) > 0:
                raw_mask = masks.data[0].cpu().numpy().astype(np.float32)
                # The mask may come back at the network's inference resolution
                # rather than the image resolution, so bring it to the ground-truth size.
                resized = cv2.resize(raw_mask, (width, height), interpolation=cv2.INTER_NEAREST)
                pred_mask = (resized > 0.5).astype(np.uint8)
        else:  # Mask R-CNN
            # Feed RGB values scaled to [0, 1]; cv2.imread returns BGR in [0, 255]
            rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_tensor = torch.from_numpy(rgb).permute(2, 0, 1).float().div(255.0).to(device)
            with torch.no_grad():
                output = model([img_tensor])
            if len(output[0]['masks']) > 0:
                soft_mask = output[0]['masks'][0, 0].cpu().numpy()
                pred_mask = (soft_mask > 0.5).astype(np.uint8)

        ious.append(calculate_iou(gt_mask, pred_mask))

    if not ious:
        raise ValueError(f"No readable test images found in {images_dir}")

    return np.mean(ious), np.min(ious)

import glob

# The Mask R-CNN cell rebinds `model`, so at this point `model` is the Mask R-CNN
# network and the trained YOLOv8 model has to be restored from the weights saved
# during training.
# NOTE(review): assumes Ultralytics wrote the training run under
# /content/runs/segment (the root the prediction cell also reads from) using the
# run name "yolov8_crack_detection", possibly with a numeric suffix — confirm.
yolo_checkpoints = glob.glob("/content/runs/segment/yolov8_crack_detection*/weights/best.pt")
if not yolo_checkpoints:
    raise FileNotFoundError("No trained YOLOv8 weights found; run the YOLOv8 training cell first")
# Several runs may exist; use the most recently written checkpoint
yolo_model = YOLO(max(yolo_checkpoints, key=os.path.getmtime))

# Evaluate YOLOv8
yolo_mean_iou, yolo_min_iou = evaluate_model(yolo_model, "yolo")
print(f"YOLOv8 - Mean IoU: {yolo_mean_iou:.2%}, Min IoU: {yolo_min_iou:.2%}")

# Evaluate Mask R-CNN
maskrcnn_mean_iou, maskrcnn_min_iou = evaluate_model(model, "maskrcnn")
print(f"Mask R-CNN - Mean IoU: {maskrcnn_mean_iou:.2%}, Min IoU: {maskrcnn_min_iou:.2%}")

# 10. Visualize Results

In [None]:
import matplotlib.pyplot as plt

# Sample visualization
def visualize_results(image_path, yolo_model, maskrcnn_model):
    """Plot an image next to its ground-truth mask and both models' predicted masks.

    The ground truth is image-level: all ones when the matching label file in
    /content/dataset/test/labels is non-empty, all zeros otherwise. For each
    model only the first predicted instance mask is shown; an all-zeros mask is
    shown when the model predicts nothing.

    Args:
        image_path: Path of the image to visualise.
        yolo_model: Model providing ``predict(path)``.
        maskrcnn_model: Mask R-CNN style detection model; it is switched to
            evaluation mode and left in that mode.

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    height, width = img.shape[:2]

    # Ground truth
    label_path = os.path.join("/content/dataset/test/labels", os.path.splitext(os.path.basename(image_path))[0] + ".txt")
    has_crack = os.path.exists(label_path) and os.path.getsize(label_path) > 0
    gt_mask = np.ones((height, width), dtype=np.uint8) if has_crack else np.zeros((height, width), dtype=np.uint8)

    # YOLOv8 prediction
    yolo_mask = np.zeros((height, width), dtype=np.uint8)
    yolo_results = yolo_model.predict(image_path)
    yolo_masks = yolo_results[0].masks  # None when there are no detections
    if yolo_masks is not None and len(yolo_masks) > 0:
        raw_mask = yolo_masks.data[0].cpu().numpy().astype(np.float32)
        # The mask may come back at the network's inference resolution rather
        # than the image resolution, so bring it to the ground-truth size.
        resized = cv2.resize(raw_mask, (width, height), interpolation=cv2.INTER_NEAREST)
        yolo_mask = (resized > 0.5).astype(np.uint8)

    # Mask R-CNN prediction
    maskrcnn_mask = np.zeros((height, width), dtype=np.uint8)
    # Predictions are only produced in evaluation mode
    maskrcnn_model.eval()
    with torch.no_grad():
        # `img` is already RGB here; scale it to [0, 1] for the network
        img_tensor = torch.from_numpy(img).permute(2, 0, 1).float().div(255.0).to(device)
        output = maskrcnn_model([img_tensor])
    if len(output[0]['masks']) > 0:
        soft_mask = output[0]['masks'][0, 0].cpu().numpy()
        maskrcnn_mask = (soft_mask > 0.5).astype(np.uint8)

    # Plot: one (image, title) pair per panel
    panels = [
        (img, "Original Image"),
        (gt_mask, "Ground Truth"),
        (yolo_mask, f"YOLOv8 Prediction (IoU: {calculate_iou(gt_mask, yolo_mask):.2f})"),
        (maskrcnn_mask, f"Mask R-CNN Prediction (IoU: {calculate_iou(gt_mask, maskrcnn_mask):.2f})"),
    ]
    fig, axes = plt.subplots(1, 4, figsize=(20, 5))
    for position, (ax, (panel, title)) in enumerate(zip(axes, panels)):
        if position == 0:
            ax.imshow(panel)
        else:
            ax.imshow(panel, cmap='gray')
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

import glob

# The Mask R-CNN cell rebinds `model`, so `model` is the Mask R-CNN network here
# and no variable named `maskrcnn_model` is defined in this notebook. The trained
# YOLOv8 model is restored from the weights saved during training.
# NOTE(review): assumes Ultralytics wrote the training run under
# /content/runs/segment (the root the prediction cell also reads from) using the
# run name "yolov8_crack_detection", possibly with a numeric suffix — confirm.
yolo_checkpoints = glob.glob("/content/runs/segment/yolov8_crack_detection*/weights/best.pt")
if not yolo_checkpoints:
    raise FileNotFoundError("No trained YOLOv8 weights found; run the YOLOv8 training cell first")
# Several runs may exist; use the most recently written checkpoint
yolo_model = YOLO(max(yolo_checkpoints, key=os.path.getmtime))

# Visualize a few test images
test_images = [img for img in os.listdir("/content/dataset/test/images") if img.endswith(('.jpg', '.png'))]
for img_name in test_images[:3]:  # Show first 3
    img_path = os.path.join("/content/dataset/test/images", img_name)
    visualize_results(img_path, yolo_model, model)