In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os

In [2]:
from google.colab import drive
# Attach Google Drive to the Colab runtime at the mount point the dataset paths below rely on.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# Root folder of the crack dataset on the mounted Drive.
# Edit the path components below if your dataset lives somewhere else.
base_path = "/".join([
    "/content/drive",
    "Othercomputers",
    "My Laptop",
    "New small dataset",
])

In [None]:
# Implementing U-Net for crack segmentation
# Custom Dataset Class
class CrackDataset(Dataset):
    """Paired image/mask dataset for binary crack segmentation.

    Every ``.png``/``.jpg`` file found in ``image_dir`` is one sample; its mask
    is the file with the same name inside ``mask_dir``. Both files are loaded
    as single-channel (grayscale) images.

    Args:
        image_dir: Directory holding the input images.
        mask_dir: Directory holding the masks, named exactly like their images.
        transform: Optional callable applied to the image. It is also applied
            to the mask when ``mask_transform`` is not given.
        mask_transform: Optional callable applied to the mask instead of
            ``transform``. The mask is binarised with ``> 0.5`` *after* its
            transform, so a value-changing transform shifts the cut-off: with
            ``Normalize(mean=0.5, std=0.5)`` the test ``(x - 0.5) / 0.5 > 0.5``
            only keeps pixels above 0.75 of full scale. Pass a mask transform
            without normalisation to threshold at mid-gray.

    Each item is an ``(image, mask)`` pair; the mask holds only 0.0 and 1.0.
    """

    def __init__(self, image_dir, mask_dir, transform=None, mask_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mask_transform = mask_transform
        # os.listdir returns entries in arbitrary order; sort so that a given
        # index always refers to the same file across runs and machines.
        self.images = sorted(
            f for f in os.listdir(image_dir) if f.endswith(('.png', '.jpg'))
        )

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        mask_path = os.path.join(self.mask_dir, img_name)

        # convert() returns a fully loaded copy, so the file handles can be
        # closed as soon as the with-blocks end.
        with Image.open(img_path) as img_file:
            image = img_file.convert('L')  # Grayscale
        with Image.open(mask_path) as mask_file:
            mask = mask_file.convert('L')  # Grayscale mask

        if self.transform:
            image = self.transform(image)

        # Fall back to the image transform so existing two-argument usage
        # behaves exactly as before.
        mask_tf = self.mask_transform if self.mask_transform is not None else self.transform
        if mask_tf:
            mask = mask_tf(mask)

        # Ensure mask is binary
        mask = (mask > 0.5).float()  # Threshold to 0 or 1
        return image, mask

In [5]:
import torch
import torch.nn as nn

class UNet(nn.Module):
    """Small two-level U-Net that outputs a per-pixel probability map.

    The encoder applies three 3x3 convolutions (64, 128, 256 channels) with a
    2x2 max-pool between them. The decoder upsamples twice with transposed
    convolutions and concatenates the matching encoder feature map (skip
    connection) after each upsampling step.

    Args:
        in_channels: Number of channels of the input image (default 1, grayscale).
        out_channels: Number of channels of the predicted map (default 1).

    ``forward`` returns sigmoid activations with the same height and width as
    the input, so the output pairs with ``nn.BCELoss``.
    """

    def __init__(self, in_channels=1, out_channels=1):
        super().__init__()
        # Encoder
        self.enc1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)  # in_channels -> 64
        self.enc2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)  # 64 -> 128
        self.enc3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)  # 128 -> 256
        # Decoder
        self.dec1 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)  # 256 -> 128, doubles H and W
        self.conv_after_skip1 = nn.Conv2d(256, 128, kernel_size=3, padding=1)  # after skip: 128+128=256 -> 128
        self.dec2 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)  # 128 -> 64, doubles H and W
        self.conv_after_skip2 = nn.Conv2d(128, 64, kernel_size=3, padding=1)  # after skip: 64+64=128 -> 64
        self.dec4 = nn.Conv2d(64, out_channels, kernel_size=3, padding=1)  # 64 -> out_channels
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    @staticmethod
    def _pad_to_match(upsampled, skip):
        """Zero-pad ``upsampled`` so its height/width equal those of ``skip``.

        Max-pooling floors odd sizes, so after upsampling a feature map can be
        one pixel smaller than the encoder map it is concatenated with.
        """
        dh = skip.size(2) - upsampled.size(2)
        dw = skip.size(3) - upsampled.size(3)
        if dh == 0 and dw == 0:
            return upsampled
        # F.pad takes (left, right, top, bottom) for the last two dimensions.
        return nn.functional.pad(
            upsampled, [dw // 2, dw - dw // 2, dh // 2, dh - dh // 2]
        )

    def forward(self, x):
        # Encoder (shapes for an input of [batch, C, H, W])
        e1 = self.relu(self.enc1(x))  # [batch, 64, H, W]
        e2 = self.pool(e1)  # [batch, 64, H/2, W/2]
        e3 = self.relu(self.enc2(e2))  # [batch, 128, H/2, W/2]
        e4 = self.pool(e3)  # [batch, 128, H/4, W/4]
        e5 = self.relu(self.enc3(e4))  # [batch, 256, H/4, W/4]
        # Decoder
        d1 = self._pad_to_match(self.dec1(e5), e3)  # [batch, 128, H/2, W/2]
        d2 = torch.cat([d1, e3], dim=1)  # [batch, 128+128=256, H/2, W/2]
        d3 = self.relu(self.conv_after_skip1(d2))  # [batch, 128, H/2, W/2]
        d4 = self._pad_to_match(self.dec2(d3), e1)  # [batch, 64, H, W]
        d5 = torch.cat([d4, e1], dim=1)  # [batch, 64+64=128, H, W]
        d6 = self.relu(self.conv_after_skip2(d5))  # [batch, 64, H, W]
        d7 = self.sigmoid(self.dec4(d6))  # [batch, out_channels, H, W]
        return d7

In [6]:
# Preprocessing pipeline handed to the datasets: tensor conversion followed by
# normalisation with a single mean/std entry (one channel, grayscale).
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=[0.5], std=[0.5])  # computes (x - 0.5) / 0.5
transform = transforms.Compose([_to_tensor, _normalize])

In [7]:
# Load Datasets
# All four folders are derived from `base_path` (defined near the top of the
# notebook) so relocating the dataset only requires editing that one variable.
def _split_dir(split, kind):
    """Return the ``<base_path>/<split>/<kind>`` folder of the dataset."""
    return f"{base_path}/{split}/{kind}"


train_dataset = CrackDataset(
    image_dir=_split_dir('train', 'images'),
    mask_dir=_split_dir('train', 'masks'),
    transform=transform
)
test_dataset = CrackDataset(
    image_dir=_split_dir('test', 'images'),
    mask_dir=_split_dir('test', 'masks'),
    transform=transform
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

In [8]:
# Build the network, loss and optimiser; everything runs on the GPU when one is visible.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = UNet()
model = model.to(device)

# The network already ends in a sigmoid, so plain binary cross-entropy is used.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training Loop: one optimiser step per batch, mean batch loss reported per epoch.
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_masks in train_loader:
        batch_images = batch_images.to(device)
        batch_masks = batch_masks.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        batch_loss = criterion(model(batch_images), batch_masks)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}")

Epoch 1/50, Loss: 0.6090927918752035
Epoch 2/50, Loss: 0.2543838496009509
Epoch 3/50, Loss: 0.16732763995726904
Epoch 4/50, Loss: 0.11665660639603932
Epoch 5/50, Loss: 0.08551945785681407
Epoch 6/50, Loss: 0.08706501871347427
Epoch 7/50, Loss: 0.0784023900826772
Epoch 8/50, Loss: 0.06605982035398483
Epoch 9/50, Loss: 0.06753902261455853
Epoch 10/50, Loss: 0.06334218010306358
Epoch 11/50, Loss: 0.061021625995635986
Epoch 12/50, Loss: 0.06272335598866145
Epoch 13/50, Loss: 0.055942535400390625
Epoch 14/50, Loss: 0.053370303163925804
Epoch 15/50, Loss: 0.04932753990093867
Epoch 16/50, Loss: 0.0463192289074262
Epoch 17/50, Loss: 0.04627600063880285
Epoch 18/50, Loss: 0.04674041147033373
Epoch 19/50, Loss: 0.048343509435653687
Epoch 20/50, Loss: 0.04590291902422905


In [None]:
# Persist the trained parameters (the state_dict only, not the module object).
checkpoint_path = 'unet_crack_segmentation.pth'
torch.save(model.state_dict(), checkpoint_path)

In [None]:
# Evaluate the in-memory trained model on the test set.
# Predictions are thresholded at 0.5 and compared against the binary masks;
# pixel counts are accumulated over the whole test set before the ratios are
# taken, so every pixel carries the same weight regardless of batch size.
model.eval()
intersection_total = 0.0
union_total = 0.0
pred_total = 0.0
target_total = 0.0
with torch.no_grad():
    for images, masks in test_loader:
        images, masks = images.to(device), masks.to(device)
        outputs = model(images)
        outputs = (outputs > 0.5).float()  # Threshold to binary
        intersection_total += (outputs * masks).sum().item()
        union_total += ((outputs + masks) > 0).float().sum().item()
        pred_total += outputs.sum().item()
        target_total += masks.sum().item()

# Both metrics are undefined when neither prediction nor ground truth contains
# a crack pixel; report NaN instead of dividing by zero.
test_iou = intersection_total / union_total if union_total > 0 else float('nan')
dice_denominator = pred_total + target_total
test_dice = 2.0 * intersection_total / dice_denominator if dice_denominator > 0 else float('nan')
print(f"Test IoU: {test_iou:.4f}, Dice: {test_dice:.4f}")

In [None]:
import os
# Print the contents of the first five label files returned by the directory listing.
label_dir = "/content/drive/Othercomputers/My Laptop/New small dataset/train/labels/"
preview_files = os.listdir(label_dir)[:5]
for label_file in preview_files:
    label_path = os.path.join(label_dir, label_file)
    with open(label_path, 'r') as handle:
        label_text = handle.read().strip()
    print(f"Label {label_file}: {label_text}")

Label CFD_008.txt: 0 0.49888392857142855 0.203125 0.9977678571428571 0.39285714285714285
Label CFD_012.txt: 0 0.49888392857142855 0.4732142857142857 0.9977678571428571 0.45535714285714285
Label CFD_009.txt: 0 0.49888392857142855 0.4888392857142857 0.9977678571428571 0.10267857142857142
Label CFD_003.txt: 0 0.6696428571428571 0.5323660714285714 0.65625 0.9263392857142857
Label CFD_015.txt: 0 0.38839285714285715 0.5033482142857143 0.7767857142857143 0.04999999999999995


In [None]:
# Converting Masks to Bounding Boxes
import os
import numpy as np
from PIL import Image

def mask_to_yolo_bbox(mask_path, output_label_path, img_width=None, img_height=None, min_size=0.05):
    """Write one YOLO-format box that encloses every crack pixel of a mask.

    The mask is read as grayscale and binarised (``> 128`` when it contains
    values above 1, otherwise ``> 0``). The label file receives a single line
    ``0 x_center y_center width height`` with all four values normalised by
    the image size, or is left empty when the mask has no crack pixels.

    Args:
        mask_path: Path of the mask image.
        output_label_path: Path of the ``.txt`` label file to (over)write.
        img_width: Width in pixels used for normalisation. Defaults to the
            mask's own width.
        img_height: Height in pixels used for normalisation. Defaults to the
            mask's own height.
        min_size: Minimum normalised box width/height. A thinner box is grown
            around its centre to this size and clamped to the image.
    """
    with Image.open(mask_path) as mask_file:
        mask = np.array(mask_file.convert('L'))
    mask = (mask > 128).astype(np.uint8) if mask.max() > 1 else (mask > 0).astype(np.uint8)

    # Normalise by the real mask size unless the caller overrides it.
    if img_width is None:
        img_width = mask.shape[1]
    if img_height is None:
        img_height = mask.shape[0]

    rows, cols = np.nonzero(mask)
    if rows.size == 0:  # No crack: leave an empty label file
        with open(output_label_path, 'w'):
            pass
        return

    # Pixel indices are inclusive, so the far edge of the box lies one past the
    # last crack pixel; otherwise a full-width crack would come out narrower
    # than the image and off-centre by half a pixel.
    x_min, x_max = float(cols.min()), float(cols.max()) + 1.0
    y_min, y_max = float(rows.min()), float(rows.max()) + 1.0

    # Ensure minimum size for bounding box
    x_min, x_max = _widen_span(x_min, x_max, img_width, min_size)
    y_min, y_max = _widen_span(y_min, y_max, img_height, min_size)

    width = (x_max - x_min) / img_width
    height = (y_max - y_min) / img_height
    x_center = (x_min + x_max) / 2 / img_width
    y_center = (y_min + y_max) / 2 / img_height

    with open(output_label_path, 'w') as f:
        f.write(f"0 {x_center} {y_center} {width} {height}\n")


def _widen_span(lo, hi, limit, min_fraction):
    """Grow ``[lo, hi]`` around its centre to ``min_fraction * limit``, clamped to ``[0, limit]``."""
    if (hi - lo) / limit < min_fraction:
        centre = (lo + hi) / 2
        half = min_fraction * limit / 2
        lo = max(0, centre - half)
        hi = min(limit, centre + half)
    return lo, hi

# Regenerate labels
# For every image of each split, derive a YOLO label file from the mask that
# shares the image's file name. Existing label files are overwritten.
base_path = "/content/drive/Othercomputers/My Laptop/New small dataset"
for split in ['train', 'test']:
    image_dir = f"{base_path}/{split}/images/"
    mask_dir = f"{base_path}/{split}/masks/"
    label_dir = f"{base_path}/{split}/labels/"
    os.makedirs(label_dir, exist_ok=True)
    # Sorted so the processing order (and any message below) is reproducible.
    for img_name in sorted(os.listdir(image_dir)):
        if not img_name.endswith(('.png', '.jpg')):
            continue
        mask_path = os.path.join(mask_dir, img_name)
        if not os.path.isfile(mask_path):
            # One missing mask should not abort the whole regeneration run.
            print(f"Skipping {img_name}: no mask found at {mask_path}")
            continue
        label_path = os.path.join(label_dir, img_name.replace('.png', '.txt').replace('.jpg', '.txt'))
        mask_to_yolo_bbox(mask_path, label_path)

In [None]:
# Count how many training files are named "noncrack*" versus the folder total.
train_images_dir = "/content/drive/Othercomputers/My Laptop/New small dataset/train/images/"
train_entries = os.listdir(train_images_dir)
noncrack_images = [name for name in train_entries if name.startswith("noncrack")]
print(f"Number of noncrack images: {len(noncrack_images)}")
print(f"Total images: {len(train_entries)}")

Number of noncrack images: 0
Total images: 10


In [None]:
import shutil
# Down-sample the "noncrack*" training images: keep the first 10% (in sorted
# name order) and delete the rest together with their masks and labels.
# WARNING: this deletes files from the dataset folder and is not idempotent;
# each re-run again keeps only 10% of whatever is left.
train_images_dir = "/content/drive/Othercomputers/My Laptop/New small dataset/train/images/"
train_masks_dir = "/content/drive/Othercomputers/My Laptop/New small dataset/train/masks/"
train_labels_dir = "/content/drive/Othercomputers/My Laptop/New small dataset/train/labels/"
# Sorted so that which files survive does not depend on directory listing order.
noncrack_images = sorted(img for img in os.listdir(train_images_dir) if img.startswith("noncrack"))
keep_count = int(len(noncrack_images) * 0.1)
for img in noncrack_images[keep_count:]:  # Keep only 10%
    label_name = img.replace('.png', '.txt').replace('.jpg', '.txt')
    companions = (
        os.path.join(train_images_dir, img),
        os.path.join(train_masks_dir, img),
        os.path.join(train_labels_dir, label_name),
    )
    for path in companions:
        # A missing mask or label must not abort the loop half-way and leave
        # the dataset partially pruned.
        if os.path.exists(path):
            os.remove(path)

In [None]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.152-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [None]:
import yaml

# Dataset description for YOLO training: absolute image folders for the train
# and validation splits, the number of classes and their names.
data_yaml_content = dict(
    train="/content/drive/Othercomputers/My Laptop/New small dataset/train/images/",
    val="/content/drive/Othercomputers/My Laptop/New small dataset/test/images/",
    nc=1,
    names=["crack"],
)

# Write the description to disk.
with open("/content/data.yaml", "w") as yaml_file:
    yaml.dump(data_yaml_content, yaml_file)

# Confirm the file was created.
import os
yaml_written = os.path.exists("/content/data.yaml")
print(yaml_written)  # Should print True

True


In [None]:
# Train a YOLOv8-small detector (yolov8s.pt) on the dataset described by
# /content/data.yaml: 50 epochs, 448 px images, initial learning rate 0.005.
# NOTE(review): patience equals the epoch count here, so early stopping
# presumably cannot end the run before the last epoch; confirm against the
# Ultralytics `patience` documentation.
# NOTE(review): the captured log reports device=cpu and name=train2 for this run.
!yolo detect train model=yolov8s.pt data=/content/data.yaml epochs=50 imgsz=448 lr0=0.005 patience=50

Ultralytics 8.3.152 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=448, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.005, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=50, perspective=0.0, plots=True, pose=12.0, pretrained

In [None]:
from PIL import Image, ImageEnhance
import os

# Double the contrast of every dataset image IN PLACE (the files on Drive are
# overwritten). A marker file per split records that the enhancement was
# applied, so re-running this cell does not compound the effect.
CONTRAST_FACTOR = 2.0  # Increase contrast
for split in ['train', 'test']:
    split_dir = f"/content/drive/Othercomputers/My Laptop/New small dataset/{split}"
    images_dir = f"{split_dir}/images/"
    marker_path = os.path.join(split_dir, ".contrast_enhanced")
    if os.path.exists(marker_path):
        print(f"{split}: contrast already enhanced, skipping")
        continue
    for img_name in sorted(os.listdir(images_dir)):
        # Skip anything that is not one of the image types used by this dataset.
        if not img_name.endswith(('.png', '.jpg')):
            continue
        img_path = os.path.join(images_dir, img_name)
        # Close the source file before writing back to the same path.
        with Image.open(img_path) as img:
            enhanced = ImageEnhance.Contrast(img).enhance(CONTRAST_FACTOR)
        enhanced.save(img_path)
    with open(marker_path, 'w') as marker:
        marker.write(f"contrast factor {CONTRAST_FACTOR}\n")

In [None]:
# Train the smaller YOLOv8-nano detector (yolov8n.pt) on the same data.yaml
# for 50 epochs at 448 px, with every other setting left at its default.
# This cell comes after the in-place contrast-enhancement cell.
# NOTE(review): the captured log reports lr0=0.01, device=cpu and name=train3
# for this run.
!yolo detect train model=yolov8n.pt data=/content/data.yaml epochs=50 imgsz=448

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...
100% 6.25M/6.25M [00:04<00:00, 1.51MB/s]
Ultralytics 8.3.152 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=448, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train3, 

In [None]:
# Run detection on the test images with the best weights stored under
# runs/detect/train and save the annotated results (save=True).
# NOTE(review): the two training cells above logged run names train2 and
# train3, so runs/detect/train presumably belongs to an earlier run; confirm
# these are the weights you intend to evaluate.
!yolo detect predict model=runs/detect/train/weights/best.pt source="/content/drive/Othercomputers/My Laptop/New small dataset/test/images/" save=True

Ultralytics 8.3.152 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
Model summary (fused): 72 layers, 11,125,971 parameters, 0 gradients, 28.4 GFLOPs

image 1/10 /content/drive/Othercomputers/My Laptop/New small dataset/test/images/CFD_001.jpg: 448x448 24 cracks, 387.9ms
image 2/10 /content/drive/Othercomputers/My Laptop/New small dataset/test/images/CFD_007.jpg: 448x448 8 cracks, 332.9ms
image 3/10 /content/drive/Othercomputers/My Laptop/New small dataset/test/images/CFD_011.jpg: 448x448 44 cracks, 329.8ms
image 4/10 /content/drive/Othercomputers/My Laptop/New small dataset/test/images/CFD_013.jpg: 448x448 31 cracks, 346.1ms
image 5/10 /content/drive/Othercomputers/My Laptop/New small dataset/test/images/CFD_014.jpg: 448x448 26 cracks, 328.2ms
image 6/10 /content/drive/Othercomputers/My Laptop/New small dataset/test/images/CFD_019.jpg: 448x448 19 cracks, 328.7ms
image 7/10 /content/drive/Othercomputers/My Laptop/New small dataset/test/images/CFD_037.jpg: 448x448 17 cracks,

In [None]:
import pandas as pd
# Load the per-epoch training log of the run stored under runs/detect/train
# and show its final rows.
results_csv = "runs/detect/train/results.csv"
results = pd.read_csv(results_csv)
last_epochs = results.tail()
print(last_epochs)  # Check the last few epochs

    epoch     time  train/box_loss  train/cls_loss  train/dfl_loss  \
56     57  1029.27         1.02249         1.10951         1.20013   
57     58  1046.45         1.05903         1.17267         1.15806   
58     59  1068.38         0.96677         1.04143         1.09664   
59     60  1088.04         0.97679         1.22508         1.06456   
60     61  1105.73         1.05872         1.39120         1.14657   

    metrics/precision(B)  metrics/recall(B)  metrics/mAP50(B)  \
56               0.40204                0.3           0.28409   
57               0.40204                0.3           0.28409   
58               0.40204                0.3           0.28409   
59               0.31411                0.4           0.21882   
60               0.31411                0.4           0.21882   

    metrics/mAP50-95(B)  val/box_loss  val/cls_loss  val/dfl_loss    lr/pg0  \
56              0.12165       1.94123       2.94284       1.83577  0.000499   
57              0.12165       

In [None]:
import matplotlib.pyplot as plt
import torch
import numpy as np
from torchvision import transforms


def describe_crack(mask, min_row_pixels=10, min_band_rows=10):
    """Find the dominant horizontal band of crack pixels in a binary mask.

    A row counts as a crack row when it holds more than ``min_row_pixels``
    crack pixels. Consecutive crack rows are grouped into bands; bands shorter
    than ``min_band_rows`` rows are dropped, and of the remaining ones the band
    with the most crack pixels wins (the first one on a tie).

    Args:
        mask: 2-D array of 0/1 values, indexed as (row, column).
        min_row_pixels: Crack-pixel count a row must exceed.
        min_band_rows: Minimum number of consecutive rows for a band.

    Returns:
        ``(description, band)`` where ``band`` is ``(y_min, y_max)`` with
        inclusive row indices, or ``None`` when no band qualifies.
    """
    crack_rows = np.where(mask.sum(axis=1) > min_row_pixels)[0]
    if len(crack_rows) == 0:
        return "No crack detected.", None

    # Split the sorted row indices wherever two neighbours are not adjacent.
    breaks = np.where(np.diff(crack_rows) != 1)[0] + 1
    bands = [band for band in np.split(crack_rows, breaks) if len(band) >= min_band_rows]
    if not bands:
        return "No significant crack detected.", None

    band_pixels = [mask[band].sum() for band in bands]
    largest_band = bands[int(np.argmax(band_pixels))]
    y_min, y_max = largest_band[0], largest_band[-1]
    crack_center_y = (y_min + y_max) // 2
    crack_desc = f"Crack is visible: A horizontal crack is detected from y={y_min} to y={y_max}, centered at y={crack_center_y}, spanning most of the image width."
    return crack_desc, (y_min, y_max)


# Assuming test_loader, model, and device are defined
model.eval()

with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        outputs = (outputs > 0.5).float()

        for i in range(images.shape[0]):  # Process each image in the batch
            img = images[i, 0].cpu().numpy()
            mask = outputs[i, 0].cpu().numpy()

            # Find crack location
            crack_desc, band = describe_crack(mask)

            # Visualize: input (with the detected band marked) next to the predicted mask
            fig, (ax_img, ax_mask) = plt.subplots(1, 2, figsize=(10, 5))
            ax_img.imshow(img, cmap='gray')
            if band is not None:
                y_min, y_max = band
                ax_img.axhline(y_min, color='red', linestyle='--', label='Crack Top')
                ax_img.axhline(y_max, color='red', linestyle='--', label='Crack Bottom')
                ax_img.legend()
            ax_img.set_title("Input Image")

            ax_mask.imshow(mask, cmap='gray')
            ax_mask.set_title("U-Net Predicted Mask")
            plt.show()

            print(crack_desc)
        break  # Remove this to process all images