In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install segmentation_models_pytorch
from IPython.display import clear_output
clear_output()

In [None]:
import os
import cv2
import numpy as np
from PIL import Image
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
import segmentation_models_pytorch as smp
import random  # Import the random module
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import transforms
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import torch.optim as optim
import albumentations as A

  check_for_updates()


In [None]:
#Creating augmnation for a better model
# Define the root directories
root_directory = "/content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code"
image_folder = os.path.join(root_directory, "image")
mask_folder = os.path.join(root_directory, "mask")

# Define transformations
transform = A.OneOf([  # Apply one of these transformations randomly
    A.HorizontalFlip(p=1.0),  # Flip horizontally
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=1.0),  # Shift, scale, rotate
    A.RandomCrop(height=1600, width=2400, p=1.0)  # Random cropping for size adjustment
], p=1.0)  # Always apply one transformation


# Get the list of images
image_files = sorted(os.listdir(image_folder))

# Augment each image and save with a new name
for image_name in image_files:
    if image_name.endswith(".jpg"):
        image_path = os.path.join(image_folder, image_name)
        mask_path = os.path.join(mask_folder, image_name)  # Assume masks have the same name

        # Load the image and mask
        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path)

        if image is None or mask is None:
            print(f"Skipping {image_name} due to loading error.")
            continue

        # Apply transformation
        augmented = transform(image=image, mask=mask)
        aug_image = augmented['image']
        aug_mask = augmented['mask']

        # Save the augmented image and mask with a new name
        aug_image_name = image_name.replace('.jpg', '_2.jpg')
        aug_mask_name = image_name.replace('.jpg', '_2.jpg')

        cv2.imwrite(os.path.join(image_folder, aug_image_name), aug_image)  # Save augmented image
        cv2.imwrite(os.path.join(mask_folder, aug_mask_name), aug_mask)  # Save augmented mask

        print(f"Augmented frame saved as {aug_image_name} and {aug_mask_name}.")


Augmented frame saved as FRAME 1_2.jpg and FRAME 1_2.jpg.
Augmented frame saved as FRAME 10_2.jpg and FRAME 10_2.jpg.
Augmented frame saved as FRAME 100_2.jpg and FRAME 100_2.jpg.
Augmented frame saved as FRAME 100_2_2.jpg and FRAME 100_2_2.jpg.
Augmented frame saved as FRAME 101_2.jpg and FRAME 101_2.jpg.
Augmented frame saved as FRAME 101_2_2.jpg and FRAME 101_2_2.jpg.
Augmented frame saved as FRAME 102_2.jpg and FRAME 102_2.jpg.
Augmented frame saved as FRAME 102_2_2.jpg and FRAME 102_2_2.jpg.
Augmented frame saved as FRAME 103_2.jpg and FRAME 103_2.jpg.
Augmented frame saved as FRAME 103_2_2.jpg and FRAME 103_2_2.jpg.
Augmented frame saved as FRAME 104_2.jpg and FRAME 104_2.jpg.
Augmented frame saved as FRAME 104_2_2.jpg and FRAME 104_2_2.jpg.
Augmented frame saved as FRAME 105_2.jpg and FRAME 105_2.jpg.
Augmented frame saved as FRAME 105_2_2.jpg and FRAME 105_2_2.jpg.
Augmented frame saved as FRAME 106_2.jpg and FRAME 106_2.jpg.
Augmented frame saved as FRAME 106_2_2.jpg and FRAME

In [None]:
#Create Data lables
# Define directories
scaler = MinMaxScaler()
root_directory = "/content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code"
desired_shape = (160, 256)  # Specify the desired shape
output_label_folder = "/content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/saved_labels"  # Folder to save labels

# Create the label folder if it doesn't exist
os.makedirs(output_label_folder, exist_ok=True)

image_folder = os.path.join(root_directory, "image")
mask_folder = os.path.join(root_directory, "mask")

# Read images and corresponding masks
image_files = sorted(os.listdir(image_folder))  # Sort to maintain order

for image_name in image_files:
    if image_name.endswith(".jpg"):
        image_path = os.path.join(image_folder, image_name)
        mask_path = os.path.join(mask_folder, image_name)  # Assume masks have the same name

        print(f"Processing image: {image_path}")
        print(f"Processing mask: {mask_path}")

        # Check if corresponding mask exists
        if os.path.exists(mask_path):
            msk = cv2.imread(mask_path, 1)

            if msk is not None:
                # Resize mask
                msk = cv2.resize(msk, desired_shape)
                msk = cv2.cvtColor(msk, cv2.COLOR_BGR2RGB)

                # Convert mask to binary label
                pomegranate = [255, 255, 255]  # White for pomegranates
                label = np.zeros(msk.shape[:2], dtype=np.uint8)
                label[np.all(msk == pomegranate, axis=-1)] = 1  # 1 for pomegranates, 0 for background

                # Save the label as an `.npy` file
                label_save_path = os.path.join(output_label_folder, image_name.replace('.jpg', '.npy'))
                np.save(label_save_path, label)
                print(f"Saved label for {image_name} at {label_save_path}")
            else:
                print(f"Error loading mask: {mask_path}")
        else:
            print(f"Mask not found for image: {image_path}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processing mask: /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/mask/FRAME 155.jpg
Saved label for FRAME 155.jpg at /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/saved_labels/FRAME 155.npy
Processing image: /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/image/FRAME 155_2.jpg
Processing mask: /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/mask/FRAME 155_2.jpg
Saved label for FRAME 155_2.jpg at /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/saved_labels/FRAME 155_2.npy
Processing image: /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/image/FRAME 155_2_2.jpg
Processing mask: /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/mask/FRAME 155_2_2.jpg
Saved label for FRAME 155_2_2.jpg at /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/saved_labels/FRAME 155_2_2.npy
Processing image: /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/image/FRAME 156.jpg
Processing mas

In [None]:
#Data Loder
class PomegranateDataset(Dataset):
    def __init__(self, image_paths, label_paths, transform=None, target_size=(160, 256)):
        self.image_paths = image_paths
        self.label_paths = label_paths
        self.transform = transform
        self.target_size = target_size  # Resize to target size

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Load image
        image = cv2.imread(self.image_paths[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Load label (from .npy file)
        label = np.load(self.label_paths[idx])

        # Resize both image and label to the target size
        image = cv2.resize(image, self.target_size)
        label = cv2.resize(label, self.target_size, interpolation=cv2.INTER_NEAREST)

        # Apply transformation to the image (e.g., ToTensor and normalization)
        if self.transform:
            image = self.transform(image)

        # Convert label to a tensor
        label = torch.tensor(label, dtype=torch.long)

        return image, label


# Define Transformations for Images
transform = transforms.Compose([
    transforms.ToTensor()  # Convert to Tensor
])

# Get paths for images and labels
image_folder = "/content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/image"
label_folder = "/content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/saved_labels"

image_paths = sorted([os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".jpg")])
label_paths = sorted([os.path.join(label_folder, f) for f in os.listdir(label_folder) if f.endswith(".npy")])

# Split the dataset into training and testing
from sklearn.model_selection import train_test_split

image_train_paths, image_test_paths, label_train_paths, label_test_paths = train_test_split(
    image_paths, label_paths, test_size=349, random_state=42, shuffle=True
)

# Create Datasets and DataLoaders
train_dataset = PomegranateDataset(image_train_paths, label_train_paths, transform=transform, target_size=(256, 256))
test_dataset = PomegranateDataset(image_test_paths, label_test_paths, transform=transform, target_size=(256, 256))

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Print dataset stats to confirm
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of testing samples: {len(test_dataset)}")


Number of training samples: 1500
Number of testing samples: 349


In [None]:
#Model Run Unet image size of 160 hight and 256 width
NUM_CLASSES = 2  # 2 classes: background and solar panel
GPU = 0
SEED = 2023

torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
os.environ['PYTHONHASHSEED'] = str(SEED)
torch.cuda.manual_seed_all(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize model
model = smp.Unet("resnet152", classes=NUM_CLASSES, in_channels=3, encoder_weights="imagenet")
model.to(device)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001,weight_decay=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

# Target size for images and masks (divisible by 32 for Unet)
target_size = (160, 256)  # Replace with the desired size divisible by 32

# Training loop
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, lables in train_dataloader:
        # Move images and masks to the device
        images = images.to(device, dtype=torch.float)

        # Convert masks to float for interpolation, then back to long after resizing
        lables = lables.to(device, dtype=torch.float)
        images = F.interpolate(images, size=target_size, mode="bilinear", align_corners=False)
        lables = F.interpolate(lables.unsqueeze(1), size=target_size, mode="nearest").squeeze(1)
        lables = lables.to(dtype=torch.long)  # Convert back to long

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Compute loss
        loss = criterion(outputs, lables)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)

    # Compute average loss for the epoch
    epoch_loss = running_loss / len(train_dataloader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # Step the scheduler
    scheduler.step(epoch_loss)


In [None]:
# Path to save the model in your Google Drive
save_path = "/content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/unet_model_improve_resnet152.pth"

# Save the model
torch.save(model.state_dict(), save_path)

print(f"Model saved to {save_path}")


Model saved to /content/drive/MyDrive/Thesis/POM-IMG/pom_seg_img_code/unet_model_improve_efficientnet-b7.pth


In [None]:
from tqdm import tqdm
# Define a function to calculate mIoU
def calculate_miou(y_true, y_pred, num_classes):
    y_true = y_true.flatten()
    y_pred = y_pred.flatten()
    cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))
    iou = np.diag(cm) / (cm.sum(axis=1) + cm.sum(axis=0) - np.diag(cm))
    return np.nanmean(iou)

# Validation function
def validate(model, dataloader, device, num_classes):
    model.eval()
    all_preds = []
    all_masks = []
    with torch.no_grad():
        for images, masks in tqdm(dataloader, desc="Validating"):
            images, masks = images.to(device, dtype=torch.float), masks.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            all_preds.append(preds.cpu().numpy())
            all_masks.append(masks.cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_masks = np.concatenate(all_masks)

    # Calculate mIoU
    miou = calculate_miou(all_masks, all_preds, num_classes)

    # Calculate precision, recall, and F1-score
    precision = precision_score(all_masks.flatten(), all_preds.flatten())
    recall = recall_score(all_masks.flatten(), all_preds.flatten())
    f1 = f1_score(all_masks.flatten(), all_preds.flatten())

    return miou, precision, recall, f1

# Assuming the model is already defined and trained, and the test_dataloader is set up
num_classes = 2  # Set this to the number of classes in your dataset
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Validate the model on the first test set
miou1, precision1, recall1, f1_1 = validate(model, test_dataloader, device, num_classes)
print(f"Dataset 1 - mIoU: {miou1:.4f}, Precision: {precision1:.4f}, Recall: {recall1:.4f}, F1-Score: {f1_1:.4f}")

Validating: 100%|██████████| 88/88 [00:51<00:00,  1.71it/s]


Dataset 1 - mIoU: 0.8458, Precision: 0.8913, Recall: 0.7871, F1-Score: 0.8360


In [None]:
def plot_predictions(model, dataloader, device, num_images=5):
    model.eval()
    images, masks = next(iter(dataloader))
    images, masks = images.to(device, dtype=torch.float), masks.to(device)

    with torch.no_grad():
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)

    images = images.cpu().numpy()
    masks = masks.cpu().numpy()
    preds = preds.cpu().numpy()

    # Limit the number of images to the batch size
    num_images = min(num_images, len(images))

    fig, axes = plt.subplots(num_images, 3, figsize=(15, num_images * 5))
    for i in range(num_images):
        axes[i, 0].imshow(images[i].transpose(1, 2, 0))
        axes[i, 0].set_title('Test Image')
        axes[i, 0].axis('off')

        axes[i, 1].imshow(masks[i], cmap='gray')
        axes[i, 1].set_title('Ground Truth Mask')
        axes[i, 1].axis('off')

        axes[i, 2].imshow(preds[i], cmap='gray')
        axes[i, 2].set_title('Predicted Mask')
        axes[i, 2].axis('off')

    plt.tight_layout()
    plt.show()

# Plot predictions for a few test images
plot_predictions(model, test_dataloader, device, num_images=5)

Output hidden; open in https://colab.research.google.com to view.