In [9]:
import os
import numpy as np
import glob
import PIL.Image as Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import models
from torchsummary import summary
import torch.optim as optim
from time import time

import matplotlib.pyplot as plt
from IPython.display import clear_output
import xml.etree.ElementTree as ET
import matplotlib.patches as patches
import cv2

import os
import sys
sys.path.append('/zhome/65/e/156416/E24/IDLCV/Detection_deeplearning_in_computervision')
sys.path.append('/zhome/b6/d/154958/Potholes_boxing/Detection_deeplearning_in_computervision')
data_path = '/zhome/b6/d/154958/Potholes_boxing/Detection_deeplearning_in_computervision/Potholes/Potholes/annotated-images'

from Simple_cnn import train, Simple_CNN
from frednet import PotholeDataset, plot_image_with_bboxes

In [10]:
import torch.nn as nn

class Simple_CNN(nn.Module):
    def __init__(self, dropOutVal=0.2):
        super(Simple_CNN, self).__init__()
        self.network = nn.Sequential(
            # First Convolutional Block
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 128x128x3 -> 64x64x64

            # Second Convolutional Block
            nn.Dropout2d(dropOutVal),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 64x64x128 -> 32x32x128

            # Third Convolutional Block
            nn.Dropout2d(dropOutVal),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 32x32x128 -> 16x16x128

            # Fourth Convolutional Block
            nn.Dropout2d(dropOutVal),
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 16x16x64 -> 8x8x64

            # Flatten and Fully Connected Layers
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 256),  # Adjusted to the correct dimension after pooling
            nn.ReLU(),
            nn.Linear(256, 2),  # Output layer for binary classification
        )

    def forward(self, x):
        return self.network(x)


In [11]:
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader,Subset
import os
import random
from sklearn.model_selection import train_test_split


class EdgeBoxDataset(Dataset):
    def __init__(self, image_paths, label_paths=None, transform=None, edgebox_params=None, model_path=None):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.image_paths = image_paths
        self.label_paths = label_paths
        self.transform = transform
        self.edgebox_params = edgebox_params if edgebox_params else {'maxBoxes': 100}
        self.edge_box_detector = cv2.ximgproc.createEdgeBoxes(**self.edgebox_params)
        
        # Initialize structured edge detector for orientation map if model path is provided
        if model_path:
            self.edge_detector = cv2.ximgproc.createStructuredEdgeDetection(model_path)
        else:
            self.edge_detector = None
            print("Warning: No model path provided for edge orientation. EdgeBoxes proposals may fail.")

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Load image
        image_path = self.image_paths[idx]
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Generate Edge Boxes proposals
        edges = cv2.Canny(image, 50, 150).astype(np.float32)
        orientation_map = self.edge_detector.computeOrientation(edges)
        proposals, scores = self.edge_box_detector.getBoundingBoxes(edges, orientation_map)
        
        # Convert proposals to tensor
        proposals_tensor = torch.tensor(proposals, dtype=torch.float32).to(self.device)
        scores_tensor = torch.tensor(scores, dtype=torch.float32).to(self.device)

        # Load corresponding label
        if self.label_paths:
            label_path = self.label_paths[idx]
            labels = self._load_label(label_path)
            target = torch.tensor([label['name'] for label in labels], dtype=torch.long)
        else:
            target = torch.tensor([])  # Handle case where no labels are available

        if self.transform:
            image = self.transform(image)

        return image, proposals_tensor, target  # Include target in return

    
    def _load_label(self, label_path):
        tree = ET.parse(label_path)
        root = tree.getroot()
        
        boxes = []
        for obj in root.findall('object'):
            name = obj.find('name').text
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            
            # Append the box as a dictionary
            boxes.append({
                "name": name,
                "xmin": xmin,
                "ymin": ymin,
                "xmax": xmax,
                "ymax": ymax
            })
        
        return boxes

image_paths = [data_path+"/"+file for file in os.listdir(data_path) if file.endswith(".jpg")]
label_paths = [data_path+"/"+file for file in os.listdir(data_path) if file.endswith(".xml")]


random.seed(42)  # Ensure reproducibility
indices = list(range(len(image_paths)))
random.shuffle(indices)

model_path ="/zhome/b6/d/154958/Potholes_boxing/Detection_deeplearning_in_computervision/model.yml.gz"
#70/15/15 SPLIT
train_idx, test_idx = train_test_split(indices, test_size=0.3, random_state=42)
val_idx, test_idx = train_test_split(test_idx, test_size=0.5, random_state=42)

train_dataset = Subset(EdgeBoxDataset(image_paths, label_paths, model_path=model_path), train_idx)
val_dataset = Subset(EdgeBoxDataset(image_paths, label_paths,model_path=model_path), val_idx)
test_dataset = Subset(EdgeBoxDataset(image_paths, label_paths,model_path=model_path), test_idx)


batch_size = 20 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

import torch.nn.functional as F
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    correct_predictions = 0
    total_samples = 0
    cumulative_loss = 0.0
    
    for batch_idx, (data, target) in tqdm(enumerate(train_loader)):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        
        # Accumulate loss for averaging
        cumulative_loss += loss.item()
        
        # Calculate number of correct predictions
        predicted = output.argmax(dim=1)
        correct_predictions += (predicted == target).sum().item()
        total_samples += target.size(0)
    
    # Calculate average loss and accuracy
    accuracy = correct_predictions / total_samples
    avg_loss = cumulative_loss / len(train_loader)

    return accuracy, avg_loss
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from tqdm import tqdm
model = Simple_CNN()
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_acc = []
train_loss = []
num_epochs = 20

best_acc = 0.0
best_model = None

for epoch in range(1, num_epochs + 1):
    train_accuracy, train_avg_loss = train(model, device, train_loader, optimizer, epoch)
    train_acc.append(train_accuracy)
    train_loss.append(train_avg_loss)
    
    print(f"Epoch {epoch} Train Accuracy: {100 * train_accuracy:.1f}%, Train Loss: {train_avg_loss:.4f}")
    
    # Evaluate on the validation set
    val_correct = 0
    total_val_samples = 0
    for data, target in val_loader:
        data, target = data.to(device), target.to(device)
        with torch.no_grad():
            output = model(data)
        predicted = output.argmax(1)
        val_correct += (target == predicted).sum().item()
        total_val_samples += target.size(0)

    val_acc = val_correct / total_val_samples
    print(f"Epoch {epoch} Validation Accuracy: {100 * val_acc:.1f}%")

    # Update best model if validation accuracy improves
    if val_acc > best_acc:
        best_acc = val_acc
        best_model = model


0it [00:17, ?it/s]


ValueError: too many dimensions 'str'