In [None]:
import yaml
import torch
from torch.utils.data import Dataset
from tqdm.notebook import tqdm
import numpy as np

# Central run configuration, looked up by key from the cells below.
config = dict(
    # Data loading.
    # NOTE(review): the batchsize_* entries look like intended DataLoader
    # batch sizes — confirm where they are consumed.
    batchsize_train=30,
    batchsize_val=30,
    shuffle_train=True,
    shuffle_val=False,
    num_workers_train_loader=4,
    num_workers_val_loader=4,
    # Optimisation.
    epochs=4,
    lr=0.0001,
)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from scipy.optimize import linear_sum_assignment
import h5py
from tqdm import tqdm

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch
import torch.nn as nn
import torch.nn.functional as F


class GateDetectionNet(nn.Module):
    """Small CNN that regresses ``num_gates`` fixed-length descriptors per image.

    Architecture: three ``3x3 conv -> ReLU -> 2x2 max-pool`` stages (16, 32 and
    64 channels), four fully connected ReLU layers (1024, 512, 256, 128) and a
    linear head. The head output is reshaped to
    ``(batch, num_gates, num_outputs_per_gate)`` and squashed slot-wise:

    * slots ``0::3`` and ``1::3`` -> ``sigmoid``      (range [0, 1])
    * slots ``2::3``              -> ``2 * sigmoid``  (range [0, 2])

    With the default 13 outputs per gate, slot 12 falls into the ``0::3``
    group and is therefore also limited to [0, 1].

    Args:
        num_gates (int): Number of gate descriptors predicted per image.
        num_outputs_per_gate (int): Length of each gate descriptor.
        conf_threshold (float): Stored as ``self.conf_threshold`` but not used
            by ``forward``; kept so existing constructor calls keep working.
        input_size (tuple[int, int]): ``(height, width)`` of the input images.
            Determines the size of the first fully connected layer. Defaults
            to ``(480, 640)``, the size the network was originally written for.

    Raises:
        ValueError: If either input dimension is smaller than 8, because the
            three pooling stages would then leave no spatial extent.
    """

    def __init__(
        self,
        num_gates=6,
        num_outputs_per_gate=13,
        conf_threshold=0.5,
        input_size=(480, 640),
    ):
        super().__init__()

        in_height, in_width = input_size
        if in_height < 8 or in_width < 8:
            raise ValueError(
                f"input_size must be at least (8, 8), got {tuple(input_size)}"
            )

        # Convolutional feature extractor; padding=1 keeps the spatial size,
        # so only the pooling layers shrink the feature maps.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Three 2x2 poolings floor-halve each spatial dimension three times,
        # which equals an integer division by 8.
        flattened_size = 64 * (in_height // 8) * (in_width // 8)

        # Fully connected trunk.
        self.fc1 = nn.Linear(flattened_size, 1024)
        self.fc11 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)

        # Head producing all gate descriptors at once.
        self.gate_pred_layer = nn.Linear(128, num_gates * num_outputs_per_gate)

        self.num_gates = num_gates
        self.num_outputs_per_gate = num_outputs_per_gate
        self.conf_threshold = conf_threshold
        self.input_size = (in_height, in_width)

    def forward(self, x):
        """Predict gate descriptors for a batch of images.

        Args:
            x (torch.Tensor): Batch of shape ``(batch, 3, H, W)`` whose pooled
                spatial size matches the ``input_size`` given at construction.

        Returns:
            torch.Tensor: Shape ``(batch, num_gates, num_outputs_per_gate)``
            with the slot-wise ranges described in the class docstring.
        """
        # Conv -> ReLU -> max-pool, three times.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)

        for fc in (self.fc1, self.fc11, self.fc2, self.fc3):
            x = F.relu(fc(x))

        gate_pred = self.gate_pred_layer(x).view(
            x.size(0), self.num_gates, self.num_outputs_per_gate
        )

        # Build the constrained output out of place instead of writing into a
        # view of the head output: every slot is sigmoid-squashed and every
        # third slot (2, 5, 8, ...) is additionally scaled to [0, 2].
        scale = gate_pred.new_ones(self.num_outputs_per_gate)
        scale[2::3] = 2.0
        return torch.sigmoid(gate_pred) * scale

In [None]:
import h5py
import hdf5plugin

In [None]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# To force CPU execution, replace the line below with
# `device = torch.device("cpu")`.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network with its default hyper-parameters and move it to `device`.
model = GateDetectionNet().to(device)
print(f"Device: {device}")

In [None]:
import h5py
from torch.utils.data import Dataset
import torch


class GateDetectionDataset(Dataset):
    """Image/target pairs read from a single HDF5 file.

    The file must contain an ``images`` dataset and one ``targets/<index:05d>``
    dataset per image. All targets are read into memory at construction time;
    images are read from disk on demand.

    The HDF5 handle is opened lazily, on the first image access in each
    process, rather than in ``__init__``. A handle opened before the
    ``DataLoader`` forks its workers would be shared by all of them, and h5py
    handles cannot be pickled for spawn-based workers. If samples are read in
    the parent process before the loaders are created, call ``close()`` first
    so every worker opens its own handle.
    """

    def __init__(self, h5_file_path):
        """
        Read the sample count and all targets from the HDF5 file.

        Args:
            h5_file_path (str): Path to the HDF5 file containing images and targets.
        """
        self.h5_file_path = h5_file_path
        self._h5f = None
        self._images = None

        # Short-lived handle: nothing stays open after construction.
        with h5py.File(h5_file_path, "r") as h5f:
            self._num_samples = len(h5f["images"])
            self.targets = [
                h5f[f"targets/{i:05d}"][()] for i in range(self._num_samples)
            ]

    @property
    def h5f(self):
        """Read-only HDF5 file handle, opened on first use."""
        if self._h5f is None:
            self._h5f = h5py.File(self.h5_file_path, "r")
        return self._h5f

    @property
    def images(self):
        """The ``images`` dataset of the (lazily opened) HDF5 file."""
        if self._images is None:
            self._images = self.h5f["images"]
        return self._images

    def __len__(self):
        """Returns the number of samples in the dataset."""
        return self._num_samples

    def __getitem__(self, idx):
        """
        Retrieve a sample from the dataset.

        Args:
            idx (int): Index of the sample to retrieve.

        Returns:
            image (torch.Tensor): The stored image converted to float32, in
                the layout it has in the file.
            target (torch.Tensor): The stored 2-D target converted to float32
                with one extra trailing column of ones, i.e. shape
                ``(rows, cols + 1)``.
                NOTE(review): presumably (nr_of_gates, 12) -> (nr_of_gates, 13);
                confirm against the data files.
        """
        image = torch.tensor(self.images[idx], dtype=torch.float32)
        target = torch.tensor(self.targets[idx], dtype=torch.float32)

        # Append a column of ones to every target row.
        ones = torch.ones((target.shape[0], 1), dtype=torch.float32)
        target = torch.cat([target, ones], dim=1)

        return image, target

    def __getstate__(self):
        """Drop the open handle when pickling; the copy reopens it on demand."""
        state = self.__dict__.copy()
        state["_h5f"] = None
        state["_images"] = None
        return state

    def close(self):
        """Close the HDF5 file if it is open. Safe to call more than once."""
        # getattr: __init__ may have failed before the attribute was set.
        h5f = getattr(self, "_h5f", None)
        self._images = None
        self._h5f = None
        if h5f is not None:
            h5f.close()

    def __del__(self):
        """Close the HDF5 file when the object is deleted."""
        try:
            self.close()
        except Exception:
            # Best effort only: during interpreter shutdown h5py may already
            # be torn down, and errors raised in __del__ cannot be handled by
            # the caller anyway.
            pass

In [None]:
from torch.utils.data import DataLoader, random_split, Dataset

# HDF5 recording used for this run.
# NOTE(review): absolute Kaggle input path — adjust when running elsewhere.
file_paths = "/kaggle/input/mission-impassable/autonomous_flight-01a-ellipse.h5"
# Alternative recording paths kept for reference (not loaded):
#   "/kaggle/input/mission-impassable/piloted_flight-08p-lemniscate.h5",
#   "/kaggle/input/mission-impassable/autonomous_flight-15a-trackRATM.h5",


# Build the dataset from the single selected file.
dataset = GateDetectionDataset(file_paths)

In [None]:
from torch.utils.data import DataLoader, random_split, Dataset


# Split the dataset into training (90%) and validation (remaining) subsets.
# The split is seeded so the same samples end up in the validation set on
# every run; set config["seed"] to change it (42 is used when the key is absent).
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(config.get("seed", 42))
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# NOTE(review): batch_size is kept at 1 rather than config["batchsize_*"].
# Presumably the number of target gates varies per image, so the default
# collate function could not stack targets into one tensor — confirm before
# raising it.

# Create training set loader
train_loader = DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=config["shuffle_train"],
    num_workers=config["num_workers_train_loader"],
)

# Create validation set loader
val_loader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=config["shuffle_val"],
    num_workers=config["num_workers_val_loader"],
)

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])

In [None]:
import torch
import torch.nn.functional as F


def matching_loss(
    pred,
    target,
    coord_loss_weight=1,
    missing_target_penalty=16,  # Penalty for missing a target gate
    temperature=1.0,
    assignment_threshold=0.8,
):
    """
    Soft-matching loss between predicted gates and target gates.

    For every sample a cost matrix between all predicted gates and all target
    gates is built from

    * the squared distance of the coordinate slots (0, 1, 3, 4, 6, 7, 9, 10),
      weighted by ``coord_loss_weight``, and
    * the absolute distance of the visibility slots (2, 5, 8, 11) divided by 20.

    Each predicted gate is softly assigned to the targets with a softmax over
    the negative cost, and the sample loss is the assignment-weighted cost. A
    fixed penalty is added for every target whose largest assignment weight
    over all predictions is below ``assignment_threshold``.

    NOTE(review): the penalty is computed from a boolean comparison, so it
    raises the reported loss but contributes no gradient.

    Samples without any target gate contribute zero to the loss.

    Args:
        pred (torch.Tensor): Predictions of shape (batch_size, n_pred, >=12).
        target (torch.Tensor or sequence of torch.Tensor): Per-sample targets
            of shape (n_targets, >=12); only indexed as ``target[i]``.
        coord_loss_weight (float): Weight of the coordinate term in the cost.
        missing_target_penalty (float): Penalty per insufficiently assigned target.
        temperature (float): Softmax temperature; smaller values give sharper
            assignments. The default 1.0 is a plain softmax over the negative cost.
        assignment_threshold (float): Minimum best assignment weight a target
            needs in order not to be penalised.

    Returns:
        loss (torch.Tensor): Tensor of shape (1,) with the loss averaged over the batch.
    """
    coord_slots = [0, 1, 3, 4, 6, 7, 9, 10]
    vis_slots = [2, 5, 8, 11]

    batch_size = pred.size(0)
    # Leaf accumulator that requires grad, so backward() still works on the
    # result when every sample in the batch was skipped.
    total_loss = torch.zeros(1, device=pred.device, requires_grad=True)

    for i in range(batch_size):
        pred_gates = pred[i]
        target_gates = target[i].to(pred.device)  # Ensure it's on the correct device

        if target_gates.size(0) == 0:
            # Nothing to match against: this sample adds no cost and no penalty.
            continue

        pred_coords = pred_gates[..., coord_slots]  # Shape: (n_pred, 8)
        target_coords = target_gates[..., coord_slots]  # Shape: (n_targets, 8)
        pred_vis = pred_gates[..., vis_slots]  # Shape: (n_pred, 4)
        target_vis = target_gates[..., vis_slots]  # Shape: (n_targets, 4)

        # Pairwise distances via broadcasting. Shape: (n_pred, n_targets)
        coord_dist = torch.sum(
            (pred_coords.unsqueeze(1) - target_coords.unsqueeze(0)) ** 2, dim=-1
        )
        vis_dist = torch.sum(
            torch.abs(pred_vis.unsqueeze(1) - target_vis.unsqueeze(0)), dim=-1
        )

        # Coordinates dominate the cost; visibility is down-weighted by 1/20.
        cost_matrix = coord_loss_weight * coord_dist + vis_dist / 20

        # Soft assignment of every prediction over the targets (rows sum to 1).
        soft_assignment = F.softmax(-cost_matrix / temperature, dim=1)
        sample_matching_loss = torch.sum(soft_assignment * cost_matrix)

        # Best assignment weight each target receives from any prediction.
        max_assignment_per_target = torch.max(soft_assignment, dim=0)[0]

        # Count the targets below the threshold; piecewise constant, see docstring.
        missing_target_penalty_term = (
            torch.sum((max_assignment_per_target < assignment_threshold).float())
            * missing_target_penalty
        )

        total_loss = total_loss + sample_matching_loss + missing_target_penalty_term

    # Average loss over the batch
    total_loss = total_loss / batch_size
    return total_loss

In [None]:
def evaluate(val_loader, model, device):
    """
    Run the model over a validation loader without tracking gradients.

    Args:
        val_loader: Iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute whose length is the number of samples.
        model (torch.nn.Module): Model to evaluate; switched to eval mode.
        device (torch.device): Device the batches are moved to.

    Returns:
        val_loss (float): Batch losses weighted by batch size, divided by the
            number of samples in ``val_loader.dataset``.
        val_perf (tuple): ``(mean, median, losses)`` of the per-batch losses,
            where ``losses`` is a tensor on ``device``.
        val_pred (list[torch.Tensor]): Raw model outputs, one entry per batch.
    """
    model.eval()

    weighted_loss_sum = 0.0
    batch_losses = []
    predictions = []

    with torch.no_grad():
        for batch_inputs, batch_targets in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            batch_outputs = model(batch_inputs)
            batch_loss = matching_loss(batch_outputs, batch_targets).item()

            # Weight by batch size so the final value is a per-sample average.
            weighted_loss_sum += batch_loss * batch_inputs.size(0)
            batch_losses.append(batch_loss)
            predictions.append(batch_outputs)

    per_sample_loss = weighted_loss_sum / len(val_loader.dataset)

    # Summary statistics over the per-batch losses.
    loss_tensor = torch.tensor(batch_losses, device=device)
    performance = (
        loss_tensor.mean().item(),
        loss_tensor.median().item(),
        loss_tensor,
    )

    return per_sample_loss, performance, predictions

In [None]:
import time
import torch
from tqdm import tqdm


def train_epoch(train_loader, model, optimizer, device):
    """
    Train the model for one pass over ``train_loader``.

    Progress is printed roughly every 5% of the batches.

    Args:
        train_loader: Sized iterable of ``(inputs, targets)`` batches exposing
            a ``dataset`` attribute whose length is the number of samples.
        model (torch.nn.Module): Model to train; switched to train mode.
        optimizer (torch.optim.Optimizer): Optimizer stepping the model parameters.
        device (torch.device): Device the batches are moved to.

    Returns:
        train_loss (float): Batch losses weighted by batch size, divided by the
            number of samples in ``train_loader.dataset``.
        train_perf (tuple): ``(mean, median, losses)`` of the per-batch losses,
            where ``losses`` is a tensor on ``device``.
    """
    model.train()

    total_batches = len(train_loader)
    # At least 1 so the modulo below is well defined for short loaders.
    print_interval = max(total_batches // 20, 1)

    weighted_loss_sum = 0.0
    batch_losses = []

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Standard optimisation step: reset grads, forward, backward, update.
        optimizer.zero_grad()
        loss = matching_loss(model(inputs), targets)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        weighted_loss_sum += loss_value * inputs.size(0)
        batch_losses.append(loss_value)

        if (batch_idx + 1) % print_interval == 0:
            print(
                f"Progress: {(batch_idx + 1) / total_batches * 100:.2f}% - Loss: {loss.item():.4f}"
            )

    per_sample_loss = weighted_loss_sum / len(train_loader.dataset)

    # Summary statistics over the per-batch losses.
    loss_tensor = torch.tensor(batch_losses, device=device)
    performance = (
        loss_tensor.mean().item(),
        loss_tensor.median().item(),
        loss_tensor,
    )

    return per_sample_loss, performance

In [None]:
# Training schedule: one training pass followed by one validation pass per
# epoch, printing the mean per-batch loss of each.
# Relies on train_loader, val_loader, model, optimizer, device and config
# defined in earlier cells.
exp_name = "NEWMODEL"  # experiment label; not referenced again in this cell
epochs = config["epochs"]
print(f"learning rate: {config['lr']}")

for epoch in tqdm(range(epochs), desc="Epochs"):
    # Train the model for one epoch; train_perf is (mean, median, per-batch losses)
    train_loss, train_perf = train_epoch(train_loader, model, optimizer, device)
    train_mean, train_median, train_error_tensor = train_perf

    # Evaluate the model on validation set; val_perf has the same layout and
    # val_pred holds the raw model outputs of every validation batch
    val_loss, val_perf, val_pred = evaluate(val_loader, model, device)
    val_mean, val_median, val_error_tensor = val_perf

    print("val_loss_mean", val_mean)
    print("train_loss_mean", train_mean)

    # Release cached GPU memory between epochs
    torch.cuda.empty_cache()

# Release cached GPU memory once more after the last epoch
torch.cuda.empty_cache()