In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torch.optim.lr_scheduler import StepLR
#from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models import resnet34, ResNet34_Weights
from torchvision.ops.box_iou import box_iou
from torchvision.transforms import transforms, v2
from torchvision.io import read_image, ImageReadMode
from torchvision.utils import draw_bounding_boxes
from torchmetrics.detection import MeanAveragePrecision
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import torch.cuda as cuda


In [None]:
# CUDA-related settings, written into the process environment as plain strings.
os.environ.update(
    {
        "CUDA_LAUNCH_BLOCKING": "1",
        # Set PYTORCH_CUDA_ALLOC_CONF environment variable
        "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
    }
)


In [None]:
class FetusDetector(nn.Module):
    """Single-box regressor on top of a pretrained ResNet-34 trunk.

    The first eight children of ``resnet34`` are kept and split into two
    ``nn.Sequential`` stages: ``features1`` holds children 0-5 and
    ``features2`` holds children 6-7.  The resulting feature map is globally
    average-pooled to one value per channel and passed through
    ``BatchNorm1d(512) -> Linear(512, 4)``, giving four raw box values per
    image.
    """

    def __init__(self):
        super().__init__()
        resnet = resnet34(weights=ResNet34_Weights.DEFAULT)
        layers = list(resnet.children())[:8]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        self.bb = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, image, box=None):
        """Predict four box values for every image in the batch.

        Args:
            image: Batch of images fed to the convolutional trunk.
            box: Unused.  Kept (now optional) so existing callers that pass
                the ground-truth boxes positionally keep working.

        Returns:
            Tensor with one row of four values per input image.
        """
        x = self.features1(image)
        x = self.features2(x)
        x = F.leaky_relu(x)
        # Functional pooling: no new nn.Module is constructed on every call.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, start_dim=1)
        return self.bb(x)



In [None]:
# import os
# import torch
# from PIL import Image
# from torchvision.io import read_image

# data_dir = 'Dataset for Fetus Framework\Training\Standard'
# labels_file = 'ObjectDetection.xlsx'


# # Assuming labels_df contains information about the dataset
# # and self.data_dir is the directory where images are stored
# labels_df = pd.read_excel(labels_file)

# # Filter out rows with non-existing image files
# labels_df = labels_df[labels_df.apply(lambda x: os.path.exists(os.path.join(data_dir, x["fname"])), axis=1)]
# labels_df = labels_df[labels_df.apply(lambda x: x["structure"] == 'thalami', axis=1)]

# # Initialize variables to store minimum height and width
# min_height = float('inf')
# min_width = float('inf')

# # Loop through each image in the dataset
# for idx in range(len(labels_df)):
#     # Read the image tensor
#     image_tensor = read_image(path=os.path.join(data_dir, labels_df.iloc[idx, 0]), mode=ImageReadMode.RGB).float().cuda()

#     # Get the height and width of the image tensor
#     _, height, width = image_tensor.shape

#     # Update minimum height and width if necessary
#     min_height = min(min_height, height)
#     min_width = min(min_width, width)

# # Print the size of the smallest image tensor
# print("Smallest image tensor size:", min_height, "x", min_width)


In [None]:
class FetusDataset(Dataset):
    """Images paired with one 'thalami' bounding box per annotation row.

    The annotation spreadsheet is read with ``pd.read_excel`` and reduced to
    rows whose ``structure`` column equals ``'thalami'`` and whose ``fname``
    exists inside ``data_dir``.

    The code relies on the positional column layout of the spreadsheet:
    column 0 is the image file name and columns 2-5 are read, in order, as
    ``h_min, w_min, h_max, w_max``.

    Args:
        data_dir: Directory that contains the image files.
        labels_file: Path of the Excel annotation file.
        transform: Optional callable applied to the float image tensor.  The
            box is rescaled by the ratio of the image size after / before it.
    """

    def __init__(self, data_dir, labels_file, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.label_map = {
            'thalami': 0,
            'nasal bone': 1,
            'palate': 2,
            'nasal skin': 3,
            'nasal tip': 4,
            'midbrain': 5,
            'NT': 6,
            'IT': 7,
            'CM': 8
        }

        self.transform_PIL = transforms.Compose([
            transforms.ToPILImage()
        ])

        self.transform_tensor = transforms.Compose([
            transforms.PILToTensor()
        ])

        labels_df = pd.read_excel(labels_file)

        # Keep only the 'thalami' annotations first so the file-system check
        # below runs on fewer rows; the surviving row set is the same.
        labels_df = labels_df[labels_df["structure"] == 'thalami']

        # Filter out rows with non-existing image files
        file_exists = labels_df["fname"].map(
            lambda fname: os.path.exists(os.path.join(self.data_dir, fname))
        ).astype(bool)
        self.labels_df = labels_df[file_exists].reset_index(drop=True)

    def __len__(self):
        """Number of annotation rows that survived filtering."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Return ``(image, boxes)`` for annotation row ``idx``.

        ``image`` is the float RGB tensor of the file named in the row (after
        ``transform`` when one was given).  ``boxes`` is a ``(1, 4)`` float32
        tensor ``[[w_min, h_min, w_max, h_max]]`` scaled to the transformed
        image size.

        Tensors are returned on the CPU; the training / validation loops move
        each batch to the target device themselves.  Keeping CUDA calls out of
        the dataset lets it run on CPU-only machines and in DataLoader workers.
        """
        # Use the row at `idx` itself.  Looking rows up by file name and
        # keeping the last match returns the wrong box whenever a file name
        # occurs more than once.
        row = self.labels_df.iloc[idx]

        image = read_image(
            path=os.path.join(self.data_dir, row.iloc[0]),
            mode=ImageReadMode.RGB,
        ).float()

        og_h, og_w = image.shape[1:]
        if self.transform is not None:
            image = self.transform(image)
        n_h, n_w = image.shape[1:]

        h_min, w_min, h_max, w_max = row.iloc[2:6].astype(float).tolist()

        # Rescale the annotation from the original to the transformed size.
        w_min *= (n_w / og_w)
        h_min *= (n_h / og_h)
        w_max *= (n_w / og_w)
        h_max *= (n_h / og_h)
        boxes = torch.tensor([[w_min, h_min, w_max, h_max]], dtype=torch.float32)

        return image, boxes


In [None]:
# Define paths to your data and labels file.
# os.path.join keeps the path free of backslash escapes ('\T', '\S' are not
# valid escape sequences in a normal string) and portable across platforms.
data_dir = os.path.join('Dataset for Fetus Framework', 'Training', 'Standard')
labels_file = 'ObjectDetection.xlsx'

# Define the transform applied to every image: resize to size=(470, 650)
transform = v2.Compose([
    v2.Resize(size=(470, 650))
])

# Load the dataset (it is split into training and validation sets later)
dataset = FetusDataset(data_dir, labels_file, transform=transform)

# Check if any samples are left in the dataset
if len(dataset) == 0:
    raise ValueError("No valid samples found in the dataset")


In [None]:
# Seed the split so the 80/20 partition is the same on every Restart & Run All.
split_generator = torch.Generator().manual_seed(42)
train_dataset, ground_dataset = random_split(dataset, [0.8, 0.2], generator=split_generator)

# Train on the training split only: wrapping the full `dataset` here would
# feed the held-out validation samples to the optimizer.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)#, num_workers=12, persistent_workers=True)

# No shuffling for evaluation, so batches are composed identically each run.
val_loader = DataLoader(ground_dataset, batch_size=16, shuffle=False)#, num_workers=12, persistent_workers=True)

In [None]:
def validate(model, val_loader, device, map_metric):
    """Run the model over ``val_loader`` and return ``map_metric.compute()``.

    Every sample contributes one predicted box and one target box, both with
    class label 0.  Because the model outputs no confidence, a per-batch
    pseudo-score is derived from the predicted box area: the smallest box in
    the batch scores 1 and the largest scores 0.

    Args:
        model: Callable as ``model(image, boxes)``; returns one row of
            ``[x1, y1, x2, y2]`` per image.
        val_loader: Iterable of ``(image, boxes)`` batches; ``boxes`` carries
            a singleton dimension 1 that is squeezed away here.
        device: Device the batches are moved to.
        map_metric: Object with ``reset()``, ``update(preds=, target=)`` and
            ``compute()``.  It is reset before and after the pass.

    Returns:
        Whatever ``map_metric.compute()`` returns.
    """
    model.eval()
    map_metric.reset()

    with torch.no_grad():
        for image, boxes in val_loader:
            image = image.to(device)
            boxes = boxes.squeeze(1).to(device)

            pred_boxes = model(image, boxes)

            areas = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
            area_range = areas.max() - areas.min()
            if area_range > 0:
                scores = 1 - (areas - areas.min()) / area_range
            else:
                # Single-sample batch or identical areas: the normalisation
                # would divide by zero, so give every box the same score.
                scores = torch.ones_like(areas)
            scores = scores.to(dtype=torch.float32)

            labels = torch.zeros(1, dtype=torch.int64, device=device)

            # `scores` stays 1-D (no squeeze), so this also works for a batch
            # that contains a single sample.
            preds_list = [
                {
                    "boxes": pred_box.unsqueeze(0),
                    "scores": score.reshape(1),
                    "labels": labels,
                }
                for pred_box, score in zip(pred_boxes, scores)
            ]
            target_list = [
                {"boxes": target_box.unsqueeze(0), "labels": labels}
                for target_box in boxes
            ]

            map_metric.update(preds=preds_list, target=target_list)

    map_value = map_metric.compute()
    map_metric.reset()

    return map_value


In [None]:
def smooth_l1_loss(prediction, target, beta=1.0):
    """Mean Smooth-L1 loss between ``prediction`` and ``target``.

    Per element, with ``d = |prediction - target|``: ``0.5 * d**2 / beta``
    when ``d < beta``, otherwise ``d - 0.5 * beta``.  The mean over all
    elements is returned.

    Delegates to ``torch.nn.functional.smooth_l1_loss``, which treats
    ``beta=0`` as plain L1; the quadratic branch is never evaluated there, so
    gradients stay finite.

    Args:
        prediction: Predicted values.
        target: Reference values.
        beta: Threshold between the quadratic and the linear regime.

    Returns:
        Scalar tensor holding the mean loss.
    """
    return F.smooth_l1_loss(prediction, target, reduction="mean", beta=beta)

In [None]:
def train_model_mean(model, train_loader, optimizer, scheduler, device, num_epochs=10):
    """Train the box regressor and print loss and mAP on the training batches.

    For every batch the model is optimised with ``smooth_l1_loss``; the
    detached predictions are then fed to a ``MeanAveragePrecision`` metric
    together with an area-based pseudo-score (smallest predicted box in the
    batch scores 1, largest scores 0).  The metric is computed, printed and
    reset at the end of each epoch.

    Args:
        model: Callable as ``model(image, boxes)``; returns one row of
            ``[x1, y1, x2, y2]`` per image.
        train_loader: DataLoader yielding ``(image, boxes)`` batches; ``boxes``
            carries a singleton dimension 1 that is squeezed away here.
        optimizer: Optimizer over the model parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        device: Device the batches are moved to.
        num_epochs: Number of passes over ``train_loader``.
    """
    map_metric = MeanAveragePrecision(box_format='xyxy', iou_type='bbox')

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False)

        for image, boxes in pbar:
            image = image.to(device).float()
            boxes = boxes.squeeze(1).to(device).float()

            optimizer.zero_grad()
            bbox_logits = model(image, boxes)
            loss = smooth_l1_loss(bbox_logits, boxes)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            pbar.set_postfix(loss=batch_loss)

            # Metric bookkeeping works on detached values only, so nothing
            # stored by the metric keeps the autograd graph alive.
            with torch.no_grad():
                pred_boxes = bbox_logits.detach()
                areas = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
                area_range = areas.max() - areas.min()
                if area_range > 0:
                    scores = 1 - (areas - areas.min()) / area_range
                else:
                    # Single-sample batch or identical areas: avoid 0 / 0.
                    scores = torch.ones_like(areas)
                scores = scores.to(dtype=torch.float32)

                # Class ids are integers (int64), not floats.
                labels = torch.zeros(1, dtype=torch.int64, device=device)

                preds_list = [
                    {
                        "boxes": pred_box.unsqueeze(0),
                        "scores": score.reshape(1),
                        "labels": labels,
                    }
                    for pred_box, score in zip(pred_boxes, scores)
                ]
                target_list = [
                    {"boxes": target_box.unsqueeze(0), "labels": labels}
                    for target_box in boxes
                ]

            map_metric.update(preds=preds_list, target=target_list)

        # Step the scheduler once per epoch.  Stepping it after every batch
        # makes StepLR shrink the learning rate every `step_size` batches.
        scheduler.step()

        # Compute mAP after each epoch
        map_value = map_metric.compute()
        map_metric.reset()

        # Each batch loss is already a mean, so average over batches.
        avg_loss = running_loss / max(len(train_loader), 1)

        print(f"Epoch {epoch+1}, Train Avg. Loss: {avg_loss}, Mean Average Precision: {map_value}")


In [None]:
def val_bbox_metrics(model, val_loader):
    """Return the mean IoU between predicted and ground-truth boxes.

    The model is switched to eval mode and run without gradient tracking.
    Batches are moved to the device of the model's first parameter (CPU when
    the model has no parameters).

    Args:
        model: Callable as ``model(image, boxes)``; returns one row of
            ``[x1, y1, x2, y2]`` per image.
        val_loader: Iterable of ``(image, boxes)`` batches; ``boxes`` carries
            a singleton dimension 1 that is squeezed away here.

    Returns:
        Sum of per-sample IoU divided by the number of samples, as a Python
        float; ``0.0`` when the loader yields no samples.
    """
    model.eval()

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    total = 0
    sum_iou = 0.0
    with torch.no_grad():
        for image, boxes in val_loader:
            image = image.to(device).float()
            boxes = boxes.squeeze(1).to(device).float()
            pred_boxes = model(image, boxes)
            # box_iou returns the pairwise matrix; the IoU of prediction i
            # with its own target i sits on the diagonal.
            sum_iou += box_iou(pred_boxes, boxes).diagonal().sum().item()
            total += boxes.shape[0]

    return sum_iou / total if total else 0.0


In [None]:
# def val_metrics(model, val_loader, batch_size):
#     model.eval()
#     total = 0
#     correct = 0
#     for image, boxes in val_loader:
#         image = image.cuda()
#         boxes = boxes.squeeze(1).cuda()
#         out_bb = model(image, boxes)
#         pred_bb = out_bb.round()
#         pred_bb_cpu = pred_bb.cpu()
#         boxes_cpu = boxes.cpu()
#         correct += (pred_bb_cpu == boxes_cpu).all(dim=1).sum().item()
#         total += batch_size
#     return correct / total

In [None]:
# def val_metrics(model, val_loader, batch_size):
#     model.eval()
#     total_correct = 0
#     total_samples = 0

#     with torch.no_grad():
#         for images, boxes in val_loader:
#             images = images.cuda().float()
#             boxes = boxes.cuda().float()

#             out_bb = model(images, boxes)
#             pred_bb = torch.round(out_bb)
#             correct = torch.all(pred_bb == boxes, dim=1).sum().item()

#             total_correct += correct
#             total_samples += images.size(0)

#     accuracy = total_correct / total_samples if total_samples > 0 else 0
#     return accuracy


In [None]:
def train_model(model, train_loader, optimizer, scheduler, device, num_epochs=10,
                eval_loader=None, checkpoint_path='lmao6.pt'):
    """Train the box regressor, evaluating and checkpointing after each epoch.

    Each epoch optimises ``smooth_l1_loss`` over ``train_loader``, steps the
    scheduler once, computes the mean IoU with ``val_bbox_metrics`` and saves
    ``model.state_dict()`` to ``checkpoint_path``.

    Args:
        model: Callable as ``model(image, boxes)``; returns one row of four
            box values per image.
        train_loader: DataLoader yielding ``(image, boxes)`` batches; ``boxes``
            carries a singleton dimension 1 that is squeezed away here.
        optimizer: Optimizer over the model parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        device: Device the batches are moved to.
        num_epochs: Number of passes over ``train_loader``.
        eval_loader: Loader used for the per-epoch evaluation.  When omitted,
            the notebook-level ``val_loader`` is used, as before.
        checkpoint_path: File the state dict is written to after every epoch.
    """
    if eval_loader is None:
        # Backward compatible fallback to the notebook-level loader.
        eval_loader = val_loader

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        cuda.empty_cache()

        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False)

        for image, boxes in pbar:
            image = image.to(device).float()
            boxes = boxes.squeeze(1).to(device).float()

            optimizer.zero_grad()
            bbox_logits = model(image, boxes)
            loss = smooth_l1_loss(bbox_logits, boxes)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            pbar.set_postfix(loss=batch_loss)

        # Step the scheduler once per epoch.  Stepping it after every batch
        # makes StepLR shrink the learning rate every `step_size` batches.
        scheduler.step()

        cuda.empty_cache()

        # Each batch loss is already a mean, so average over batches.
        avg_loss = running_loss / max(len(train_loader), 1)

        # val_bbox_metrics returns a mean IoU, so it is reported as such.
        mean_iou = val_bbox_metrics(model, eval_loader)

        torch.save(model.state_dict(), checkpoint_path)
        print(f"Epoch {epoch+1}, Train Avg. Loss: {avg_loss}, Validation Mean IoU: {mean_iou}")


In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the detector and place it on the selected device.
model = FetusDetector()
model = model.to(device)

# Not passed to train_model below; the training loop uses smooth_l1_loss.
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(params=model.parameters(), lr=0.001)

scheduler = StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

train_model(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    num_epochs=10,
)
#train_model_mean(model, train_loader, optimizer, scheduler, device, num_epochs=10)

  return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
                                                                     

Epoch 1, Validation Accuracy: 1059.0742268880208


                                                                     

In [None]:
# state_dict = torch.load('lmao6.pt')
# model.load_state_dict(state_dict)
# val_metrics(model, val_loader, 16)

RuntimeError: Error(s) in loading state_dict for FetusDetector:
	Unexpected key(s) in state_dict: "features1.4.0.conv3.weight", "features1.4.0.bn3.weight", "features1.4.0.bn3.bias", "features1.4.0.bn3.running_mean", "features1.4.0.bn3.running_var", "features1.4.0.bn3.num_batches_tracked", "features1.4.0.downsample.0.weight", "features1.4.0.downsample.1.weight", "features1.4.0.downsample.1.bias", "features1.4.0.downsample.1.running_mean", "features1.4.0.downsample.1.running_var", "features1.4.0.downsample.1.num_batches_tracked", "features1.4.1.conv3.weight", "features1.4.1.bn3.weight", "features1.4.1.bn3.bias", "features1.4.1.bn3.running_mean", "features1.4.1.bn3.running_var", "features1.4.1.bn3.num_batches_tracked", "features1.4.2.conv3.weight", "features1.4.2.bn3.weight", "features1.4.2.bn3.bias", "features1.4.2.bn3.running_mean", "features1.4.2.bn3.running_var", "features1.4.2.bn3.num_batches_tracked", "features1.5.0.conv3.weight", "features1.5.0.bn3.weight", "features1.5.0.bn3.bias", "features1.5.0.bn3.running_mean", "features1.5.0.bn3.running_var", "features1.5.0.bn3.num_batches_tracked", "features1.5.1.conv3.weight", "features1.5.1.bn3.weight", "features1.5.1.bn3.bias", "features1.5.1.bn3.running_mean", "features1.5.1.bn3.running_var", "features1.5.1.bn3.num_batches_tracked", "features1.5.2.conv3.weight", "features1.5.2.bn3.weight", "features1.5.2.bn3.bias", "features1.5.2.bn3.running_mean", "features1.5.2.bn3.running_var", "features1.5.2.bn3.num_batches_tracked", "features1.5.3.conv3.weight", "features1.5.3.bn3.weight", "features1.5.3.bn3.bias", "features1.5.3.bn3.running_mean", "features1.5.3.bn3.running_var", "features1.5.3.bn3.num_batches_tracked", "features2.0.0.conv3.weight", "features2.0.0.bn3.weight", "features2.0.0.bn3.bias", "features2.0.0.bn3.running_mean", "features2.0.0.bn3.running_var", "features2.0.0.bn3.num_batches_tracked", "features2.0.1.conv3.weight", "features2.0.1.bn3.weight", "features2.0.1.bn3.bias", "features2.0.1.bn3.running_mean", "features2.0.1.bn3.running_var", 
"features2.0.1.bn3.num_batches_tracked", "features2.0.2.conv3.weight", "features2.0.2.bn3.weight", "features2.0.2.bn3.bias", "features2.0.2.bn3.running_mean", "features2.0.2.bn3.running_var", "features2.0.2.bn3.num_batches_tracked", "features2.0.3.conv3.weight", "features2.0.3.bn3.weight", "features2.0.3.bn3.bias", "features2.0.3.bn3.running_mean", "features2.0.3.bn3.running_var", "features2.0.3.bn3.num_batches_tracked", "features2.0.4.conv3.weight", "features2.0.4.bn3.weight", "features2.0.4.bn3.bias", "features2.0.4.bn3.running_mean", "features2.0.4.bn3.running_var", "features2.0.4.bn3.num_batches_tracked", "features2.0.5.conv3.weight", "features2.0.5.bn3.weight", "features2.0.5.bn3.bias", "features2.0.5.bn3.running_mean", "features2.0.5.bn3.running_var", "features2.0.5.bn3.num_batches_tracked", "features2.1.0.conv3.weight", "features2.1.0.bn3.weight", "features2.1.0.bn3.bias", "features2.1.0.bn3.running_mean", "features2.1.0.bn3.running_var", "features2.1.0.bn3.num_batches_tracked", "features2.1.1.conv3.weight", "features2.1.1.bn3.weight", "features2.1.1.bn3.bias", "features2.1.1.bn3.running_mean", "features2.1.1.bn3.running_var", "features2.1.1.bn3.num_batches_tracked", "features2.1.2.conv3.weight", "features2.1.2.bn3.weight", "features2.1.2.bn3.bias", "features2.1.2.bn3.running_mean", "features2.1.2.bn3.running_var", "features2.1.2.bn3.num_batches_tracked". 
	size mismatch for features1.4.0.conv1.weight: copying a param with shape torch.Size([64, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for features1.4.1.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for features1.4.2.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for features1.5.0.conv1.weight: copying a param with shape torch.Size([128, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 64, 3, 3]).
	size mismatch for features1.5.0.downsample.0.weight: copying a param with shape torch.Size([512, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 64, 1, 1]).
	size mismatch for features1.5.0.downsample.1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for features1.5.0.downsample.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for features1.5.0.downsample.1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for features1.5.0.downsample.1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for features1.5.1.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for features1.5.2.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for features1.5.3.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for features2.0.0.conv1.weight: copying a param with shape torch.Size([256, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 128, 3, 3]).
	size mismatch for features2.0.0.downsample.0.weight: copying a param with shape torch.Size([1024, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 128, 1, 1]).
	size mismatch for features2.0.0.downsample.1.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for features2.0.0.downsample.1.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for features2.0.0.downsample.1.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for features2.0.0.downsample.1.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for features2.0.1.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for features2.0.2.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for features2.0.3.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for features2.0.4.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for features2.0.5.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for features2.1.0.conv1.weight: copying a param with shape torch.Size([512, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 256, 3, 3]).
	size mismatch for features2.1.0.downsample.0.weight: copying a param with shape torch.Size([2048, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 256, 1, 1]).
	size mismatch for features2.1.0.downsample.1.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for features2.1.0.downsample.1.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for features2.1.0.downsample.1.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for features2.1.0.downsample.1.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for features2.1.1.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 512, 3, 3]).
	size mismatch for features2.1.2.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 512, 3, 3]).

In [None]:
# num_epochs = 10
# map_metric = MeanAveragePrecision(box_format='xyxy', iou_type='bbox')

# for epoch in range(num_epochs):
#     model.train()
#     running_loss = 0.0
#     scores_list = []

#     pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False)

#     for batch_idx, (image, boxes) in enumerate(pbar):

#         optimizer.zero_grad()

#         label_tensor = torch.tensor([0], device='cuda:0')

#         image = image.to(device)
#         boxes = boxes.squeeze(1).to(device)
#         #boxes = boxes.to(device)
#         label_tensor = label_tensor.to(device)

#         # batch_index = torch.tensor([[batch_idx] * len(boxes)]).to(device)
#         # batch_index = batch_index.view(-1, 1)
#         # boxes_with_index = torch.cat((batch_index, boxes), dim=1).to(device)

#         # print(image)
#         # print(boxes_with_index)

#         # bbox_logits = model(image, boxes_with_index)
#         bbox_logits = model(image, boxes)
#         # print(bbox_logits.shape)
#         # print(boxes.shape)

#         bbox_sizes = (bbox_logits[:, 2] - bbox_logits[:, 0]) * (bbox_logits[:, 3] - bbox_logits[:, 1])

#         # print(bbox_logits)
#         # print(bbox_sizes)
#         # print(bbox_sizes.min())
#         # print(bbox_sizes.max())


#         scores = 1 - (bbox_sizes - bbox_sizes.min()) / (bbox_sizes.max() - bbox_sizes.min())
#         scores_tensor = torch.tensor(scores, dtype=torch.float32, device=device)
#         scores_tensor = scores_tensor.squeeze()
#         # print(scores_tensor.shape)
#         # print(scores_list)

#         # print("I am score tensor",scores_list)
#         preds_list = []
#         target_list = []

#         with torch.no_grad():

#             for pred_box, target_box, score in zip(bbox_logits, boxes, scores_tensor):
#                 # print(pred_box.unsqueeze(0).shape)
#                 # print(score.shape)

#                 pred_dict = {
#                     "boxes": pred_box.unsqueeze(0),
#                     "scores": torch.tensor([score], dtype=torch.float32, device=device),
#                     "labels": label_tensor.repeat(len(pred_box.unsqueeze(0)))
#                 }
#                 preds_list.append(pred_dict)
#                 # print(label_tensor.shape)

#                 # print(target_box.unsqueeze(0).shape)

#                 target_dict = {
#                     "boxes": target_box.unsqueeze(0),
#                     "labels": label_tensor.repeat(len(target_box.unsqueeze(0)))
#                 }
#                 target_list.append(target_dict)
#                 # print(label_tensor.shape)

#         # print("I am pred", preds_list)
#         # print("I am target", target_dict)
#         map_metric.update(preds=preds_list, target=target_list)

#         loss_bbox = smooth_l1_loss(bbox_logits, boxes)
#         loss = loss_bbox

#         loss.backward()
#         running_loss += loss.item()

#         optimizer.step()
#         scheduler.step()

#         loss = loss.detach().cpu().item()
#         pbar.set_postfix(loss=loss)

#     map_value = map_metric.compute()
#     map_metric.reset()

#     avg_loss = running_loss / len(train_loader.dataset)

#     print(f"Epoch {epoch+1}, Train Avg. Loss: {avg_loss}, Mean Average Precision: {map_value}")


Epoch 1/10:   0%|          | 0/66 [00:00<?, ?it/s]

  scores_tensor = torch.tensor(scores, dtype=torch.float32, device=device)
                                                                     

Epoch 1, Train Avg. Loss: 16.606184547061012, Mean Average Precision: {'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(0.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(0.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.), 'classes': tensor(0, dtype=torch.int32)}


                                                                     

Epoch 2, Train Avg. Loss: 16.60822525751023, Mean Average Precision: {'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(0.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(0.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.), 'classes': tensor(0, dtype=torch.int32)}


                                                                     

Epoch 3, Train Avg. Loss: 16.601668584914435, Mean Average Precision: {'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(0.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(0.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.), 'classes': tensor(0, dtype=torch.int32)}


Epoch 4/10:  33%|███▎      | 22/66 [06:02<12:30, 17.05s/it, loss=266]

In [None]:

# # Validation
# model.eval()  # Set model to evaluation mode
# val_loss = 0.0
# map_metric.reset()  # Reset Mean Average Precision metric for validation

# with torch.no_grad():
#     for image_dict in val_loader:
#         images = []
#         boxes = []
#         labels = []

#         for label_id, data in image_dict.items():
#             for image, box in zip(data['images'], data['bboxes']):
#                 images.append(image)
#                 boxes.append(box)
#                 labels.append(label_id)

#         images_batch = torch.stack(images).to(device)  # Stack images into a batch
#         boxes_batch = torch.cat(boxes).to(device)     # Concatenate bounding boxes into a batch
#         labels_batch = torch.tensor(labels).to(device)  # Convert labels to tensor

#         # Forward pass
#         cls_logits, bbox_logits = model(images_batch, boxes_batch)

#         # Calculate classification and bounding box regression losses
#         loss_cls = F.cross_entropy(cls_logits, labels_batch)
#         loss_bbox = smooth_l1_loss(bbox_logits, boxes_batch)

#         # Update validation loss
#         val_loss += (loss_cls + loss_bbox).item()

#         # Compute accuracy
#         _, predicted = torch.max(cls_logits, 1)
#         correct = (predicted == labels_batch).sum().item()
#         accuracy = correct / labels_batch.size(0)
#         val_accuracy += accuracy

#         # Update Mean Average Precision metric
#         preds_list = []
#         target_list = []

#         # Assume single class for simplicity
#         class_label = 0

#         for pred_box, label in zip(bbox_logits, labels):
#             pred_dict = {
#                 "boxes": pred_box.unsqueeze(0),  # unsqueeze to add batch dimension
#                 "scores": torch.ones(1).to(device),  # placeholder scores
#                 "labels": torch.tensor([class_label], dtype=torch.int64).to(device)
#             }
#             preds_list.append(pred_dict)

#             target_dict = {
#                 "boxes": label.unsqueeze(0),  # unsqueeze to add batch dimension
#                 "labels": torch.tensor([class_label], dtype=torch.int64).to(device)
#             }
#             target_list.append(target_dict)

#         # Update the Mean Average Precision metric
#         map_metric.update(preds=preds_list, target=target_list)

# # Calculate Mean Average Precision
# map_value = map_metric.compute()

# # Calculate average validation loss and accuracy per epoch
# avg_val_loss = val_loss / len(val_loader.dataset)

# print(f"Epoch {epoch+1}, Validation Avg. Loss: {avg_val_loss}, Mean Average Precision: {map_value}")
