Identification

In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import draw_bounding_boxes
from torchvision.models import resnet34, ResNet34_Weights
from torchvision.ops import RoIPool
from torch.optim.lr_scheduler import StepLR
from PIL import Image
import pandas as pd
from torch.utils.data import random_split
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Set the CUDA-related environment variables for this process in one step:
# CUDA_LAUNCH_BLOCKING and PYTORCH_CUDA_ALLOC_CONF.
os.environ.update({
    'CUDA_LAUNCH_BLOCKING': "1",
    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
})


In [3]:
class FetusDetector(nn.Module):
    """ResNet-34 feature extractor with a classification head and a box head.

    The backbone is the first eight children of torchvision's ``resnet34``
    (default pretrained weights), kept as two sequential stages. In
    ``forward`` the feature map is average-pooled to 1x1, flattened,
    concatenated with the flattened input box and passed to two
    double-precision heads.

    Args:
        num_classes: Number of outputs of the classification head.

    NOTE(review): the training loop in this notebook passes the same ``box``
    both as an input to ``forward`` and as the regression target of the box
    head, so that head can learn to copy its input — confirm this is intended.
    """

    # Input width of both heads (previously the literal 516): pooled backbone
    # channels plus the four box coordinates concatenated in ``forward``.
    # presumably 512 is the channel count of the last ResNet-34 stage.
    _BACKBONE_CHANNELS = 512
    _BOX_COORDS = 4

    def __init__(self, num_classes):
        super().__init__()
        resnet = resnet34(weights=ResNet34_Weights.DEFAULT)
        layers = list(resnet.children())[:8]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        # Not used by ``forward``; kept so the module's attributes stay as before.
        self.roi_pool = RoIPool(output_size=(1, 1), spatial_scale=1.0)

        head_in = self._BACKBONE_CHANNELS + self._BOX_COORDS
        # Both heads run in float64, matching the double cast done in ``forward``.
        self.classifier = nn.Sequential(
            nn.BatchNorm1d(head_in),
            nn.Linear(head_in, num_classes),
        ).double()
        self.bb = nn.Sequential(
            nn.BatchNorm1d(head_in),
            nn.Linear(head_in, 4),
        ).double()

    def forward(self, image, box):
        """Compute class logits and box outputs for a batch.

        Args:
            image: Batch of images fed to the backbone.
            box: Per-sample box tensor; flattened to ``(batch, -1)`` before
                being concatenated with the pooled features.

        Returns:
            Tuple ``(cls_logits, bbox_logits)`` of float64 tensors with shapes
            ``(batch, num_classes)`` and ``(batch, 4)``.
        """
        x = self.features2(self.features1(image))
        x = F.relu(x)
        # Functional pooling avoids constructing a new module on every call.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, 1)
        box = box.reshape(box.size(0), -1)
        # Cast both parts explicitly so the concatenation never depends on
        # implicit dtype promotion between the features and the box.
        x = torch.cat((x.double(), box.double()), dim=1)
        cls_logits = self.classifier(x)
        bbox_logits = self.bb(x)
        return cls_logits, bbox_logits


In [4]:
class FetusDataset(Dataset):
    """Images with one labelled bounding box per spreadsheet row.

    Rows of the labels spreadsheet are read positionally: column 0 is the
    image file name (also addressed by name as ``"fname"``), column 1 is the
    structure name looked up in ``label_map``, and the remaining four columns
    are unpacked as ``h_min, w_min, h_max, w_max``. Rows whose image file is
    not present under ``data_dir`` are dropped at construction time.

    Args:
        data_dir: Directory containing the image files.
        labels_file: Path of the Excel file read with ``pandas.read_excel``.
        transform: Optional callable applied to the RGB PIL image. It is
            expected to return a ``(C, H, W)`` tensor; when omitted the image
            is converted with ``PILToTensor`` instead.
    """

    def __init__(self, data_dir, labels_file, transform=None):
        self.data_dir = data_dir
        self.labels_df = pd.read_excel(labels_file)
        self.transform = transform
        # Structure name (spreadsheet column 1) -> integer class id.
        self.label_map = {
            'thalami': 0,
            'nasal bone': 1,
            'palate': 2,
            'nasal skin': 3,
            'nasal tip': 4,
            'midbrain': 5,
            'NT': 6,
            'IT': 7,
            'CM': 8
        }

        # Not used inside this class; kept so the attribute remains available.
        self.transform_PIL = transforms.Compose([
            transforms.ToPILImage()
        ])

        self.transform_tensor = transforms.Compose([
            transforms.PILToTensor()
        ])

        # Keep only the rows whose image actually exists on disk. The explicit
        # bool cast keeps the mask usable when the spreadsheet has no rows.
        file_exists = self.labels_df["fname"].map(
            lambda fname: os.path.exists(os.path.join(self.data_dir, fname))
        ).astype(bool)
        self.labels_df = self.labels_df[file_exists]

    def __len__(self):
        """Return the number of rows that survived the file-existence filter."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, scaled_box, label_id)`` where ``scaled_box`` is a
            ``(1, 4)`` float64 tensor ``[x_min, y_min, x_max, y_max]``
            expressed in the coordinates of the returned image, or ``None``
            if the image file has disappeared since construction.

        Raises:
            KeyError: If the row's structure name is not in ``label_map``.
        """
        img_name = os.path.join(self.data_dir, self.labels_df.iloc[idx, 0])

        # The file may have been removed after the filter in __init__ ran.
        if not os.path.exists(img_name):
            return None

        label_name = self.labels_df.iloc[idx, 1]
        if label_name not in self.label_map:
            # Fail here with a clear message rather than handing a None label
            # to the DataLoader's collate step.
            raise KeyError(
                f"Unknown label {label_name!r} in row {idx}; "
                f"expected one of {sorted(self.label_map)}"
            )
        label_id = self.label_map[label_name]

        image = Image.open(img_name).convert('RGB')
        # PIL reports (width, height); no tensor conversion is needed for this.
        original_image_width, original_image_height = image.size

        h_min, w_min, h_max, w_max = self.labels_df.iloc[idx, 2:].values.astype(float)
        box = torch.tensor([w_min, h_min, w_max, h_max], dtype=torch.float64).unsqueeze(0)

        if self.transform:
            image = self.transform(image)
        else:
            # Without a transform the PIL image has no ``.shape``; return the
            # raw tensor form so the box scaling below is an identity.
            image = self.transform_tensor(image)

        # Rescale the box from original-image pixels to the returned image.
        new_image_height, new_image_width = image.shape[-2:]
        scale_x = new_image_width / original_image_width
        scale_y = new_image_height / original_image_height
        scale = torch.tensor([scale_x, scale_y, scale_x, scale_y], dtype=box.dtype)
        scaled_box = box * scale

        return image, scaled_box, label_id


In [5]:
# Define paths to your data and labels file.
# The directory is assembled with os.path.join so the string contains no
# backslash escape sequences and resolves on any operating system.
data_dir = os.path.join('Dataset for Fetus Framework', 'Training', 'Standard')
labels_file = 'ObjectDetection.xlsx'

# Preprocessing: resize every image to a fixed size, convert it to a tensor
# and normalize each channel (no random augmentation is applied).
transform = transforms.Compose([
    transforms.Resize((470, 650)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Build the dataset; rows whose image file is missing are dropped inside it.
dataset = FetusDataset(data_dir, labels_file, transform=transform)

# Stop early if no usable sample is left.
if len(dataset) == 0:
    raise ValueError("No valid samples found in the dataset")


In [6]:
# Seed the split so the same samples land in train/validation on every run;
# an unseeded split changes after each kernel restart, which lets samples
# used for training leak into a later validation pass.
SPLIT_SEED = 42
train_dataset, ground_dataset = random_split(
    dataset,
    [0.8, 0.2],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# num_workers / persistent_workers can be passed here to parallelise loading.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Validation order does not affect the metrics, so keep it deterministic.
val_loader = DataLoader(ground_dataset, batch_size=32, shuffle=False)


In [7]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Nine classes, one per entry of FetusDataset.label_map. The model is moved
# to the device once here; the repeated model.to(device) call was a no-op.
model = FetusDetector(num_classes=9).to(device)

# NOTE(review): the training loop below calls F.cross_entropy directly, so
# this criterion object is currently unused there.
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

# Multiply the learning rate by 0.1 every 10 scheduler steps.
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

  return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)


In [8]:
# Define smooth L1 loss for bounding box regression
def smooth_l1_loss(prediction, target, beta=1.0):
    """Mean smooth L1 loss for bounding box regression.

    Element-wise, with ``d = |prediction - target|``, the loss is
    ``0.5 * d**2 / beta`` where ``d < beta`` and ``d - 0.5 * beta`` elsewhere;
    the mean over all elements is returned.

    Args:
        prediction: Predicted values.
        target: Target values. When it holds the same number of elements as
            ``prediction`` but in a different shape (for example ``(B, 1, 4)``
            against ``(B, 4)``) it is reshaped to match, so the two are paired
            element by element instead of being broadcast against each other.
        beta: Threshold between the quadratic and the linear region. With
            ``beta == 0`` the loss is the plain mean absolute error.

    Returns:
        Scalar tensor holding the mean loss.
    """
    if target.shape != prediction.shape and target.numel() == prediction.numel():
        # Broadcasting (B, 4) against (B, 1, 4) would yield a (B, B, 4) tensor
        # that compares every prediction with every target in the batch.
        target = target.reshape(prediction.shape)

    diff = torch.abs(prediction - target)

    if beta == 0:
        # The quadratic branch would divide by zero; even though torch.where
        # never selects it, its NaN values would leak into the gradients.
        return diff.mean()

    elementwise = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)
    return elementwise.mean()

In [9]:
num_epochs = 40
# Not updated inside this loop; kept for code that tracks the best validation loss.
best_val_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_accuracy = 0.0

    for image, box, label in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        # The model was moved to `device`, so every batch tensor has to follow
        # it; otherwise the forward pass fails as soon as a GPU is in use.
        image = image.to(device)
        box = box.to(device)
        label = label.to(device)

        # The dataset yields boxes shaped (1, 4), i.e. (batch, 1, 4) after
        # collation. Flatten to (batch, 4) so the regression target lines up
        # with the box head's output instead of broadcasting to (batch, batch, 4).
        box_target = box.reshape(box.size(0), -1)

        optimizer.zero_grad()

        # Forward pass
        cls_logits, bbox_logits = model(image, box)

        # Classification and bounding box regression losses
        loss_cls = F.cross_entropy(cls_logits, label)
        loss_bbox = smooth_l1_loss(bbox_logits, box_target)

        # Total loss
        loss = loss_cls + loss_bbox

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Batch accuracy of the classification head
        predicted = cls_logits.argmax(dim=1)
        correct = (predicted == label).sum().item()
        accuracy = correct / label.size(0)

        # Accumulate per-batch statistics
        running_loss += loss.item()
        running_accuracy += accuracy

    # Average the per-batch loss and accuracy over the epoch
    avg_loss = running_loss / len(train_loader)
    avg_acc = running_accuracy / len(train_loader)

    print(f"Epoch {epoch+1}, Avg. Loss: {avg_loss}, Avg. accuracy: {avg_acc}")

    # One scheduler step per epoch
    scheduler.step()


Epoch 1/40:   0%|          | 0/221 [00:00<?, ?it/s]

: 

In [None]:
# Write the model's parameters (its state dict) to a checkpoint file.
checkpoint_path = 'lmao4.pt'
torch.save(model.state_dict(), checkpoint_path)

NameError: name 'model' is not defined

In [None]:
# Validation loop with visualization
# Per-channel mean/std passed to transforms.Normalize when the dataset was
# built; needed to map the normalized tensors back to displayable images.
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])

model.eval()
with torch.no_grad():
    # The dataset yields (image, box, label), in that order.
    for images, boxes, labels in tqdm(val_loader, desc='Validation'):
        # The model takes the image and the box and returns a pair of outputs;
        # only the class logits are needed for the predicted label.
        cls_logits, _ = model(images.to(device), boxes.to(device))
        predicted = cls_logits.argmax(dim=1).cpu()

        # Channels-last numpy copies for matplotlib; boxes flattened to (batch, 4)
        np_images = np.transpose(images.cpu().numpy(), (0, 2, 3, 1))
        np_boxes = boxes.reshape(boxes.size(0), -1).cpu().numpy()

        # Iterate over each image in the batch
        for i in range(len(images)):
            # Undo the normalization and clamp to the range imshow expects
            img_np = np.clip(np_images[i] * norm_std + norm_mean, 0.0, 1.0)

            fig, ax = plt.subplots()
            ax.imshow(img_np)
            ax.axis('off')

            # Box corners are stored as [x_min, y_min, x_max, y_max]
            x_min, y_min, x_max, y_max = np_boxes[i]

            # Draw the bounding box outline
            ax.plot([x_min, x_max, x_max, x_min, x_min], [y_min, y_min, y_max, y_max, y_min], color='red', linewidth=2)

            # Show the ground-truth class id next to the predicted one
            ax.text(
                x_min,
                y_min,
                f'Class: {labels[i].item()} (predicted: {predicted[i].item()})',
                color='red',
                fontsize=12,
                verticalalignment='top',
            )

            # Show the figure, then release it so figures do not pile up in memory
            plt.show()
            plt.close(fig)


Classification