In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import torch.optim as optim
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import random_split
from collections import Counter
import seaborn as sns
from sklearn.metrics import confusion_matrix
from torchvision.utils import draw_bounding_boxes
from torchvision.ops import box_convert

In [None]:
# Fixed seed so that torch-driven randomness (e.g. weight initialisation) is repeatable.
SEED = 265
torch.manual_seed(SEED)

# Newly created floating-point tensors and parameters default to float64.
torch.set_default_dtype(torch.double)

# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Display the selected compute device.
device

## 2 Object Localization
#### First we load and inspect the `localization_*` datasets

In [None]:
# Load the three localization splits in train / val / test order.
# NOTE: torch.load unpickles the files, so they must come from a trusted source.
train_data, val_data, test_data = (
    torch.load(path)
    for path in (
        'data/localization_train.pt',
        'data/localization_val.pt',
        'data/localization_test.pt',
    )
)

In [None]:
# Report the number of samples in each split, one line per split.
print(
    f'Train data size: {len(train_data)}',
    f'Val data size: {len(val_data)}',
    f'Test data size: {len(test_data)}',
    sep='\n',
)

In [None]:
# Pull out the first training sample to see how images and labels are stored.
first_img, first_label = train_data[0]

print(f'Shape of first image: {first_img.shape}')
print(f'Type of first image: {type(first_img)}')

print(f'\nShape of first label: {first_label.shape}')
# The original message ended with a stray ')' after the type.
print(f'Type of first label: {type(first_label)}')
# Display the raw label values as the cell output.
first_label

In [None]:
def count_instances(data, data_name=None) -> None:
    """Print how many samples of each class a dataset contains.

    The class id is taken from the last entry of every label and cast to int.
    Counts are printed one per line as ``<class>: <count>``, ordered by
    ascending class id. When ``data_name`` is given, a header line naming the
    dataset is printed first.
    """
    class_counts = Counter(int(target[-1]) for _, target in data)
    if data_name is not None:
        print(f'Class distribution in {data_name}')
    for class_id in sorted(class_counts):
        print(f'{class_id}: {class_counts[class_id]}')

# Class balance of every split, in train / val / test order.
count_instances(data=train_data, data_name='Training Data')
count_instances(data=val_data, data_name='Validation Data')
count_instances(data=test_data, data_name='Test Data')

In [None]:
# Shape of the first training image.
train_data[0][0].shape
# height, width (the last two dimensions, as used by the plotting code)

In [None]:
# Label entries 1-4: the bounding-box values that the plotting code scales by
# image width/height and converts from the 'cxcywh' format.
first_label[1:5]

In [None]:
# Shape of the first training image extracted earlier.
first_img.shape

#### Plotting one image from each class

In [None]:
# One panel per class id 0..9 (2 rows x 5 columns).
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(8,3))

for i, ax in enumerate(axes.flat):
    # First training sample whose class id (last label entry) equals i.
    img, bbox = next((img, label[1:5]) for img, label in train_data if int(label[-1]) == i)
    # Use the size of the image actually being drawn.
    img_height, img_width = img.shape[-2], img.shape[-1]

    # Convert to a uint8 image for draw_bounding_boxes
    # (assumes pixel values are in [0, 1] -- TODO confirm).
    img = (img * 255).byte()

    # label[1:5] is a VIEW into the dataset's label tensor: scaling it in place
    # would permanently overwrite the stored box with pixel values. Work on a copy.
    bbox = bbox.clone()
    bbox[0] *= img_width
    bbox[1] *= img_height
    bbox[2] *= img_width
    bbox[3] *= img_height

    # Convert in floating point; truncating to integers before the conversion
    # (as a uint8 cast would) shifts the corners by up to a pixel.
    converted_bbox = box_convert(bbox, in_fmt='cxcywh', out_fmt='xyxy')

    img_with_bbox = draw_bounding_boxes(img, converted_bbox.unsqueeze(0), colors='red')
    # Channels-first -> channels-last for matplotlib.
    img_with_bbox  = img_with_bbox.numpy().transpose((1, 2, 0))
    ax.imshow(img_with_bbox, cmap='gray')
    ax.set_title(i)
    ax.axis('off')

In [None]:
def plot_class(data: torch.utils.data.Dataset, class_label: int, start_idx: int = 0) -> None:
    """Plot up to 10 images of one class with their bounding boxes drawn in red.

    Args:
        data: Iterable of ``(image, label)`` pairs. The class id is read from the
            last label entry and the box from label entries 1-4, which are scaled
            by the image width/height and converted from 'cxcywh' to 'xyxy'.
        class_label: Class id whose images are shown.
        start_idx: Index, among the images of that class, of the first image shown.

    Raises:
        IndexError: If the class has no image at ``start_idx``.

    If fewer than 10 images are available from ``start_idx`` on, the remaining
    panels are left empty. ``data`` is not modified.
    """
    n_rows, n_cols = 2, 5

    # Single pass over the dataset collecting (image, box) pairs of the class.
    samples = [(img, label[1:5]) for img, label in data if int(label[-1]) == class_label]

    last_idx = min(start_idx + n_rows * n_cols, len(samples)) - 1
    if last_idx < start_idx:
        raise IndexError(
            f'class {class_label} has {len(samples)} images; nothing to show from index {start_idx}'
        )

    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(8,3))
    plt.suptitle(f'CLASS {class_label} - Image {start_idx} to {last_idx}')

    for i, ax in enumerate(axes.flat):
        ax.axis('off')

        idx = start_idx + i
        if idx > last_idx:
            # Fewer images than panels: leave the panel blank.
            continue

        img, bbox = samples[idx]
        # Size of the image being drawn (not of an unrelated global dataset).
        img_height, img_width = img.shape[-2], img.shape[-1]

        # Convert to a uint8 image for draw_bounding_boxes
        # (assumes pixel values are in [0, 1] -- TODO confirm).
        img = (img * 255).byte()

        # The box is a view into the dataset's label tensor; scale a copy so the
        # stored labels are not overwritten with pixel coordinates.
        bbox = bbox.clone()
        bbox[0] *= img_width
        bbox[1] *= img_height
        bbox[2] *= img_width
        bbox[3] *= img_height

        # Convert in floating point to avoid truncating before the conversion.
        converted_bbox = box_convert(bbox, in_fmt='cxcywh', out_fmt='xyxy')

        img_with_bbox = draw_bounding_boxes(img, converted_bbox.unsqueeze(0), colors='red')
        # Channels-first -> channels-last for matplotlib.
        img_with_bbox = img_with_bbox.numpy().transpose((1, 2, 0))
        ax.imshow(img_with_bbox, cmap='gray')

    plt.show()

# Show ten training images of class 3, starting at that class's 10th index.
plot_class(train_data, class_label=3, start_idx=10)

#### Defining a normalizer and a preprocessor (TBD)

In [None]:
# Stack every training image into one tensor so the statistics cover the whole split.
imgs = torch.stack([img for img, _ in train_data])

# Define normalizer: mean/std reduced over dims 0, 2 and 3 of the stacked
# tensor, leaving one value per entry of dim 1.
normalizer_pipe = transforms.Normalize(
    imgs.mean(dim=(0, 2, 3)),
    imgs.std(dim=(0, 2, 3))
    )

# Define preprocessor including the normalizer.
# The dataset images are already torch tensors (torch.stack above requires it),
# while transforms.ToTensor only accepts PIL images or numpy arrays and raises
# TypeError on tensors, so it is intentionally not part of the pipeline.
preprocessor = transforms.Compose([
            normalizer_pipe
        ])

In [None]:
# Reload pristine copies of the three splits so that training starts from
# unmodified labels, whatever earlier exploratory cells did to the in-memory objects.
# NOTE: torch.load unpickles the files, so they must come from a trusted source.
train_data, val_data, test_data = (
    torch.load(path)
    for path in (
        'data/localization_train.pt',
        'data/localization_val.pt',
        'data/localization_test.pt',
    )
)

#### Defining the loss function

In [None]:
class LossFn(nn.Module):
    """Combined detection + localization + classification loss.

    Layout read from the tensors (one row per sample):
      * ``y_pred[:, 0]``   object-presence logit
      * ``y_pred[:, 1:5]`` predicted bounding-box values
      * ``y_pred[:, 5:]``  class logits
      * ``y_true[:, 0]``   ground-truth object flag
      * ``y_true[:, 1:5]`` ground-truth bounding-box values
      * ``y_true[:, -1]``  ground-truth class id

    The detection term is computed over every sample. The localization and
    classification terms are computed only over samples whose GROUND TRUTH
    says an object is present, so the model is neither penalised for box/class
    outputs on empty images nor able to avoid those terms by predicting
    "no object".
    """
    def __init__(self):
        super().__init__()
        self.L_a = nn.BCEWithLogitsLoss()  # detection loss
        self.L_b = nn.MSELoss()  # localization loss
        self.L_c = nn.CrossEntropyLoss()  # classification loss

    def forward(self, y_pred, y_true):
        """Return the scalar loss for a batch of predictions and labels.

        Args:
            y_pred: 2-D tensor of raw model outputs, one row per sample.
            y_true: 2-D tensor of labels, one row per sample.
        """
        object_flag = y_true[:, 0]
        loss = self.L_a(y_pred[:, 0], object_flag)

        # Gate on the label, not on the prediction: y_pred[:, 0] is an
        # unbounded logit and says nothing about whether a box/class exists.
        has_object = object_flag > 0.5
        if has_object.any():
            loss = loss + self.L_b(y_pred[has_object, 1:5], y_true[has_object, 1:5])
            loss = loss + self.L_c(y_pred[has_object, 5:], y_true[has_object, -1].long())

        return loss

#### Defining models

In [None]:
class MyCNN(nn.Module):
    """Small CNN: three 3x3 convolutions, two 2x2 max-poolings, one linear head.

    Args:
        input_size: ``(height, width)`` of the input images. The default
            ``(48, 60)`` reproduces the previously hard-coded ``12*15*64``
            input size of the linear layer (whether the dataset images are
            exactly 48x60 is not visible here -- confirm).
        n_outputs: Number of values produced per image (default 15).

    All layers are created on the module-level ``device`` in float64.
    ``forward`` returns the raw, un-activated output of the linear layer.
    """
    def __init__(self, input_size=(48, 60), n_outputs=15):
        super().__init__()
        # Same placement and dtype for every layer; previously the linear
        # layer silently relied on the global default dtype being float64.
        layer_kwargs = dict(device=device, dtype=torch.double)

        # Define convolutional layers (3x3, stride 1, padding 1 keeps the spatial size)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1, **layer_kwargs)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1, **layer_kwargs)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, **layer_kwargs)

        # forward() applies two 2x2/stride-2 poolings, each halving height and
        # width with floor division; conv3 emits 64 channels.
        height, width = input_size
        n_features = 64 * (height // 2 // 2) * (width // 2 // 2)

        # Define fully connected layer
        self.fc1 = nn.Linear(n_features, n_outputs, **layer_kwargs)
        self.flat = nn.Flatten()

    def forward(self, x):
        """Map a batch of single-channel images to ``n_outputs`` raw values each."""
        # Convolution + ReLU, pooling after the first two convolutions only
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2, stride=2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2, stride=2)
        out = F.relu(self.conv3(out))
        # Flatten the output from convolutional layers
        out = self.flat(out)
        # Linear head; no activation, the loss works on raw outputs
        out = self.fc1(out)

        return out

In [None]:
# One sample per step. The training loader reshuffles every epoch so that SGD
# does not see the samples in the same (possibly class-ordered) sequence each
# time; the order is still reproducible through the torch seed.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True)
# Validation data is only evaluated, so its order is kept fixed.
val_loader = torch.utils.data.DataLoader(val_data, batch_size=1, shuffle=False)

model = MyCNN()

# Plain SGD over all model parameters, paired with the custom combined loss.
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = LossFn()

In [None]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Every batch is moved to the module-level ``device`` as float64, pushed
    through the model, scored with ``loss_fn`` and followed by one optimizer
    step. After each epoch the mean batch loss is printed together with the
    current time.

    Returns:
        List with the mean training loss of every epoch, in order.
    """
    batches_per_epoch = len(train_loader)
    epoch_losses = []

    model.train()
    # Start from clean gradients.
    optimizer.zero_grad(set_to_none=True)

    for epoch in range(1, n_epochs + 1):
        running_loss = 0.0

        for batch_imgs, batch_labels in train_loader:
            batch_imgs = batch_imgs.to(device=device, dtype=torch.double)
            batch_labels = batch_labels.to(device=device, dtype=torch.double)

            batch_loss = loss_fn(model(batch_imgs), batch_labels)
            batch_loss.backward()

            optimizer.step()
            # Clear gradients so the next batch does not accumulate onto them.
            optimizer.zero_grad()

            running_loss += batch_loss.item()

        mean_loss = running_loss / batches_per_epoch
        epoch_losses.append(mean_loss)

        print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(datetime.now().time(), epoch, mean_loss))

    return epoch_losses

In [None]:
# Train for 10 epochs; `loss` holds the mean training loss of each epoch.
loss = train(10, optimizer, model, loss_fn, train_loader)