In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes
from torchvision.ops import box_convert
from torch.utils.data import TensorDataset


from project_functions import *
from project_objects import *
from project_models import *
from project_constants import DEVICE as device
from project_constants import SEED

In [None]:
# Seed PyTorch's global random number generator with the project-wide SEED.
torch.manual_seed(SEED)
# Make float64 the default dtype for floating-point tensors created from here on.
torch.set_default_dtype(torch.double)

## 2 Object Localization
#### First we load and inspect the localization datasets

In [None]:
# Load the three pre-built localization splits from the data/ directory.
# NOTE(review): torch.load unpickles the file contents, so only load .pt files from a trusted source.
loc_train = torch.load('data/localization_train.pt')
loc_val = torch.load('data/localization_val.pt')
loc_test = torch.load('data/localization_test.pt')

In [None]:
# Report how many samples each split contains.
print(f'Train data size: {len(loc_train)}')
print(f'Val data size: {len(loc_val)}')
print(f'Test data size: {len(loc_test)}')

In [None]:
# Look at the first training sample to see what one (image, label) pair looks like.
first_img, first_label = loc_train[0]

print(f'Shape of first image: {first_img.shape}')
print(f'Type of first image: {type(first_img)}')

print(f'\nShape of first label: {first_label.shape}')
# Fixed: the f-string used to end with a stray ")" that was printed right after the type.
print(f'Type of first label: {type(first_label)}')
# Last expression of the cell, so the raw label values are displayed as the cell output.
first_label

In [None]:
# Run the project helper count_instances on each localization split loaded above,
# passing a display name for the split as the second argument.
count_instances(loc_train, 'Training Data')
count_instances(loc_val, 'Validation Data')
count_instances(loc_test, 'Test Data')


#### Plotting one image from each class

In [None]:
# Plot training samples using the project helper's default arguments.
plot_localization_data(loc_train)

In [None]:
# Same helper with class_label=3 and start_idx=10
# (presumably: only class 3, starting the search at sample 10 — verify against project_functions).
plot_localization_data(loc_train, class_label=3, start_idx=10)

#### Defining a normalizer and a preprocessor

In [None]:
# Collect the image of every training sample into one stacked tensor so that the
# normalization statistics are taken over the whole training split.
imgs = torch.stack([sample[0] for sample in loc_train])

# Normalizer: mean and std are reduced over dims 0, 2 and 3 of the stacked tensor,
# leaving one value per entry of dim 1.
normalizer_pipe = transforms.Normalize(
    mean=imgs.mean(dim=(0, 2, 3)),
    std=imgs.std(dim=(0, 2, 3)),
)

# Preprocessing pipeline; for now it consists of the normalizer only.
preprocessor = transforms.Compose([normalizer_pipe])

In [None]:
# Apply the preprocessor to every image up front; labels are passed through unchanged.
# Each result is a plain Python list of (image, label) tuples.
# The validation and test splits reuse the statistics computed on the training split.
loc_train_norm = [(preprocessor(img), label) for img, label in loc_train]
loc_val_norm = [(preprocessor(img), label) for img, label in loc_val]
loc_test_norm = [(preprocessor(img), label) for img, label in loc_test]

In [None]:
# Batch the normalized splits in mini-batches of 64 samples.
# NOTE(review): shuffle=False is also used for the training loader, so every epoch sees the
# batches in the same order — confirm this is intentional.
train_loader = torch.utils.data.DataLoader(loc_train_norm, batch_size=64, shuffle=False)
val_loader = torch.utils.data.DataLoader(loc_val_norm, batch_size=64, shuffle=False)

# Project-defined loss used for the localization task.
loss_fn = LocalizationLoss()

#### Defining models

In [None]:
# Name passed to the plotting helpers below.
model_name = 'test'

# Re-seed right before the model is built so its initial weights are repeatable
# regardless of how much randomness was consumed earlier in the notebook.
torch.manual_seed(SEED)
model = LocCNN1((48,60,1))
model.to(device=device)
# SGD with momentum; weight_decay adds L2 regularisation.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.001)

# train() is a project helper; it is unpacked here into five return values.
loss_train, loss_val, train_perform, val_perform, losses_separated = train(
    n_epochs=2,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
    val_loader=val_loader,
    performance_calculator=localization_performance
)

# NOTE(review): save_model=True is passed only to plot_loss here — confirm what is written to disk.
plot_loss(loss_train, loss_val, model_name, save_model=True)
plot_lists(losses_separated, ['detection loss', 'localization loss', 'classification loss'], model_name, save_model=False)
# Keep the validation targets and predictions of the trained model.
y_true, y_pred = predict(model, val_loader)

#### Selecting the best model

In [None]:
# Only one candidate exists so far; it is passed together with element 2 of val_perform
# (presumably the overall score, matching test_performance[2] further down — verify).
best_model, best_performance = model_selector([model], [val_perform[2]])

# Print the details of the selected model.
print("Best Model Details\n--------------------------------------------------------------")
print(f"Network architecture/ layout: {best_model}\n")
# TODO: also report the optimizer parameters of the selected model.
print(f"Validation Performance: {best_performance}")
# TODO: also report the validation accuracy of the selected model.

#### Evaluating the best model on unseen data (TBD)

In [None]:
# Batch the normalized test split the same way as the training and validation splits.
test_loader = torch.utils.data.DataLoader(loc_test_norm, batch_size=64, shuffle=False)

# The result is indexed below as [0] accuracy, [1] IOU and [2] overall performance.
test_performance = localization_performance(best_model, test_loader)
print(10*'-'+'Test Performance' + 10*'-')
print(f"Test Accuracy: {test_performance[0]}\nTest IOU: {test_performance[1]}\nOverall Performance: {test_performance[2]}")

In [None]:
# Overwrites the validation y_true / y_pred from the training cell with the test-set results.
y_true, y_pred = predict(best_model, test_loader)

In [None]:
# Plot predictions on the un-normalized test images (loc_test, not loc_test_norm).
plot_predictions(loc_test, y_true, y_pred, label=3, start_idx=0)

## 3 Object Detection

#### Loading and inspecting the data

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes
from torchvision.ops import box_convert
from torch.utils.data import TensorDataset


from project_functions import *
from project_objects import *
from project_models import *
from project_constants import DEVICE as device
from project_constants import SEED

# Repeated from section 2 so this section also uses float64 by default when run on its own.
torch.set_default_dtype(torch.double)

In [None]:
# Load the ground-truth label lists for the three detection splits.
# NOTE(review): torch.load unpickles the file contents, so only load .pt files from a trusted source.
train_labels = torch.load('data/list_y_true_train.pt')
val_labels = torch.load('data/list_y_true_val.pt')
test_labels = torch.load('data/list_y_true_test.pt')

In [None]:
# Report how many label entries each split contains.
print(f'Train label size: {len(train_labels)}')
print(f'Val label size: {len(val_labels)}')
print(f'Test label size: {len(test_labels)}')

In [None]:
# Load the image data of the three detection splits.
# NOTE(review): torch.load unpickles the file contents, so only load .pt files from a trusted source.
train_imgs = torch.load('data/detection_train.pt')
val_imgs = torch.load('data/detection_val.pt')
test_imgs = torch.load('data/detection_test.pt')

In [None]:
# Convert the label lists with the project helper prepare_labels.
# (2,3,6) is presumably the target shape: a 2x3 grid with 6 values per cell — verify against project_functions.
train_labels_local = prepare_labels(train_labels, (2,3,6))
val_labels_local = prepare_labels(val_labels, (2,3,6))
test_labels_local = prepare_labels(test_labels, (2,3,6))

In [None]:
def local_to_global_list(input_tensor):
    """
    Convert grid-cell ("local") box values into image-level ("global") values.

    ``input_tensor`` is indexed as ``[sample, grid_row, grid_col, feature]``.
    For the cell at (grid_row, grid_col) of an ``n_rows x n_cols`` grid:

    * feature 1 becomes ``feature / n_cols + grid_col / n_cols``
    * feature 2 becomes ``feature / n_rows + grid_row / n_rows``
    * feature 3 is multiplied by ``n_cols`` and feature 4 by ``n_rows``

    Features 1-4 are presumably (cx, cy, w, h) — confirm against prepare_labels.
    The scale factors used to be hard-coded as 3 and 2, which are exactly the
    column and row counts of the 2x3 grid used in this notebook, so results for
    that grid are unchanged.
    NOTE(review): multiplying features 3/4 by the grid size is kept from the
    original; confirm it is the inverse of what prepare_labels does.

    The input tensor is NOT modified. The previous version converted it in
    place, so running the cell twice on the same tensor converted it twice.

    Args:
        input_tensor: 4-D tensor ``(n_samples, n_rows, n_cols, n_features)``
            with at least 5 features.

    Returns:
        A list with one entry per sample. Each entry is a list of 1-D tensors,
        one per grid cell whose feature 0 is non-zero, in row-major cell order.
    """
    # Work on a copy so the caller's tensor stays untouched.
    grid = input_tensor.clone()
    n_samples, h_size, w_size, n_features = grid.shape

    # Per-cell offsets, shaped to broadcast over (sample, row, col).
    col_offsets = torch.arange(w_size, dtype=grid.dtype, device=grid.device).view(1, 1, w_size) / w_size
    row_offsets = torch.arange(h_size, dtype=grid.dtype, device=grid.device).view(1, h_size, 1) / h_size

    grid[..., 1] = grid[..., 1] / w_size + col_offsets
    grid[..., 2] = grid[..., 2] / h_size + row_offsets
    grid[..., 3] *= w_size
    grid[..., 4] *= h_size

    # One row per grid cell. The old view(-1, C, C) only worked because
    # 2 * 3 cells happened to equal the 6 features.
    flat = grid.reshape(n_samples, h_size * w_size, n_features)
    keep = flat[:, :, 0] != 0

    # list(...) over a 2-D tensor yields its rows as 1-D tensors.
    return [list(flat[i][keep[i]]) for i in range(n_samples)]



In [None]:
# Pair each split's images with its grid-format labels using the project helper merge_datasets.
det_train = merge_datasets(train_imgs, train_labels_local)
det_val = merge_datasets(val_imgs, val_labels_local)
det_test = merge_datasets(test_imgs, test_labels_local)

#### Plotting some images from the detection dataset

In [None]:
# Keep only the first element of each (img, _) pair in train_imgs.
# NOTE: this rebinds `imgs`, which in section 2 held the stacked localization images.
imgs = [img for img,_ in train_imgs]
# Plot with the original label lists (train_labels), not the grid-format labels.
plot_detection_data(imgs, train_labels, start_idx=10)

#### Creating the data loaders (normalizing the detection dataset is TBD)

In [None]:
# Rebinds train_loader / val_loader / loss_fn from section 2 to their detection counterparts.
# The detection images are batched as loaded; no preprocessor is applied in this cell.
# NOTE(review): shuffle=False is also used for the training loader — confirm this is intentional.
train_loader = torch.utils.data.DataLoader(det_train, batch_size=64, shuffle=False)
val_loader = torch.utils.data.DataLoader(det_val, batch_size=64, shuffle=False)

# Project-defined loss used for the detection task.
loss_fn = DetectionLoss()

#### Defining models

In [None]:
def calculate_iou(outputs, labels):
    """
    Calculate the IoU between each predicted box and the ground-truth box in the same row.

    Columns 1-4 of both inputs are read as a box in (cx, cy, w, h) format.
    Row ``i`` of ``outputs`` is compared only with row ``i`` of ``labels``.
    The previous version built the full N x N pairwise IoU matrix and kept only
    its diagonal; this computes the same values directly, row by row.

    Args:
        outputs: 2-D tensor ``(N, >=5)`` with predicted boxes in columns 1-4.
        labels: 2-D tensor ``(N, >=5)`` with ground-truth boxes in columns 1-4.

    Returns:
        1-D tensor of length N with ``intersection / union`` per row. A row
        where the union is zero (e.g. two zero-area boxes) yields NaN, which is
        also what the pairwise formula produced.
    """
    def _to_corners(boxes):
        # (cx, cy, w, h) -> (x1, y1, x2, y2)
        cx, cy, w, h = boxes.unbind(-1)
        return cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h

    px1, py1, px2, py2 = _to_corners(outputs[:, 1:5])
    tx1, ty1, tx2, ty2 = _to_corners(labels[:, 1:5])

    # Overlap extent along each axis; negative extents mean "no overlap" and are clamped to 0.
    inter_w = (torch.min(px2, tx2) - torch.max(px1, tx1)).clamp(min=0)
    inter_h = (torch.min(py2, ty2) - torch.max(py1, ty1)).clamp(min=0)
    intersection = inter_w * inter_h

    area_pred = (px2 - px1) * (py2 - py1)
    area_true = (tx2 - tx1) * (ty2 - ty1)
    union = area_pred + area_true - intersection

    return intersection / union

In [None]:
def calculate_ap(outputs, labels):
    """
    11-point interpolated average precision (AP) at an IoU threshold of 0.5.

    Both inputs are flattened to one row per grid cell (last dimension kept).
    Every row of ``outputs`` counts as one detection, ranked by the sigmoid of
    column 0. A detection is a true positive when its IoU with the label in the
    same row is >= 0.5 and a false positive when it is < 0.5 (a NaN IoU counts
    as neither). Class predictions are not taken into account.

    The previous version never accumulated ``interpolated`` and therefore always
    returned 0. It also printed debug output, masked on ``recall <= level``
    instead of ``>=``, and divided by zero when a batch had no ground truths.

    Args:
        outputs: tensor ``(..., F)`` of raw predictions; column 0 is the
            confidence logit and columns 1-4 the box read by calculate_iou.
        labels: tensor of the same shape; a row is a ground-truth object when
            column 0 equals 1.

    Returns:
        The AP as a Python float in [0, 1]; 0.0 when there are no detections or
        no ground-truth objects.
    """
    iou_threshold = 0.5
    n_recall_levels = 11  # recall levels 0.0, 0.1, ..., 1.0

    outputs_reshaped = outputs.reshape(-1, outputs.size(-1))
    labels_reshaped = labels.reshape(-1, labels.size(-1))

    ground_truths = (labels_reshaped[:, 0] == 1).sum().item()
    if ground_truths == 0 or outputs_reshaped.size(0) == 0:
        return 0.0

    confidence = torch.sigmoid(outputs_reshaped[:, 0])
    iou = calculate_iou(outputs_reshaped, labels_reshaped)

    # Rank detections from most to least confident, then accumulate TP / FP counts.
    ranking = torch.argsort(confidence, descending=True)
    acc_tp = (iou >= iou_threshold)[ranking].long().cumsum(dim=0)
    acc_fp = (iou < iou_threshold)[ranking].long().cumsum(dim=0)

    # clamp avoids 0/0 while nothing has been counted yet (only possible with NaN IoUs).
    precision = acc_tp / (acc_tp + acc_fp).clamp(min=1)

    interpolated = 0.0
    for level in range(n_recall_levels):
        # recall >= level / 10, written with integers so that floating-point
        # rounding cannot flip the comparison (recall = acc_tp / ground_truths).
        reached = acc_tp * (n_recall_levels - 1) >= level * ground_truths
        if reached.any():
            interpolated += precision[reached].max().item()
        # A recall level that is never reached contributes a precision of 0.

    return interpolated / n_recall_levels
        

In [None]:
# NOTE: this cell used to call detection_performance(model, val_loader) at this point.
# On a fresh kernel (Restart & Run All) that fails: the function is only defined in the
# next cell, and `model` is still the localization network created in section 2.
# detection_performance is passed to train() as performance_calculator in the training
# cell below; call it explicitly only after that cell has run.

In [None]:
def detection_performance(model, loader):
    '''
    Average the per-batch average precision of ``model`` over ``loader``.

    The model is switched to eval mode (and left in it). Each batch is moved to
    ``device`` as float64 and run without gradient tracking. Outputs and labels
    are permuted from (N, d1, d2, d3) to (N, d2, d3, d1) before being handed to
    calculate_ap (presumably channels-first to channels-last — verify against
    the model and prepare_labels).

    Args:
        model: callable network exposing ``eval()``.
        loader: iterable yielding ``(imgs, labels)`` batches of 4-D tensors.

    Returns:
        Mean of calculate_ap over all batches. Each batch counts equally,
        whatever its size. Returns 0.0 for an empty loader; the previous
        version raised ZeroDivisionError in that case.
    '''
    model.eval()
    ap_sum = 0
    n_batches = 0
    with torch.inference_mode():
        for imgs, labels in loader:
            imgs = imgs.to(device=device, dtype=torch.double)
            labels = labels.to(device=device, dtype=torch.double)

            outputs = model(imgs)

            ap_sum += calculate_ap(outputs.permute(0,2,3,1), labels.permute(0,2,3,1))
            n_batches += 1

    if n_batches == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0

    return ap_sum/n_batches


In [None]:
# Name passed to the plotting helpers below.
model_name = 'test'
# Re-seed right before the model is built so its initial weights are repeatable.
torch.manual_seed(SEED)
model = DetCNN1()
model.to(device=device)
# SGD with momentum; weight_decay adds L2 regularisation.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.001)

# train() is a project helper; it is unpacked here into five return values.
losses_train, losses_val, train_performance, val_performance, losses_separated = train(
    n_epochs=2,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
    val_loader=val_loader,
    performance_calculator=detection_performance
)

# Fixed: plot the losses returned by THIS run. The call used to pass loss_train / loss_val,
# the section 2 variables, which showed the localization curves (or raised NameError when
# only section 3 had been run).
plot_loss(losses_train, losses_val, model_name, save_model=False)
plot_lists(losses_separated, ['detection loss', 'localization loss', 'classification loss'], model_name, save_model=False)

In [None]:
# Collect targets and predictions of the detection model on the validation split.
y_true, y_pred = predict(model, val_loader)
# TODO: visualise the predictions. Disabled draft below; note that the converter defined in
# this notebook is named local_to_global_list, not local_to_global.
# imgs = [img for img,_ in val_imgs]
# y_pred_global = local_to_global(y_pred)
# plot_detection_data(imgs, val_labels, y_pred, start_idx=10)

In [None]:
# Display the validation targets as the cell output.
# NOTE(review): this shows the whole object; consider displaying only a slice or its shape.
y_true