In [1]:
from collections import OrderedDict
from torch.optim.lr_scheduler import StepLR
import torch
import torchvision
from torchvision.ops import nms
from torchvision.ops.boxes import box_convert,box_iou


from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import os
import cv2
import random
from dataset import ObjectDetectionDataset
from dataset_helper import get_train_data_loader
from conf import *

# Seed PyTorch's global RNG for reproducibility. This must be a *call*:
# assigning to `torch.manual_seed` would overwrite the function with an int
# and leave the RNG unseeded.
torch.manual_seed(0)

# Checkpoint loaded by get_object_detection_model().
model_path = "../models/epoch-4_model.pth"

In [2]:

def get_object_detection_model(num_classes=NUMBER_OF_CLASSES):
    """Build a Faster R-CNN (ResNet-50 FPN v2) detector and load a checkpoint.

    The stock box predictor is replaced by a ``FastRCNNPredictor`` with
    ``num_classes`` outputs, after which the state dict stored at the
    module-level ``model_path`` is loaded into the model.

    Args:
        num_classes: Number of classes predicted by the replaced box head.

    Returns:
        The detector with the checkpoint weights loaded. ``.eval()`` is not
        called here; callers do that before inference.
    """
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in
    # favour of `weights=` — confirm against the installed version before changing.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # map_location="cpu" lets a checkpoint written from a GPU run be loaded on
    # a machine without CUDA; load_state_dict copies the values into the model.
    state_dict = torch.load(model_path, map_location="cpu")

    # Checkpoints saved from a DataParallel-wrapped model prefix every key
    # with "module.". Strip it only at the start of the key so a key that
    # merely contains "module." further in is left intact.
    prefix = "module."
    updated_state = {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in state_dict.items()
    }
    model.load_state_dict(updated_state)
    return model


def save_test_img(img, target, prefix):
    """Draw the boxes in ``target`` onto ``img`` and save the result as a PNG.

    Args:
        img: Image tensor. It is converted with ``.cpu().numpy()`` and handed
            to OpenCV without any reshaping (presumably height x width x
            channels, uint8 — confirm against the caller).
        target: Mapping with ``'boxes'`` and ``'labels'`` entries that are
            iterated in parallel. Each box is drawn from its first two values
            (one corner) to its last two values (the opposite corner).
        prefix: String inserted into the output file name.

    Returns:
        The relative path of the written image,
        ``./test_output/output_image_{prefix}.png``.

    Raises:
        IOError: If OpenCV reports that the image could not be written.
    """
    # .numpy() shares memory with a CPU tensor, so draw on a copy; otherwise
    # the rectangles would be burned into the caller's image tensor.
    canvas = img.cpu().numpy().copy()

    print(target)
    for box, label in zip(target['boxes'], target['labels']):
        # Boxes are corner pairs, not (x, y, width, height).
        x1, y1, x2, y2 = (int(coord) for coord in box.tolist())
        box_color = BOX_COLOR[label.item()]
        cv2.rectangle(canvas, (x1, y1), (x2, y2), box_color, 2)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(os.path.join(os.getcwd(), 'test_output'), exist_ok=True)

    img_path = f"./test_output/output_image_{prefix}.png"
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(img_path, canvas):
        raise IOError(f"Failed to write image to {img_path}")
    return img_path

def clean_targets(targets):
    """Drop annotation rows that fail any of three validity checks.

    A row is kept only when the sum of its box coordinates exceeds 2, its
    label is not the "background" class, and its area is at least 1.

    Args:
        targets: Mapping that contains at least ``'boxes'``, ``'labels'`` and
            ``'area'``. Every entry except ``'idx'`` is indexed with the
            combined boolean mask.

    Returns:
        A new dict with the same keys; ``'idx'`` is passed through unchanged
        and every other value is filtered by the mask.
    """
    keep = (
        (targets['boxes'].sum(axis=1) > 2)
        & (targets['labels'] != CLASSES_TO_IDX["background"])
        & (targets['area'] >= 1)
    )
    return {
        name: (tensor if name == "idx" else tensor[keep])
        for name, tensor in targets.items()
    }


def inference_filter_prediction(output, iou_threshold=0.25, confidence_threshold=0.50):
    """Filter raw detections by confidence, then by non-maximum suppression.

    For every prediction dict, entries whose score is below
    ``confidence_threshold`` are discarded, and the survivors are passed
    through ``nms`` using only their boxes and scores (labels are not taken
    into account by the suppression step).

    Args:
        output: Iterable of prediction dicts containing ``"boxes"`` and
            ``"scores"``; every value in a dict is indexed with the same
            masks.
        iou_threshold: IoU threshold forwarded to ``nms``.
        confidence_threshold: Minimum score for a detection to be kept.

    Returns:
        A list with one filtered dict per input prediction.
    """
    def _select(prediction, index):
        # Apply the same index or mask to every field of one prediction.
        return {name: values[index] for name, values in prediction.items()}

    filtered = []
    for prediction in output:
        confident = _select(prediction, prediction["scores"] >= confidence_threshold)
        keep = nms(confident["boxes"], confident["scores"], iou_threshold)
        print(keep)
        filtered.append(_select(confident, keep))
    return filtered

def get_images(image_path):
    """Read an image from disk and build the grayscale model input.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A ``(grayscale, original)`` tuple. ``grayscale`` is a float32 tensor
        with a leading dimension of size 1 added in front of the grayscale
        image, with values divided by 255. ``original`` is a tensor wrapping
        the array exactly as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If OpenCV could not read ``image_path``.
    """
    original_image = cv2.imread(image_path)
    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None, which would otherwise surface as an opaque cvtColor error.
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    grayscale = normalize_image(to_grayscale(original_image))
    grayscale = torch.from_numpy(grayscale).float().unsqueeze(0)

    return grayscale, torch.from_numpy(original_image)

def to_grayscale(image):
    """Convert a BGR image to grayscale with ``cv2.cvtColor``."""
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

def normalize_image(img):
    """Return ``img`` divided by 255."""
    max_intensity = 255
    scaled = img / max_intensity
    return scaled


def train(model, train_loader, optimizer, epoch):
    """Run one training epoch on the CPU.

    Args:
        model: Module called as ``model(images, targets)`` that returns a
            dict of loss tensors.
        train_loader: Iterable yielding ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        optimizer: Optimizer that updates the model's parameters.
        epoch: Epoch number, used only in the progress message.
    """
    device = torch.device("cpu")
    model.train()
    for batch_idx, (data, targets) in enumerate(train_loader, 1):
        data = [image.to(device) for image in data]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        optimizer.zero_grad()
        output = model(data, targets)
        print(f"=====[ epoch {epoch} batch {batch_idx}  output of the model: {output}")

        # Optimise the sum of every loss the model returns. Back-propagating
        # only "loss_classifier" leaves the remaining losses (e.g. box
        # regression) without any gradient signal.
        loss = sum(output.values())
        loss.backward()
        optimizer.step()

def run(model, train_loader, epochs=2, lr=0.01, momentum=0.5, step_size=10, gamma=0.1):
    """Train ``model`` for several epochs with SGD and a step LR schedule.

    The defaults reproduce the previously hard-coded settings, so existing
    ``run(model, train_loader)`` calls behave as before.

    Args:
        model: Model whose parameters are optimised.
        train_loader: Batches forwarded to ``train`` each epoch.
        epochs: Number of epochs to run.
        lr: Initial SGD learning rate.
        momentum: SGD momentum.
        step_size: Number of epochs between learning-rate decays.
        gamma: Multiplicative learning-rate decay factor.
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)

    for epoch in range(1, epochs + 1):
        train(model, train_loader, optimizer, epoch)
        # Advance the schedule once per epoch, after that epoch's optimizer steps.
        scheduler.step()



In [3]:
# Build the detector; get_object_detection_model() also loads the checkpoint
# stored at `model_path`.
model = get_object_detection_model()
# Optional training pass, currently disabled:
# dataset = get_train_data_loader(1, "../die")
# print(model)
# run(model, dataset)
# Put the model in evaluation mode before running inference.
model.eval()
# NOTE(review): not referenced by any code visible in this notebook — confirm
# it is unused before removing.
original_image_sizes = []



In [5]:
# NOTE: despite its name, `image_dir` is the path of a single image file.
image_dir = "../die/street2.jpg"
grayscale, image = get_images(image_dir)
# Add a leading dimension so the model receives a one-image batch.
grayscale = grayscale.unsqueeze(0)

# Inference only: skip autograd bookkeeping so no graph is built or kept alive.
with torch.no_grad():
    output = model(grayscale)
output = inference_filter_prediction(output)

# Random suffix so successive runs write to different files.
it = random.randint(0, 1000)
prefix=f"detections-{it}"
print(prefix)
save_test_img(image, output[0], prefix)





tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 24, 25, 27, 30, 31, 32, 33, 34, 35, 36, 38, 40])
detections-331
{'boxes': tensor([[179.7976, 457.4123, 210.7576, 485.2520],
        [204.6704, 493.8415, 239.8764, 523.7800],
        [240.7986, 588.1495, 324.5114, 668.8118],
        [525.4587, 544.5194, 580.9304, 576.6167],
        [724.6764, 656.5187, 870.8930, 714.3874],
        [780.2134, 678.4894, 905.0087, 744.9536],
        [481.3517, 504.8663, 528.1044, 535.6287],
        [519.5519, 503.5792, 561.9777, 532.4686],
        [662.1407, 543.5578, 725.0336, 577.6752],
        [474.0178, 537.5209, 535.0876, 576.2452],
        [829.0297, 591.0197, 922.8293, 633.6086],
        [598.9414, 497.6663, 641.1106, 528.5678],
        [569.0800, 540.7796, 629.8018, 572.8284],
        [907.9136, 624.7111, 959.6132, 664.7378],
        [562.5197, 498.9753, 607.6439, 522.8589],
        [783.0757, 544.1405, 840.8433, 577.8701],
        [375.2706,

'./test_output/output_image_detections-331.png'