In [1]:
from collections import OrderedDict
from torch.optim.lr_scheduler import StepLR
import torch
import torchvision
from torchvision.ops import nms
from torchvision.ops.boxes import box_convert,box_iou


from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import os
import cv2
import random
from dataset import ObjectDetectionDataset
from helper import get_train_data_loader, filter_prediction, clean_targets
from conf import *

torch.manual_seed = 0
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torchvision.models.detection import ssd, ssd300_vgg16, SSD300_VGG16_Weights
import datetime




In [4]:

def get_object_detection_model(num_classes=NUMBER_OF_CLASSES):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    for param in model.parameters():
        param.requires_grad = False
    for param in model.roi_heads.parameters():
        param.requires_grad = True
    for idx, param in enumerate(model.backbone.parameters()):
        if(idx > 56):
            param.requires_grad = True
    return model

def get_ssd_detection_model(num_classes=NUMBER_OF_CLASSES):
    ssd_model = ssd300_vgg16(pretrained=True)
    classification_head = ssd_model.head.classification_head
    freeze_layers = [
        ssd_model.backbone.features,    # Freeze the VGG16 backbone
        ssd_model.backbone.extra,       # Optionally, freeze extra layers
        ssd_model.anchor_generator,     # Freeze the anchor generator
    ]
    for layer in classification_head.module_list:
        layer = torch.nn.Conv2d(layer.in_channels, num_classes * 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    for layer in freeze_layers:
        for param in layer.parameters():
            param.requires_grad = False
    return ssd_model


def get_yolo_model(num_classes=NUMBER_OF_CLASSES):
    model = torch.hub.load('cd ', 'yolov5s', pretrained=True, classes=num_classes+1)
    return model

def save_test_img(img, target, prefix):
    # img = img.permute(2,0,1).cpu().numpy()  # Convert to (height, width, channels)
    img = img.cpu().numpy()  # Convert to (height, width, channels)

    # img = img.astype('uint8')
    # img = img
    # Draw bounding boxes on the image
    print(target)
    for box, label in zip(target['boxes'], target['labels']):
        x, y, w, h = box.tolist()
        x, y, w, h = int(x), int(y), int(w), int(h)
        box_color = BOX_COLOR[label.item()]
        cv2.rectangle(img, (x, y), (w, h), box_color, 2)

    # Save the image with bounding boxes
    if not os.path.exists(os.path.join(os.getcwd(), 'test_output')):
        os.makedirs(os.path.join(os.getcwd(), 'test_output'))
    # cv2.imshow(img)
    img_path = f"./test_output/output_image_{prefix}.png"
    cv2.imwrite(img_path, img)
    return img_path


def get_images(image_path):
        # reading the images and converting them to correct size and color
        original_image = cv2.imread(image_path)
        grayscale = to_grayscale(original_image)
        grayscale = normalize_image(grayscale)
        grayscale = torch.from_numpy(grayscale).float()
        grayscale = grayscale.unsqueeze(0)
        
        return grayscale, torch.from_numpy(original_image)

def to_grayscale(image):	
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

def normalize_image(img):
    return img / 255


def train(model, train_loader, optimizer, epoch):
    device = torch.device("cpu")
    model.to(device)
    model.train()
    for batch_idx, (data, targets,_) in enumerate(train_loader, 1):
        # Print bounding boxes for debugging
        print(f"=====[ epoch {epoch} batch {batch_idx}  data: {data}")
        print(f"=====[ epoch {epoch} batch {batch_idx}  targets: {targets}")
        data = torch.stack(data).to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        optimizer.zero_grad()
        print(f"=====[ epoch {epoch} batch {batch_idx}  before model parse")
        data = data.permute(0, 3, 1, 2)

        output = model(data, targets)
        # output = model(data, targets)

        print(f"=====[ epoch {epoch} batch {batch_idx}  output of the model: {output}")

        total_loss = sum(output.values())
        total_loss.backward()
        optimizer.step()

def run(model, train_loader):
    epochs = 10
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    for epoch in range(1, epochs + 1):
        train(model, train_loader, optimizer, epoch)
        scheduler.step()



In [6]:
# fasterrcnn = get_object_detection_model()
dataset = get_train_data_loader(16, "../../train_set/")
# timer1 = datetime.datetime.now()
# run(fasterrcnn, dataset)
# timer1 = datetime.datetime.now() - timer1

ssd =  get_yolo_model()
timer2 = datetime.datetime.now()
run(ssd, dataset)
timer2 = datetime.datetime.now() - timer2

# model.eval()
# original_image_sizes = []

=====[INFO] Get train data loader training_dir: ../../train_set/
['002617_jpeg.rf.308888677160f69185401587a1a12706.jpg', '002618_jpeg.rf.cdcb81d424cc89c56bba1273e0077a3b.jpg', '002620_jpeg.rf.73de144c2b0fc0aab57cc2f1243738d0.jpg', '002621_jpeg.rf.1b03ca0c27c30fd6e3fd6f90fb1e6b77.jpg', '002623_jpeg.rf.486fb1779c961ff0156b7f58224617bb.jpg', '002624_jpeg.rf.45b41eef08ec17c520d5f1852b396a08.jpg', '002628_jpeg.rf.ba1d06049268e86f60834ec60a2de7eb.jpg', '002629_jpeg.rf.37c8c6a6b015cccfbfec6674e538e1fc.jpg', '002631_jpeg.rf.b8eac78ea2a0e8002c327da82aeadbef.jpg', '002632_jpeg.rf.e92e0c378a99dde283a6aa51624e0a39.jpg', '002635_jpeg.rf.d9da20a79ab8d6db3ab354893be4f9cd.jpg', '002636_jpeg.rf.ce2b1a476030fd81552857fe57102830.jpg', '002637_jpeg.rf.9e96c3eb88276e4310d5300d5a111d15.jpg', '002639_jpeg.rf.4bb7a82efc513e915602eb716f6abdef.jpg', '002640_jpeg.rf.9edf2730ee169d83d4655a3ab1653d0a.jpg', '002642_jpeg.rf.e22cec2f3889ced1f3fc8bb369a273d2.jpg', '002643_jpeg.rf.c6fcdfd696d6c840e7587f4955ce0969.jpg',

Using cache found in /Users/viktor/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-12-11 Python-3.10.12 torch-2.1.0 CPU

Overriding model.yaml nc=80 with nc=6

                 from  n    params  module                                  arguments                     
  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  3    625152  models.common.C3                        [256, 

=====[ epoch 1 batch 1  data: [tensor([[[0.00000, 0.00000, 0.00000],
         [0.00000, 0.00392, 0.00392],
         [0.00000, 0.00784, 0.00784],
         ...,
         [0.29804, 0.34118, 0.30980],
         [0.27843, 0.32549, 0.30196],
         [0.29020, 0.33725, 0.31373]],

        [[0.00000, 0.00392, 0.00392],
         [0.00000, 0.00784, 0.00784],
         [0.00392, 0.01176, 0.01176],
         ...,
         [0.29020, 0.33333, 0.30196],
         [0.26667, 0.31373, 0.29020],
         [0.27843, 0.32549, 0.30196]],

        [[0.00784, 0.01569, 0.01569],
         [0.00392, 0.01176, 0.01176],
         [0.00392, 0.01176, 0.01176],
         ...,
         [0.28235, 0.32549, 0.29412],
         [0.25490, 0.30196, 0.27843],
         [0.25882, 0.30588, 0.28235]],

        ...,

        [[0.47059, 0.43137, 0.40392],
         [0.47059, 0.43137, 0.40392],
         [0.47059, 0.43137, 0.40392],
         ...,
         [0.00784, 0.00784, 0.00784],
         [0.00392, 0.00392, 0.00392],
         [0.00784, 

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 88 but got size 76 for tensor number 1 in the list.

: 