In [1]:
import os

import torch
from data.VOCDataset import CustomDataset
from model.utils import *
from model.RPN import RegionProposalNetwork
from model.ROI_Detector import ROI_Detector
from model.Faster_RCNN import FasterRCNN

In [2]:
# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
# (Previously the MPS check unconditionally overwrote the CUDA result, so a
# CUDA-only machine always ended up on "cpu".)
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [3]:
# Dataset root directory. Set the VOC_DATA_DIR environment variable to point
# at a different location; the fallback is the path originally hard-coded here.
DATA_DIR = os.environ.get('VOC_DATA_DIR', '/Users/h383kim/pytorch/data')
train_dataset = CustomDataset(DATA_DIR)

In [5]:
from torch.utils.data import DataLoader

# Number of samples per batch.
BATCH_SIZE = 1

# Shuffled loader over the training set.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
                              

In [7]:
# Build the detector with its default arguments, then place it on `device`.
faster_rcnn = FasterRCNN()
faster_rcnn = faster_rcnn.to(device)

In [8]:
# Echo the model as the cell's last expression so the notebook shows its repr.
faster_rcnn

FasterRCNN(
  (shared_backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): C

In [12]:
# SGD with momentum over the parameters that require gradients only.
trainable_params = (param for param in faster_rcnn.parameters() if param.requires_grad)
optimizer = torch.optim.SGD(
    trainable_params,
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0005,
)


In [14]:
from tqdm import tqdm

def train(model, dataloader, optimizer, loss_dict):
    """Run one training pass over ``dataloader`` and record per-batch losses.

    Args:
        model: Callable as ``model(image, target)`` returning the pair
            ``(rpn_output, roi_detector_output)``; each element is indexed
            with the keys ``'cls_loss'`` and ``'localization_loss'``.
        dataloader: Iterable of ``(image, target)`` batches that supports
            ``len()``. ``target`` must provide ``'bboxes'`` and ``'labels'``
            entries that have a ``.to(device)`` method.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            once per batch.
        loss_dict: Dict of lists under the keys ``'rpn_cls_loss'``,
            ``'rpn_loc_loss'``, ``'detector_cls_loss'`` and
            ``'detector_loc_loss'``. One float per batch is appended to each
            list (the dict is mutated in place).

    Returns:
        tuple: ``(train_loss, loss_dict)`` where ``train_loss`` is the mean
        total loss per batch as a Python float (``0.0`` when the dataloader
        yields no batches) and ``loss_dict`` is the same object passed in.
    """
    model.train()

    train_loss = 0.0
    for image, target in tqdm(dataloader):
        image = image.to(device)
        target['bboxes'] = target['bboxes'].to(device)
        target['labels'] = target['labels'].to(device)

        # Forward pass
        rpn_output, roi_detector_output = model(image, target)

        # Individual loss components, read from the model outputs.
        rpn_cls_loss = rpn_output['cls_loss']
        rpn_loc_loss = rpn_output['localization_loss']
        detector_cls_loss = roi_detector_output['cls_loss']
        detector_loc_loss = roi_detector_output['localization_loss']

        # Total loss = RPN loss + detector loss
        rpn_loss = rpn_cls_loss + rpn_loc_loss
        detector_loss = detector_cls_loss + detector_loc_loss
        loss = rpn_loss + detector_loss

        # Store the components as plain floats. The summed losses are tensors
        # and cannot be indexed by key, so the components are taken from the
        # output dicts directly.
        loss_dict['rpn_cls_loss'].append(rpn_cls_loss.item())
        loss_dict['rpn_loc_loss'].append(rpn_loc_loss.item())
        loss_dict['detector_cls_loss'].append(detector_cls_loss.item())
        loss_dict['detector_loc_loss'].append(detector_loc_loss.item())

        # Accumulate a float, not the tensor, so the running total does not
        # keep every batch's autograd graph alive.
        train_loss += loss.item()

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Guard against an empty dataloader (avoids dividing by zero).
    train_loss /= max(len(dataloader), 1)
    return train_loss, loss_dict

In [19]:
from time import time

def baseline(model, dataloader, optimizer, num_epochs=1):
    """Train ``model`` for ``num_epochs`` epochs, logging and checkpointing each one.

    After every epoch a one-line summary of that epoch's mean loss components
    is printed and the model's ``state_dict`` is written to ``'model.pth'``
    (overwriting the previous checkpoint).

    Args:
        model: Model forwarded to ``train``; must provide ``state_dict()``.
        dataloader: Batches forwarded to ``train``.
        optimizer: Optimizer forwarded to ``train``.
        num_epochs: Number of passes over ``dataloader``. Defaults to 1.

    Returns:
        The same ``model`` object that was passed in.
    """
    loss_dict = {
        'rpn_cls_loss': [],
        'rpn_loc_loss': [],
        'detector_cls_loss': [],
        'detector_loc_loss': []
    }

    def _mean(values):
        # Arithmetic mean of a list of floats; NaN when there is nothing to average.
        return sum(values) / len(values) if values else float('nan')

    for epoch in range(1, num_epochs + 1):
        start = time()

        # `train` appends to the shared lists, so remember where this epoch's
        # entries begin in order to report per-epoch (not cumulative) means.
        offsets = {key: len(values) for key, values in loss_dict.items()}
        _, loss_dict = train(model, dataloader, optimizer, loss_dict)
        epoch_means = {key: _mean(values[offsets[key]:])
                       for key, values in loss_dict.items()}

        loss_output = f"---------- Epoch {epoch}: {((time() - start) / 60):.2f} min taken ----------\n"
        loss_output += 'RPN Classification Loss : {:.4f}'.format(epoch_means['rpn_cls_loss'])
        loss_output += ' | RPN Localization Loss : {:.4f}'.format(epoch_means['rpn_loc_loss'])
        loss_output += ' | FRCNN Classification Loss : {:.4f}'.format(epoch_means['detector_cls_loss'])
        loss_output += ' | FRCNN Localization Loss : {:.4f}'.format(epoch_means['detector_loc_loss'])
        print(loss_output)

        # Save weights / model
        torch.save(model.state_dict(), 'model.pth')

    return model

In [21]:
# Train for a single epoch and keep a handle on the returned model.
faster_rcnn_baseline = baseline(
    faster_rcnn,
    train_dataloader,
    optimizer,
    num_epochs=1,
)

  0%|                                                  | 0/1000 [00:00<?, ?it/s]


TypeError: normalize_resize_image_and_boxes() missing 2 required positional arguments: 'image_mean' and 'image_std'