In [7]:
# Train torchvision with our dataset:
# https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
# https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

from __future__ import print_function
from __future__ import division

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import json

# Torch enable cuda
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("=======================================================================")
print(" PyTorch version:     ", torch.__version__)
print(" Torchvision version: ", torchvision.__version__)
print(" Device:              ", device)
print("=======================================================================")


 PyTorch version:      1.12.1+cu116
 Torchvision version:  0.13.1+cu116
 Device:               cuda:0


### Constants

In [9]:
CLASSES = [5, 10, 25, 50, 100] 						# We DON'T have background class...
"""All the classes that are in the dataset"""

NUM_CLASSES = len(CLASSES)
"""Number of classes in the dataset"""


'Number of classes in the dataset'

# Create the Dataset

Create a `torch.data.Dataset` class to represent the dataset that we are using. 

This class implements both `__len__` and `__getitem__` to allow for consumers to obtain dataset items.

* `__len__` should return the number of items in the dataset. This is the number of annotated images in the VIA json.
* `__getitem__` returns a tuple containing the image data as well as its metadata. For now, we will return the bounding boxes of items in the dataset as well as its labels.

In [3]:
import money_counter

In [4]:
#from utils import collate_fn
from torch import utils
from torch.utils import data

from utils import collate_fn

transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((1500, 2000))
])

# use our dataset and defined transformations
dataset_train = CoinsDataset('../assets/dataset/TCC_MBA_Coins.json', '../assets/dataset/moedas/', transform=transform)
dataset_test = CoinsDataset('../assets/dataset/TCC_MBA_Coins.json', '../assets/dataset/moedas/', transform=transform)

# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset_train)).tolist()
dataset_train = data.Subset(dataset_train, indices[:-50])
dataset_test = data.Subset(dataset_test, indices[-50:])

# Print the sizes of the datasets
print(f'Train dataset size: {len(dataset_train)}')
print(f'Test dataset size: {len(dataset_test)}')

# define training and validation data loaders
data_loader_train = data.DataLoader(
    dataset_train, batch_size=2,#)#, shuffle=True, num_workers=2)
    collate_fn=collate_fn)

data_loader_test = data.DataLoader(
    dataset_test, batch_size=2,#), shuffle=False, num_workers=2)
    collate_fn=collate_fn)


Train dataset size: 97
Test dataset size: 50


# Finetuning a model


In [5]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained on COCO
weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = len(CLASSES)
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [6]:
# from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
# from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# import torchvision

# weights = torchvision.models.detection.MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT

# def get_instance_segmentation_model(num_classes):
#     # load an instance segmentation model pre-trained on COCO
#     model = torchvision.models.detection.maskrcnn_resnet50_fpn(
#         pretrained=weights)

#     # get the number of input features for the classifier
#     in_features = model.roi_heads.box_predictor.cls_score.in_features
#     # replace the pre-trained head with a new one
#     model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

#     # now get the number of input features for the mask classifier
#     in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
#     hidden_layer = 256
#     # and replace the mask predictor with a new one
#     model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
#                                                        hidden_layer,
#                                                        num_classes)

#     return model


Now let's instantiate the model and the optimizer

In [7]:
from torch.optim.lr_scheduler import StepLR

device = torch.device(
    'cuda') if torch.cuda.is_available() else torch.device('cpu')

# get the model using our helper function
#model = get_instance_segmentation_model(num_classes)
# move model to the device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)


In [8]:
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    def f(x):
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)

In [9]:
def load_images(device: torch.device, image_list: List[torch.Tensor]):
    idx = 0

    for image in image_list:
        idx += 1
        print(f'moving image {idx} to device')
        yield image.to(device)


In [10]:
from typing import Dict
from torch import Tensor

import sys
import math


def train_one_epoch(model: torch.nn.Module, optimizer: torch.optim.Optimizer, data_loader: data.DataLoader[DatasetItem], device: torch.device, epoch: int, print_freq: int):
    #metric_logger = utils.MetricLogger(delimiter="  ")
    #metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    #header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None

    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = warmup_lr_scheduler(
            optimizer, warmup_iters, warmup_factor)

    # metric_logger.log_every(data_loader, print_freq, header):

    model.train()

    print('Iterating through data loader')
    for item in data_loader:
        # All this effort to type the goddamn thing
        images: List[Tensor] = item[0]

        print('images_type', type(images))
        print('images', images)

        targets: List[Dict[str, Tensor]] = item[1]

        print('targets_type:', type(targets))
        print('targets:', targets)

        images = list(load_images(device, images))
        targets = [{k: v.to(device) for k, v in target.items()}
                   for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        print('here3')

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        #metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        # metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    # return metric_logger


In [11]:
# let's train it for 10 epochs
from engine import evaluate

for epoch in range(NUM_EPOCHS):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader_train,
                    device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

    print(f'Epoch {epoch} finished.')


Iterating through data loader
Returning items: torch.Size([3, 1500, 2000]) boxes: torch.Size([21, 4]) labels torch.Size([21])
Returning items: torch.Size([3, 1500, 2000]) boxes: torch.Size([37, 4]) labels torch.Size([37])
images_type <class 'tuple'>
images (tensor([[[0.6332, 0.6529, 0.6480,  ..., 0.4528, 0.4792, 0.4824],
         [0.6322, 0.6521, 0.6446,  ..., 0.4823, 0.4824, 0.4936],
         [0.6398, 0.6693, 0.6535,  ..., 0.4865, 0.5230, 0.4907],
         ...,
         [0.2736, 0.3204, 0.2635,  ..., 0.5286, 0.5238, 0.5277],
         [0.3080, 0.2832, 0.3033,  ..., 0.5298, 0.5064, 0.4835],
         [0.3245, 0.2991, 0.3062,  ..., 0.5352, 0.5157, 0.5271]],

        [[0.6802, 0.7000, 0.6911,  ..., 0.4763, 0.5027, 0.5059],
         [0.6793, 0.6992, 0.6878,  ..., 0.5059, 0.5060, 0.5171],
         [0.6868, 0.7164, 0.6967,  ..., 0.5100, 0.5465, 0.5142],
         ...,
         [0.2423, 0.2811, 0.2243,  ..., 0.5600, 0.5551, 0.5591],
         [0.2766, 0.2439, 0.2523,  ..., 0.5612, 0.5377, 0.5149

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.