In [None]:
# Installations
!pip install -U trashdetect_engine --quiet
!pip install gdown wandb --quiet
!pip install efficientnet_pytorch --quiet

[K     |████████████████████████████████| 1.8 MB 8.2 MB/s 
[K     |████████████████████████████████| 147 kB 25.8 MB/s 
[K     |████████████████████████████████| 181 kB 69.9 MB/s 
[K     |████████████████████████████████| 63 kB 2.3 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone


In [None]:
# Download dataset and annotations
!gdown --folder 1Nsik0VXf8tfqllIsJNgUsipaF6Nm18jU
!gdown --id 1W-3pMS0k7DDdSw2HiTCy1hJWgJUkvddw
!unzip  -q taco_dataset.zip -d /content/taco_images

# https://drive.google.com/file/d/1W-3pMS0k7DDdSw2HiTCy1hJWgJUkvddw/view?usp=sharing

Retrieving folder list
Processing file 1D-M55oeRuH_tp3FldzELNIlGmkfjBMLW annotations_binary_test.json
Processing file 1sbZADak_vaigZ95l44lNQvQq5YZIhK-C annotations_binary_train.json
Retrieving folder list completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1D-M55oeRuH_tp3FldzELNIlGmkfjBMLW
To: /content/annotations_with_segmask/annotations_binary_test.json
100% 541k/541k [00:00<00:00, 108MB/s]
Downloading...
From: https://drive.google.com/uc?id=1sbZADak_vaigZ95l44lNQvQq5YZIhK-C
To: /content/annotations_with_segmask/annotations_binary_train.json
100% 2.29M/2.29M [00:00<00:00, 83.7MB/s]
Download completed
Downloading...
From: https://drive.google.com/uc?id=1W-3pMS0k7DDdSw2HiTCy1hJWgJUkvddw
To: /content/taco_dataset.zip
100% 2.62G/2.62G [00:10<00:00, 242MB/s]


In [None]:
import time
import os
import torch
import argparse
from datetime import datetime
from pathlib import Path

import trashdetect_engine
# from trashdetect_engine.engine import train_one_epoch, evaluate
from trashdetect_engine import utils
from trashdetect_engine.data import build
from trashdetect_engine.models.segmentation_models import (
    get_instance_segmentation_model,
)


In [None]:
import math
import sys
import time
import torch

import torchvision.models.detection.mask_rcnn
from trashdetect_engine.data import get_coco_api_from_dataset
from trashdetect_engine.coco_eval import CocoEvaluator
from trashdetect_engine import utils


def train_one_epoch(
    model, optimizer, data_loader, device, epoch, print_freq, exp_logger=None
):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: detection model. In train mode it is called as
            ``model(images, targets)`` and must return a dict of loss tensors.
        optimizer: optimizer stepped once per batch.
        data_loader: iterable of ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts whose values support ``.to()``.
        device: device the images and target values are moved to.
        epoch: index of the current epoch. A per-iteration warm-up scheduler
            is created only when this is ``0``.
        print_freq: forwarded to ``MetricLogger.log_every``.
        exp_logger: optional object exposing ``log(dict)`` (e.g. a wandb run).
            It receives ``{"train/loss": value}`` for every batch.

    Returns:
        The ``utils.MetricLogger`` that accumulated the loss and learning-rate
        meters of this epoch, so callers can inspect or log epoch statistics.

    Raises:
        SystemExit: via ``sys.exit(1)`` when the summed loss is not finite.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = "Epoch: [{}]".format(epoch)

    # Warm-up scheduler: only during the first epoch, stepped once per batch.
    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    # Training loop
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):

        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        # loss_dict: ['loss_classifier', 'loss_box_reg', 'loss_mask', 'loss_objectness', 'loss_rpn_box_reg']
        # `losses` is the tensor that is back-propagated.
        losses = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        if exp_logger is not None:
            exp_logger.log({"train/loss": loss_value})

        # Abort before the optimizer step so a NaN/inf loss never reaches the weights.
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger


def _get_iou_types(model):
    """Return the list of IoU types to evaluate for ``model``.

    ``"bbox"`` is always included; ``"segm"`` is added for a torchvision
    ``MaskRCNN`` and ``"keypoints"`` for a ``KeypointRCNN``. A
    ``DistributedDataParallel`` wrapper is looked through via ``.module``.
    """
    detection = torchvision.models.detection

    # Inspect the wrapped network, not the DDP container.
    core = (
        model.module
        if isinstance(model, torch.nn.parallel.DistributedDataParallel)
        else model
    )

    optional_types = (
        (detection.MaskRCNN, "segm"),
        (detection.KeypointRCNN, "keypoints"),
    )
    return ["bbox"] + [
        label for arch, label in optional_types if isinstance(core, arch)
    ]


@torch.no_grad()
def evaluate(model, data_loader, device, exp_logger=None):
    """Run ``model`` over ``data_loader`` and compute COCO-style metrics.

    Args:
        model: detection model. In eval mode it is called as ``model(images)``
            and must return one dict of tensors per image.
        data_loader: iterable of ``(images, targets)`` batches; its
            ``.dataset`` is converted with ``get_coco_api_from_dataset`` and
            every target must carry a one-element ``"image_id"`` tensor.
        device: device the images and target values are moved to.
        exp_logger: accepted for signature symmetry with ``train_one_epoch``;
            currently unused by this function.

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = "Test:"

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Wait for pending GPU work so model_time measures this batch only.
            # Guarded because torch.cuda.synchronize() raises without CUDA.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Key each prediction by the image id taken from its target.
            res = {
                target["image_id"].item(): output
                for target, output in zip(targets, outputs)
            }
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        return coco_evaluator
    finally:
        # Restore the caller's thread count even if evaluation failed.
        torch.set_num_threads(n_threads)


In [None]:
def get_args_parser():
    """Build the argument parser holding every setting of the experiment.

    Returns:
        argparse.ArgumentParser: parser with path, device, learning and model
        options. All options have defaults, so parsing an empty argument list
        yields a complete configuration.
    """
    parser = argparse.ArgumentParser(
        "Prepare instance segmentation task with Mask R-CNN"
    )
    # Paths
    parser.add_argument(
        "--output_dir",
        help="path to save checkpoints",
        default="/content/output",
        type=str,
    )
    parser.add_argument(
        "--images_dir",
        help="path to images directory",
        default="/content/taco_images/data",
        type=str,
    )
    parser.add_argument(
        "--anno_name",
        help="path to annotation json (part name)",
        default="/content/annotations_with_segmask/annotations_binary",
        type=str,
    )
    parser.add_argument("--resume", default="", help="resume from checkpoint")
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )
    # Devices
    parser.add_argument("--batch_size", default=1, type=int)
    parser.add_argument("--num_workers", default=4, type=int)
    parser.add_argument("--gpu_id", default=0, type=int)

    # Learning
    parser.add_argument("--num_epochs", default=26, type=int)
    parser.add_argument("--lr", default=0.005, type=float)
    parser.add_argument("--weight_decay", default=0.0005, type=float)
    parser.add_argument(
        "--lr-step-size", default=0, type=int, help="decrease lr every step-size epochs"
    )
    parser.add_argument(
        "--lr-steps",
        default=[16, 22],
        nargs="+",
        type=int,
        help="epochs at which to decrease lr (milestones)",
    )
    parser.add_argument(
        "--lr-gamma",
        default=0.1,
        type=float,
        help="decrease lr by a factor of lr-gamma",
    )
    parser.add_argument(
        "--optimizer",
        help="Choose type of optimization algorithm, SGD as default",
        default="SGD",
        choices=["AdamW", "SGD"],
        type=str,
    )
    # Model
    parser.add_argument("--num_classes", default=2, type=int)
    parser.add_argument(
        "--model",
        default="maskrcnn_resnet50_fpn",
        type=str,
        choices=[
            "maskrcnn_resnet50_fpn",
            "fasterrcnn_resnet50_fpn",
            "fasterrcnn_mobilenet_v3_large_fpn",
            "fasterrcnn_mobilenet_v3_large_320_fpn",
            "retinanet_resnet50_fpn",
            "efficientnet-b0",
            "efficientnet-b1",
            "efficientnet-b2",
            "efficientnet-b3",
            "efficientnet-b4",
            "efficientnet-b5",
            "efficientnet-b6",
        ],
    )
    # Experiment tracking
    parser.add_argument("--wandb", action="store_true")

    return parser

In [None]:
# Build the parser and read the configuration. parse_known_args() returns
# unrecognised command-line arguments separately (discarded into `_`)
# instead of exiting with an error.
parser = get_args_parser()
args, _ = parser.parse_known_args()

In [None]:
# Defaults consumed by the training cell below.
# start_epoch: first epoch index; replaced when resuming from a checkpoint.
start_epoch = 0
# return_masks: passed to build(); set to True for models whose name starts with "mask".
return_masks = False

In [None]:
# Default hyper-parameters
args

Namespace(anno_name='/content/annotations_with_segmask/annotations_binary', batch_size=1, gpu_id=0, images_dir='/content/taco_images/data', lr=0.005, lr_gamma=0.1, lr_step_size=0, lr_steps=[16, 22], model='maskrcnn_resnet50_fpn', num_classes=2, num_epochs=26, num_workers=4, optimizer='SGD', output_dir='/content/output', resume='', test_only=False, wandb=False, weight_decay=0.0005)

In [None]:
# Notebook-level overrides of the parser defaults for this run.

# Devices
args.gpu_id = 0
args.num_workers = 3
args.batch_size = 4 # Cannot run with batch_size=32

# Model
args.model = 'maskrcnn_resnet50_fpn'
args.num_classes = 2

# Learning
args.num_epochs = 26
args.lr = 0.005
args.lr_steps = [16, 22] # If we use learning rate scheduler

# Experiment tracking
args.wandb = True
args.run_name = utils.generate_datetime()

# Show the effective configuration.
args

Namespace(anno_name='/content/annotations_with_segmask/annotations_binary', batch_size=4, gpu_id=0, images_dir='/content/taco_images/data', lr=0.005, lr_gamma=0.1, lr_step_size=0, lr_steps=[16, 22], model='maskrcnn_resnet50_fpn', num_classes=2, num_epochs=26, num_workers=3, optimizer='SGD', output_dir='/content/output', resume='', run_name='07-21-2022_02-21-50', test_only=False, wandb=True, weight_decay=0.0005)

In [None]:
# %%html
# <iframe src="https://wandb.ai/nma2022-wastedetect/wastedetect/runs" width="2000" height="1000"></iframe>

In [None]:
# Training / evaluation driver.
# Builds the datasets, model, optimizer and LR scheduler from `args`, then
# either evaluates once (args.test_only) or trains for args.num_epochs epochs,
# saving a checkpoint and evaluating after every epoch.

# The name `datetime` imported at the top of the notebook is the class, which
# has no `timedelta` attribute, so the type is imported explicitly here.
from datetime import timedelta

if args.wandb and (not args.resume):
    import wandb

    # Passing `config` to init records the hyper-parameters on the run itself.
    exp_logger = wandb.init(
        project="wastedetect",
        entity="nma2022-wastedetect",
        name=f"experiment_{args.run_name}",
        config=vars(args),
    )

else:
    exp_logger = None

output_dir = Path(args.output_dir)
os.makedirs(output_dir, exist_ok=True)
# Derived from the model name on every run, so re-executing this cell after
# changing args.model cannot keep a stale value.
return_masks = args.model.startswith("mask")

# use our dataset and defined transformations
dataset_train = build("train", args.images_dir, args.anno_name, return_masks)
dataset_val = build("val", args.images_dir, args.anno_name, return_masks)

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
    collate_fn=utils.collate_fn,
)

data_loader_test = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.num_workers,
    collate_fn=utils.collate_fn,
)

# define model
device = (
    torch.device(f"cuda:{args.gpu_id}")
    if torch.cuda.is_available()
    else torch.device("cpu")
)

# our dataset has two classes only - background and waste
num_classes = args.num_classes

# get the model using our helper function
model = get_instance_segmentation_model(num_classes, args.model)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
if args.optimizer == "AdamW":
    optimizer = torch.optim.AdamW(
        params, lr=args.lr, weight_decay=args.weight_decay
    )
elif args.optimizer == "SGD":
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=0.9, weight_decay=args.weight_decay
    )
else:
    # args can be edited by hand in the notebook, bypassing argparse `choices`.
    raise ValueError(f"Unsupported optimizer: {args.optimizer!r}")


# and a learning rate scheduler
if args.lr_step_size != 0:
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma
    )
else:
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.lr_steps, gamma=args.lr_gamma
    )

if args.resume:
    checkpoint = torch.load(args.resume, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    # Checkpoints written before the scheduler state was saved lack this key.
    if "lr_scheduler_state_dict" in checkpoint:
        lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"])
    start_epoch = checkpoint["epoch"] + 1

if args.test_only:
    # evaluate on the test dataset
    print("Start evaluating")
    # return_masks is passed as for the train/val datasets above.
    dataset_val = build("test", args.images_dir, args.anno_name, return_masks)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        collate_fn=utils.collate_fn,
    )
    evaluate(model, data_loader_test, device=device)
else:
    print("Start training")
    start_time = time.time()
    for epoch in range(start_epoch, args.num_epochs):
        # train for one epoch, printing every 20 iterations
        train_one_epoch(
            model,
            optimizer,
            data_loader,
            device,
            epoch,
            print_freq=20,
            exp_logger=exp_logger,
        )
        # update the learning rate
        lr_scheduler.step()
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                # Saved so a resumed run continues the LR schedule.
                "lr_scheduler_state_dict": lr_scheduler.state_dict(),
            },
            os.path.join(output_dir, f"checkpoint{epoch:04}.pth"),
        )
        # evaluate on the validation dataset
        coco_evaluator = evaluate(model, data_loader_test, device=device)

        if exp_logger is not None:
            # stats[0] / stats[1] are logged as mAP@0.5:0.95 / mAP@0.5.
            exp_logger.log(
                {
                    "valid/bbox-mAP@0.5:0.95": coco_evaluator.coco_eval[
                        "bbox"
                    ].stats[0],
                    "valid/bbox-mAP@0.5": coco_evaluator.coco_eval["bbox"].stats[1],
                }
            )
            if "segm" in coco_evaluator.coco_eval:
                exp_logger.log(
                    {
                        "valid/segm-mAP@0.5:0.95": coco_evaluator.coco_eval[
                            "segm"
                        ].stats[0],
                        "valid/segm-mAP@0.5": coco_evaluator.coco_eval[
                            "segm"
                        ].stats[1],
                    }
                )
    total_time = time.time() - start_time
    total_time_str = str(timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


  cpuset_checked))
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth


  0%|          | 0.00/170M [00:00<?, ?B/s]

Start training
Epoch: [0]  [  0/301]  eta: 1:37:01  lr: 0.000022  loss: 4.9895 (4.9895)  loss_classifier: 0.8156 (0.8156)  loss_box_reg: 0.3442 (0.3442)  loss_mask: 2.0895 (2.0895)  loss_objectness: 1.5752 (1.5752)  loss_rpn_box_reg: 0.1650 (0.1650)  time: 19.3420  data: 7.0142  max mem: 7058
Epoch: [0]  [ 20/301]  eta: 0:13:23  lr: 0.000355  loss: 2.1307 (2.3151)  loss_classifier: 0.2679 (0.4455)  loss_box_reg: 0.1970 (0.2062)  loss_mask: 0.7971 (1.1412)  loss_objectness: 0.1671 (0.4543)  loss_rpn_box_reg: 0.0339 (0.0678)  time: 2.0357  data: 0.2325  max mem: 8938
Epoch: [0]  [ 40/301]  eta: 0:11:07  lr: 0.000688  loss: 0.8441 (1.6052)  loss_classifier: 0.1562 (0.3080)  loss_box_reg: 0.1405 (0.1858)  loss_mask: 0.2949 (0.7531)  loss_objectness: 0.1045 (0.3044)  loss_rpn_box_reg: 0.0115 (0.0539)  time: 2.2433  data: 0.4119  max mem: 8938
Epoch: [0]  [ 60/301]  eta: 0:10:13  lr: 0.001021  loss: 0.7569 (1.3468)  loss_classifier: 0.1445 (0.2563)  loss_box_reg: 0.2073 (0.1909)  loss_mask: 