In [1]:
pwd

'/Data2/hm22/Faster-RCNN-with-torchvision-master'

In [2]:
import utils
import dataset.transforms as T
import datetime
import os

import time
import matplotlib.pyplot as plt ###add
#%matplotlib inline

import torch
import torch.utils.data
from torch import nn
from dataset.coco_utils import get_coco, get_coco_kp
from rapid import train_one_epoch, evaluate
from dataset.group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
import argparse
import torchvision

import cv2
import random

In [3]:
# ---------------------------------------------------------------------------
# Run configuration. main() reads these module-level names directly.
# ---------------------------------------------------------------------------

# Data / model selection
data_path = '/Data2/hm22/Faster-RCNN-with-torchvision-master/data/coco/'
model_name = 'fasterrcnn_resnet50_fpn_v2'
dataset_name = 'coco'
device_type = 'cuda'

# Optimisation hyper-parameters
batch_size = 1 #8
epochs = 20
workers = 1
lr = 0.02
momentum = 0.9
weight_decay = 0.0001
print_freq = 20
lr_step_size = 8
lr_steps = [8,11]      # epochs at which the learning rate is multiplied by lr_gamma
lr_gamma = 0.1

# Checkpointing / run mode
resume = ''            # path to a checkpoint to resume from; empty string disables resuming
test_only = True       # when True, main() only runs evaluate() and returns
output_dir = '/Data2/hm22/Faster-RCNN-with-torchvision-master/result'
aspect_ratio_group_factor = 0
pretrained = True

# Multi-GPU options
distributed = False
parallel = False
world_size = 1
dist_url = 'env://'

# Flags forwarded to evaluate()
attack = True ########################################## ATTACK OR NOT (default = False)
dpatch = False ########################################## DPATCH OR Robust-DPATCH (default = False = Robust-DPATCH)
multi = True ########################################## SINGLE OR MULTI (default = False = Single patch)
# main() passes `defense=defense` to evaluate(); without this definition the
# call raises NameError. TODO(review): confirm the intended value for this run.
defense = False ########################################## DEFENSE OR NOT

In [4]:
def get_dataset(name, image_set, transform):
    """Build the dataset registered under ``name``.

    Args:
        name: registry key, either ``"coco"`` or ``"coco_kp"``.
        image_set: forwarded to the dataset factory as ``image_set``.
        transform: forwarded to the dataset factory as ``transforms``.

    Returns:
        A ``(dataset, num_classes)`` tuple.

    Raises:
        KeyError: if ``name`` is not a registered dataset.
    """
    # name -> (dataset root, factory function, number of classes)
    registry = {
        "coco": ('/Data2/hm22/Faster-RCNN-with-torchvision-master/data/coco/', get_coco, 91),
        "coco_kp": ('/datasets01/COCO/022719/', get_coco_kp, 2)
    }
    root, build_fn, num_classes = registry[name]
    return build_fn(root, image_set=image_set, transforms=transform), num_classes

In [5]:
def get_transform(train):
    """Return the composed transform pipeline for a dataset split.

    The pipeline always starts with ``T.ToTensor()``; when ``train`` is truthy
    a ``T.RandomHorizontalFlip(0.5)`` step is appended after it.
    """
    pipeline = [T.ToTensor()]
    if train:
        pipeline += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(pipeline)

In [6]:
!nvidia-smi

Sun Oct 29 23:52:07 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:1A:00.0 Off |                    0 |
| 30%   28C    P8    27W / 300W |   3797MiB / 46068MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000    On   | 00000000:1B:00.0 Off |                    0 |
| 40%   67C    P2   159W / 300W |  22951MiB / 46068MiB |      2%      Default |
|       

In [7]:
def main(gpu_num=5):
    """Evaluate or train a torchvision detection model using the notebook settings.

    All settings other than ``gpu_num`` are read from the module-level names
    assigned in the configuration cell (``dataset_name``, ``batch_size``,
    ``device_type``, ``test_only``, ``attack``, ``dpatch``, ``multi``, ...).

    When ``test_only`` is truthy the function runs ``evaluate`` once on the
    validation loader and returns. Otherwise it trains for ``epochs`` epochs
    and, if ``output_dir`` is set, saves a checkpoint after every epoch.

    Args:
        gpu_num: CUDA device index to use when ``device_type`` names CUDA
            without an explicit index (e.g. ``'cuda'``). Defaults to 5, the
            index this function has always used.

    Raises:
        ValueError: if the selected CUDA index does not exist on this machine.
    """
    # ---- Device selection -------------------------------------------------
    n_gpus = torch.cuda.device_count()
    print ('Available devices ', n_gpus)

    device = torch.device(device_type)
    if device.type == 'cuda':
        if not torch.cuda.is_available():
            # torch.cuda.set_device() rejects a CPU device, so fall back
            # explicitly instead of crashing on a machine without CUDA.
            print('CUDA is not available, falling back to CPU')
            device = torch.device('cpu')
        else:
            # An explicit index in device_type (e.g. 'cuda:1') wins over gpu_num.
            index = gpu_num if device.index is None else device.index
            if not 0 <= index < n_gpus:
                raise ValueError(
                    'Requested GPU index {} but only {} CUDA device(s) are visible'.format(
                        index, n_gpus))
            device = torch.device(f'cuda:{index}')
            torch.cuda.set_device(device) # change allocation of current GPU

    print('Device', device)

    if output_dir:
        utils.mkdir(output_dir)

    # ---- Data loading -----------------------------------------------------
    print("Loading data")
    dataset, num_classes = get_dataset(dataset_name, "train", get_transform(train=True))
    dataset_test, _ = get_dataset(dataset_name, "val", get_transform(train=False))

    print("Creating data loaders")
    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=workers,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=batch_size,
        sampler=test_sampler, num_workers=workers,
        collate_fn=utils.collate_fn)

    # ---- Model ------------------------------------------------------------
    print("Creating model")
    model = torchvision.models.detection.__dict__[model_name](num_classes=num_classes,
                                                              pretrained=pretrained)
    model.to(device)

    # Keep a handle on the bare model so checkpoints are saved/loaded without
    # the wrapper's `module.` prefix.
    model_without_ddp = model
    if distributed:
        # NOTE(review): this function never initialises a process group;
        # that must happen before reaching this point.
        ddp_device_ids = [device.index] if device.type == 'cuda' else None
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=ddp_device_ids)
        model_without_ddp = model.module

    if parallel:
        print('Training parallel')
        model = torch.nn.DataParallel(model).cuda()
        model_without_ddp = model.module

    # ---- Optimiser and LR schedule ---------------------------------------
    params = [p for p in model.parameters() if p.requires_grad]

    optimizer = torch.optim.SGD(
        params, lr=lr, momentum=momentum, weight_decay=weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=lr_steps, gamma=lr_gamma)

    # ---- Optional resume --------------------------------------------------
    if resume:
        print('Resume training')
        checkpoint = torch.load(resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])

    # ---- Evaluation only --------------------------------------------------
    if test_only:
        # `defense` is an optional notebook-level flag; default to False when
        # the configuration cell does not define it, rather than raising
        # NameError. TODO(review): confirm False is the intended default.
        defense_enabled = globals().get('defense', False)
        evaluate(model, data_loader_test, device=device, dpatch=dpatch, attack=attack,
                 multi=multi, defense=defense_enabled)
        return

    # ---- Training ---------------------------------------------------------
    print('Start training')
    start_time = time.time()
    for epoch in range(epochs):
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq)
        lr_scheduler.step()
        if output_dir:
            utils.save_on_master({
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict()
                },
                os.path.join(output_dir, 'model_{}.pth'.format(epoch)))

        # To evaluate after every epoch, call evaluate(...) here with the same
        # flags as the test-only branch above.

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

In [None]:
# Run the pipeline defined by main() with the settings from the configuration cell.
main()