In [1]:
import torch
import torch.nn as nn
import torchvision

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
import os

from torch.utils.data import Dataset, DataLoader
# from torch.utils.data.sampler import Sampler
import torch.optim as optim
import sys
sys.path.append('../')
sys.path.append('../../')

from dataset import CocoDetection, train_transforms, val_transforms, test_transforms
from visualize import visualize
# from rcnn_model import fasterrcnn_resnet201_fpn, FastRCNNPredictor
from engine import evaluate
import utils
from models.swin import *

In [2]:
# !set PYTHONPATH='/home/beomgon/pytorch/LBP_scl/GenObjectDetect/'
# !export PYTHONPATH='/home/beomgon/pytorch/LBP_scl/GenObjectDetect'

In [3]:
import easydict

# Run configuration for the notebook; EasyDict exposes every key as an attribute
# (args.lr, args.gpu, ...).
args = easydict.EasyDict({
    'batch_size': 4,          # batch size given to both DataLoaders
    'epochs': 90,             # number of iterations of the training loop
    'data': 0,                # not read by any visible cell — TODO confirm it is still needed
    'lr': 0.002,              # SGD learning rate
    'momentum': 0.9,          # SGD momentum
    'weight_decay': 1e-4,     # SGD weight decay
    'start_epoch': 0,         # not read by any visible cell — TODO confirm it is still needed
    'gpu': 2,                 # CUDA device index used when the device is selected
    'workers': 16,            # num_workers for both DataLoaders
    'print_freq': 1000,       # forwarded to train_one_epoch
    # NOTE(review): the directory name says "retinanet" but this notebook builds a
    # Faster R-CNN model — confirm the intended output location.
    'output_dir': '../trained_models/retinanet_swin_t_fpn/',
})

In [4]:
from pathlib import Path

# Keep only the portion of output_dir that precedes the first 'checkpoint'
# substring (the whole string when it does not occur), then make sure that
# directory exists, creating missing parents and tolerating an existing one.
output_root, _, _ = args.output_dir.partition('checkpoint')
path = Path(output_root)
path.mkdir(parents=True, exist_ok=True)

In [5]:
# Report how many CUDA devices this process can see.
ngpus_per_node = torch.cuda.device_count()
print(ngpus_per_node)

GPU_NUM = args.gpu  # index of the GPU to run on
if torch.cuda.is_available():
    device = torch.device(f'cuda:{GPU_NUM}')
    # Make the selected GPU the current CUDA device for this process.
    torch.cuda.set_device(device)
else:
    # torch.cuda.set_device only accepts CUDA devices, so it must be skipped
    # when falling back to the CPU; calling it with a CPU device raises.
    device = torch.device('cpu')
print(device)

3
cuda:2


In [6]:
from models.detection.faster_rcnn import fasterrcnn_swin_t_fpn

# Number of output classes passed to the detector.
# NOTE(review): 91 presumably mirrors the COCO category-id space — confirm it
# matches the label ids in the annotation files.
NUM_CLASS = 91
# Passed as both min_size and max_size to the model constructor.
IMG_SIZE = 448*2
model = fasterrcnn_swin_t_fpn(pretrained=False, min_size=IMG_SIZE, max_size=IMG_SIZE, num_classes=NUM_CLASS)

# Reuse the `device` chosen in the device-selection cell. Rebinding it to a bare
# torch.device('cuda') here would discard that choice (including the CPU
# fallback) and make model.to() fail on a machine without CUDA.
model.to(device)
print('model is loaded to gpu')

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


return_layers {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
model is loaded to gpu


In [7]:
from dataset import CocoDetection, train_transforms, val_transforms, test_transforms

# Image root shared by both splits.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
DATA_ROOT = '/home/beomgon/Dataset/scl/'

# Training split uses the training transforms.
train_dataset = CocoDetection(
    root=DATA_ROOT,
    annFile='../../data/train.json',
    transforms=train_transforms,
)

# The test split is built with val_transforms (test_transforms is imported but
# not used in this cell).
test_dataset = CocoDetection(
    root=DATA_ROOT,
    annFile='../../data/test.json',
    transforms=val_transforms,
)

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [8]:
# Pull the first (image, target) pair the training dataset yields as a quick
# sanity check of the annotation format.
image, target = next(iter(train_dataset))
# Bare expression: the notebook renders the target as this cell's output.
target
                    

{'boxes': tensor([[472.5000, 667.1875, 542.9375, 743.3125]]),
 'category_id': tensor([1]),
 'labels': tensor([1]),
 'image_id': tensor([1]),
 'area': tensor([5362.0547]),
 'iscrowd': tensor([0])}

In [9]:
# Keyword arguments common to both loaders.
_loader_kwargs = dict(
    batch_size=args.batch_size,
    num_workers=args.workers,
    collate_fn=utils.collate_fn,
)

# Training samples are drawn in random order; test samples in dataset order.
train_sampler = torch.utils.data.RandomSampler(train_dataset)
test_sampler = torch.utils.data.SequentialSampler(test_dataset)

train_loader = DataLoader(train_dataset, sampler=train_sampler, **_loader_kwargs)
test_loader = DataLoader(test_dataset, sampler=test_sampler, **_loader_kwargs)

In [10]:
# Optimise only the parameters that currently require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))

# SGD with momentum; an Adam variant was tried previously:
# optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)
optimizer = torch.optim.SGD(
    params,
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)

# Halve the learning rate each time one of these epoch milestones is reached.
_lr_milestones = [15, 30, 45, 60, 75]
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=_lr_milestones,
    gamma=0.5,
)

In [11]:
from engine import train_one_epoch

# Train for args.epochs epochs, stepping the LR schedule once per epoch, with
# periodic checkpointing and evaluation.
start_time = time.time()
final_epoch = args.epochs - 1
for epoch in range(args.epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch, args.print_freq)
    lr_scheduler.step()

    is_final = epoch == final_epoch

    # Snapshot every 5th epoch after epoch 60, and always after the final
    # epoch: the periodic rule alone never fires on the last epoch of a
    # 90-epoch run (last hit is epoch 85) and never fires at all for runs of
    # 61 epochs or fewer, which would leave the finished model unsaved.
    if args.output_dir and ((epoch > 60 and epoch % 5 == 0) or is_final):
        checkpoint = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'args': args,
            'epoch': epoch
        }
        # Per-epoch file plus a rolling 'checkpoint.pth' holding the latest state.
        utils.save_on_master(
            checkpoint,
            os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
        utils.save_on_master(
            checkpoint,
            os.path.join(args.output_dir, 'checkpoint.pth'))

    # Evaluate on the test loader every 5th epoch after epoch 5, and once more
    # after the final epoch so the finished model has reported metrics.
    if (epoch > 5 and epoch % 5 == 0) or is_final:
        evaluate(model, test_loader, device=device)
print('total time is {}'.format(time.time() - start_time))

Epoch: [0]  [   0/1549]  eta: 1:39:00  lr: 0.000004  loss: 5.2863 (5.2863)  loss_classifier: 4.5537 (4.5537)  loss_box_reg: 0.0002 (0.0002)  loss_objectness: 0.7281 (0.7281)  loss_rpn_box_reg: 0.0043 (0.0043)  time: 3.8349  data: 2.2072  max mem: 8985
Epoch: [0]  [1000/1549]  eta: 0:06:56  lr: 0.002000  loss: 0.1655 (0.2881)  loss_classifier: 0.0600 (0.1617)  loss_box_reg: 0.0390 (0.0229)  loss_objectness: 0.0554 (0.0965)  loss_rpn_box_reg: 0.0044 (0.0070)  time: 0.7492  data: 0.0179  max mem: 9283
Epoch: [0]  [1548/1549]  eta: 0:00:00  lr: 0.002000  loss: 0.1807 (0.2504)  loss_classifier: 0.0786 (0.1318)  loss_box_reg: 0.0567 (0.0341)  loss_objectness: 0.0344 (0.0779)  loss_rpn_box_reg: 0.0038 (0.0065)  time: 0.7458  data: 0.0171  max mem: 9283
Epoch: [0] Total time: 0:19:33 (0.7574 s / it)
Epoch: [1]  [   0/1549]  eta: 1:18:25  lr: 0.002000  loss: 0.2701 (0.2701)  loss_classifier: 0.1216 (0.1216)  loss_box_reg: 0.1035 (0.1035)  loss_objectness: 0.0355 (0.0355)  loss_rpn_box_reg: 0.00

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f8f0385ed30>
Traceback (most recent call last):
  File "/home/beomgon/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/beomgon/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1301, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/home/beomgon/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/home/beomgon/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/home/beomgon/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/home/beomgon/anaconda3/envs/pytorch/lib/python3.8/selectors.py", line 415, in se

KeyboardInterrupt: 

In [None]:
# Bare expression: the notebook renders the model's repr as this cell's output.
model