In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torchvision import transforms
import torchvision.transforms.functional as FT
from torchvision import utils
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
from PIL import Image
from torchsummary import summary
import xml.etree.ElementTree as Et
from typing import Any, Callable, Dict, Optional, Tuple, List
import collections
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import imgaug as ia  # imgaug
from imgaug import augmenters as iaa

In [2]:
# Select the compute device once; the cells below read the global `device`.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")

if not use_gpu:
    print('No GPU available, using the CPU instead.')
else:
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: GeForce RTX 2080 Ti


In [None]:
# Report how many entries each of the two test directories contains.
for listed_dir in (
    "../../../../../../data/test/Annotations",
    "../../../../../../data/test/JPEGImages",
):
    print(len(os.listdir(listed_dir)))

# Path of one sample training image (not read by this cell).
path = "../../../../../../data/train/JPEGImages/WN4_142.jpg"


In [3]:
def xml_parser(xml_path):
  """Parse one VOC-style annotation file.

  Args:
    xml_path: Path of the annotation XML file.

  Returns:
    A tuple ``(file_name, object_name, bbox)`` where
      * ``file_name`` is the text of the root ``<filename>`` element,
      * ``object_name`` is the list of ``<name>`` texts, one per ``<object>``,
      * ``bbox`` is a list of ``[xmin, ymin, xmax, ymax]`` integer lists in the
        same order as ``object_name``.

  Raises:
    OSError: if the file cannot be opened.
    xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    AttributeError: if a required element (``filename``, ``name``, ``bndbox``
      or one of its corners) is missing.
  """
  # The context manager closes the handle as soon as parsing is done; the
  # previous version left one open file per annotation to the garbage collector.
  with open(xml_path, "r") as xml_file:
    root = Et.parse(xml_file).getroot()

  file_name = root.find("filename").text
  object_name = []
  bbox = []
  for _object in root.findall("object"):
    object_name.append(_object.find("name").text)
    bndbox = _object.find("bndbox")
    # Coordinates may be written as floats ("30.7"); go through float() and
    # truncate to int.
    bbox.append([int(float(bndbox.find(corner).text))
                 for corner in ("xmin", "ymin", "xmax", "ymax")])
  return file_name, object_name, bbox

In [4]:
def makeBox(voc_im,bbox,objects):
  """Return a copy of `voc_im` with one green rectangle and label per object.

  `bbox[k]` supplies the four corner values (x1, y1, x2, y2) for `objects[k]`;
  the input image itself is not modified.
  """
  green = (0,255,0)
  canvas = voc_im.copy()
  count = len(objects)
  for k in range(count):
    x1, y1, x2, y2 = (int(bbox[k][c]) for c in range(4))
    cv2.rectangle(canvas,(x1,y1),(x2,y2),color = green,thickness = 1)
    # Label sits 10 px above the box's top-left corner.
    # Trailing arguments: font scale, color, thickness.
    cv2.putText(canvas, objects[k], (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, green, 2)
  return canvas

In [5]:
# Annotation file names in sorted (deterministic) order.
xml_list = sorted(os.listdir("VOCdevkit/VOC2012/Annotations"))

# Collect every distinct object name that appears in any annotation.
label_set = set()
for xml_name in xml_list:
  _, names_in_file, _ = xml_parser("VOCdevkit/VOC2012/Annotations/"+str(xml_name))
  label_set.update(names_in_file)

label_set = sorted(label_set)

# Class name -> integer id, numbered from 1 in alphabetical order
# (id 0 is not assigned here; presumably left for background -- confirm).
label_dic = {key: idx for idx, key in enumerate(label_set, start=1)}
print(label_dic)

{'aeroplane': 1, 'bicycle': 2, 'bird': 3, 'boat': 4, 'bottle': 5, 'bus': 6, 'car': 7, 'cat': 8, 'chair': 9, 'cow': 10, 'diningtable': 11, 'dog': 12, 'horse': 13, 'motorbike': 14, 'person': 15, 'pottedplant': 16, 'sheep': 17, 'sofa': 18, 'train': 19, 'tvmonitor': 20}


In [6]:
class Pascal_Voc(Dataset):
  """Dataset yielding one augmented image and its detection targets.

  Each item is read from ``VOCdevkit/VOC2012``: the annotation named by
  ``xml_list[idx]`` is parsed with ``xml_parser`` and the image it references
  is loaded, passed through ``iaa.Fliplr(0.5)`` and then resized so that its
  shorter side is 600 px (aspect ratio kept).

  Args:
    xml_list: Annotation file names this dataset indexes into.
    len_data: Value reported by ``len()``; only indices below it are sampled
      by a DataLoader.
  """

  def __init__(self,xml_list,len_data):

    self.xml_list = xml_list
    self.len_data = len_data
    self.to_tensor = transforms.ToTensor()
    self.flip = iaa.Fliplr(0.5)
    self.resize = iaa.Resize({"shorter-side": 600, "longer-side": "keep-aspect-ratio"})

  def __len__(self):
    return self.len_data

  def __getitem__(self, idx):
    """Return ``(image, targets)`` for annotation ``idx``.

    ``image`` is the output of ``transforms.ToTensor()``; ``targets`` is a
    one-element list holding a dict with ``'boxes'`` and ``'labels'`` tensors
    created on the notebook-global ``device``.
    """
    # Use the list handed to the constructor. Previously this read the
    # notebook-global `xml_list`, silently ignoring the argument.
    xml_path = "VOCdevkit/VOC2012/Annotations/"+str(self.xml_list[idx])

    file_name, object_name, bbox = xml_parser(xml_path)
    image_path = "VOCdevkit/VOC2012/JPEGImages/"+str(file_name)
    image = Image.open(image_path).convert("RGB")
    image = np.array(image)

    # Boxes are passed with a leading batch axis and transformed together with
    # the image so they stay aligned after flip / resize.
    image, bbox = self.flip(image = image, bounding_boxes = np.array([bbox]))
    image, bbox = self.resize(image = image,bounding_boxes = bbox)
    bbox = bbox.squeeze(0).tolist()
    image = self.to_tensor(image)

    targets = []
    d = {}
    # torchvision detection models expect floating-point boxes; make the dtype
    # explicit instead of relying on what the augmenter returns.
    d['boxes'] = torch.tensor(bbox,dtype=torch.float32,device=device)
    d['labels'] = torch.tensor([label_dic[x] for x in object_name],dtype=torch.int64,device = device)
    targets.append(d)

    return image, targets

In [7]:
# Feature extractor: the `features` part of an ImageNet-pretrained VGG16 with
# its last module sliced off (presumably the final max-pool -- confirm).
backbone = torchvision.models.vgg16(pretrained=True).features[:-1]
backbone_out = 512
# Attach the channel count to the backbone as `out_channels`
# (FasterRCNN is expected to read this attribute -- see torchvision docs).
backbone.out_channels = backbone_out

# One feature map, 3 sizes x 3 aspect ratios.
anchor_generator = torchvision.models.detection.rpn.AnchorGenerator(
    sizes=((128, 256, 512),), aspect_ratios=((0.5, 1.0, 2.0),))

# RoI features are pooled to resolution x resolution from feature map '0'.
resolution = 7
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'], output_size=resolution, sampling_ratio=2)

# Box head input size = channels * pooled height * pooled width.
box_head = torchvision.models.detection.faster_rcnn.TwoMLPHead(
    in_channels=backbone_out*(resolution**2), representation_size=4096)
box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    4096, 21)  # 21 classes

model = torchvision.models.detection.FasterRCNN(backbone, num_classes=None,
                                                min_size=600, max_size=1000,
                                                rpn_anchor_generator=anchor_generator,
                                                rpn_pre_nms_top_n_train=6000, rpn_pre_nms_top_n_test=6000,
                                                rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=300,
                                                rpn_nms_thresh=0.7, rpn_fg_iou_thresh=0.7,  rpn_bg_iou_thresh=0.3,
                                                rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                                                box_roi_pool=roi_pooler, box_head=box_head, box_predictor=box_predictor,
                                                box_score_thresh=0.05, box_nms_thresh=0.7, box_detections_per_img=300,
                                                box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                                                box_batch_size_per_image=128, box_positive_fraction=0.25
                                                )
# When a RoI head (box_predictor) is supplied, num_classes is set to None.

# Re-initialise the detection-specific layers (the backbone is left as loaded).
# Every RPN parameter is drawn from N(0, 0.01).
# NOTE(review): this loop does not tell weights from biases, unlike the
# roi_heads loop below, which zeroes biases -- confirm this is intended.
for param in model.rpn.parameters():
  torch.nn.init.normal_(param, mean=0.0, std=0.01)

# RoI heads: parameters whose name contains "bbox_pred" get std 0.001, other
# "weight" parameters get std 0.01. The bias check is a separate `if`, so any
# "bias" parameter (including bbox_pred's, after its normal draw) ends up zero.
for name, param in model.roi_heads.named_parameters():
  if "bbox_pred" in name:
    torch.nn.init.normal_(param, mean=0.0, std=0.001)
  elif "weight" in name:
    torch.nn.init.normal_(param, mean=0.0, std=0.01)
  if "bias" in name:
    torch.nn.init.zeros_(param)

In [8]:
# TensorBoard writer; its log directory is runs/Faster_RCNN (the training cell
# also saves its checkpoints under the same directory).
writer = SummaryWriter("runs/Faster_RCNN")
# IPython magics: load the TensorBoard notebook extension and open it on the
# "runs" log directory.
%load_ext tensorboard
%tensorboard --logdir="runs"

2022-02-11 19:19:39.808426: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/joo/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:/usr/local/cuda-10.2/lib64
2022-02-11 19:19:39.808444: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [9]:
def Total_Loss(loss, rpn_box_weight=10, box_reg_weight=1):
  """Combine the four Faster R-CNN loss terms into one scalar.

  Args:
    loss: Mapping with the keys ``'loss_objectness'``, ``'loss_rpn_box_reg'``,
      ``'loss_classifier'`` and ``'loss_box_reg'``; other keys are ignored.
    rpn_box_weight: Multiplier for the RPN box-regression term (default 10,
      the value that used to be hard-coded).
    box_reg_weight: Multiplier for the detection-head box-regression term
      (default 1, the value that used to be hard-coded).

  Returns:
    ``(objectness + rpn_box_weight * rpn_box_reg)
      + (classifier + box_reg_weight * box_reg)``.

  Raises:
    KeyError: if one of the four required keys is missing.
  """
  rpn_total = loss['loss_objectness'] + rpn_box_weight*loss['loss_rpn_box_reg']
  fast_rcnn_total = loss['loss_classifier'] + box_reg_weight*loss['loss_box_reg']

  return rpn_total + fast_rcnn_total

In [10]:
import time

# ---------------------------------------------------------------------------
# Training driver: builds optimizer + scheduler, resumes from the checkpoint
# in runs/Faster_RCNN if one can be loaded, then trains for `total_epoch`
# epochs, logging and checkpointing every `term` iterations.
# ---------------------------------------------------------------------------

total_epoch = 40

len_data = 15000   # number of annotation files used per epoch
term = 1000        # log / checkpoint interval, in iterations

loss_sum = 0

model.to(device)

optimizer = torch.optim.SGD(params = model.parameters(),lr = 0.001, momentum = 0.9, weight_decay=0.0005)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,total_epoch,eta_min=0.00001)


def _save_checkpoint(epoch, iteration):
  """Write model / optimizer / scheduler state plus the resume position."""
  state = {
    'epoch': epoch,
    'iter' : iteration,
    'state_dict': model.state_dict(),
    'optimizer' : optimizer.state_dict(),
    'scheduler': scheduler.state_dict()
  }
  torch.save(state,"runs/Faster_RCNN/Check_point.pth")


try:
  # map_location lets a checkpoint written on GPU be resumed on another device.
  check_point = torch.load("runs/Faster_RCNN/Check_point.pth", map_location=device)
  start_epoch = check_point['epoch']
  start_idx = check_point['iter']
  model.load_state_dict(check_point['state_dict'])
  optimizer.load_state_dict(check_point['optimizer'])
  # Restore the scheduler created above in place. Rebuilding it with
  # last_epoch=start_epoch would run one scheduler step at construction and
  # move the optimizer's learning rate before the saved state is loaded.
  scheduler.load_state_dict(check_point['scheduler'])

  # A checkpoint written at the very end of an epoch resumes at the next one.
  if start_idx == len_data:
    start_idx = 0
    start_epoch = start_epoch + 1

except Exception as err:
  # Best effort: fall back to training from scratch, but say why. A bare
  # `except:` would also swallow KeyboardInterrupt and hide the real cause.
  # NOTE(review): if the failure happens after model.load_state_dict, the
  # model keeps the loaded weights while the counters restart at 0.
  print("check point load error!")
  print("  reason: {}: {}".format(type(err).__name__, err))
  start_epoch = 0
  start_idx = 0

print("start_epoch = {} , start_idx = {}".format(start_epoch,start_idx))

print("Training Start")
model.train()
start = time.time()

for epoch in range(start_epoch,total_epoch):

  writer.add_scalar('Learning Rate',scheduler.get_last_lr()[0], epoch)

  # On a mid-epoch resume only the remaining number of iterations is run.
  # NOTE(review): the shortened dataset still samples from the first
  # (len_data - start_idx) files, not from the files unseen before the restart.
  dataset = Pascal_Voc(xml_list[:len_data],len_data - start_idx)
  dataloader = DataLoader(dataset,shuffle=True)

  for i, (image,targets) in enumerate(dataloader,start_idx):

    optimizer.zero_grad()

    # The DataLoader adds a batch axis to the target tensors; drop it again.
    targets[0]['boxes'].squeeze_(0)
    targets[0]['labels'].squeeze_(0)

    loss = model(image.to(device),targets)
    total_loss = Total_Loss(loss)

    total_loss.backward()
    optimizer.step()

    # Accumulate a detached value so the running sum does not keep a reference
    # to every iteration's autograd graph.
    loss_sum += total_loss.detach()

    if (i+1) % term == 0:
      end = time.time()
      print("Epoch {} | Iter {} | Loss: {} | Duration: {} min".format(epoch,(i+1),(loss_sum/term).item(),int((end-start)/60)))
      writer.add_scalar('Training Loss',loss_sum / term, epoch * len_data + i)

      # Saved after optimizer.step(), so 'iter' counts completed updates.
      _save_checkpoint(epoch, i+1)

      loss_sum = 0
      start = time.time()

  start_idx = 0
  scheduler.step()

  # End-of-epoch checkpoint. 'iter' == len_data marks the epoch as finished
  # (the inner loop always ends at len_data), and avoids reading `i` when the
  # loader produced no batches.
  _save_checkpoint(epoch, len_data)

  if (epoch+1) % 10 == 0:
    torch.save(model.state_dict(),"runs/Faster_RCNN/Epoch{}.pth".format(epoch))

# Make sure buffered scalars reach disk once training is over.
writer.flush()

start_epoch = 0 , start_idx = 6000
Training Start


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch 0 | Iter 7000 | Loss: 2.4794702529907227 | Duration: 2 min
Epoch 0 | Iter 8000 | Loss: 2.569445848464966 | Duration: 2 min
Epoch 0 | Iter 9000 | Loss: 2.4855403900146484 | Duration: 2 min
Epoch 0 | Iter 10000 | Loss: 2.4337081909179688 | Duration: 2 min
Epoch 0 | Iter 11000 | Loss: 2.403154134750366 | Duration: 2 min
Epoch 0 | Iter 12000 | Loss: 2.318570375442505 | Duration: 2 min
Epoch 0 | Iter 13000 | Loss: 2.367945909500122 | Duration: 2 min
Epoch 0 | Iter 14000 | Loss: 2.4955968856811523 | Duration: 2 min
Epoch 0 | Iter 15000 | Loss: 2.357517957687378 | Duration: 2 min
Epoch 1 | Iter 1000 | Loss: 2.110621452331543 | Duration: 2 min
Epoch 1 | Iter 2000 | Loss: 2.2536275386810303 | Duration: 2 min
Epoch 1 | Iter 3000 | Loss: 2.0594537258148193 | Duration: 2 min
Epoch 1 | Iter 4000 | Loss: 2.139148473739624 | Duration: 2 min
Epoch 1 | Iter 5000 | Loss: 2.1133790016174316 | Duration: 2 min
Epoch 1 | Iter 6000 | Loss: 2.028985023498535 | Duration: 2 min
Epoch 1 | Iter 7000 | Loss:

In [11]:
# Look at the targets of the last training batch without modifying them.
# The in-place `squeeze_` used before removed one more axis every time this
# cell ran (a single-object label tensor ended up 0-dimensional).
last_boxes = targets[0]['boxes'].squeeze(0)
last_labels = targets[0]['labels'].squeeze(0)
last_labels

tensor(9, device='cuda:0')