In [1]:
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import os
import sys 
import time
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.utils.data as data
import numpy as np
import cv2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Choose the compute device once. When it is a CUDA device, newly created
# float tensors also default to the GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

print(device)

cuda:0


In [3]:
# This notebook is Python 3 only (the training cell uses print(..., end=' ')),
# so the Python 2 cElementTree branch could never run. ElementTree picks up its
# C accelerator automatically on Python 3, and cElementTree no longer exists
# from Python 3.9 onwards.
import xml.etree.ElementTree as ET

In [4]:
# Class names in label-index order; used to build the default name -> index map.
VOC_CLASSES = ('line', 'background')


class VOCAnnotationTransform(object):
    """Convert a VOC-style XML annotation into a list of scaled boxes with labels.

    Arguments:
        class_to_ind (dict, optional): maps a lower-cased, stripped class name
            to its integer label. When omitted (or empty) the names in
            VOC_CLASSES are enumerated, i.e. {'line': 0, 'background': 1}.
        keep_difficult (bool, optional): when False (default), objects whose
            <difficult> tag equals 1 are dropped.
    """

    def __init__(self, class_to_ind=None, keep_difficult=False):
        # Honour a caller-supplied mapping instead of silently overriding it.
        self.class_to_ind = class_to_ind or dict(
            zip(VOC_CLASSES, range(len(VOC_CLASSES))))
        self.keep_difficult = keep_difficult

    def __call__(self, target, width, height):
        """Extract every kept <object> of the annotation.

        Arguments:
            target: annotation root; any object exposing ``iter('object')``
                that yields ElementTree-like elements.
            width (number): divisor applied to xmin / xmax.
            height (number): divisor applied to ymin / ymax.
        Returns:
            list of ``[xmin, ymin, xmax, ymax, label_ind]`` rows, one per object.
        Raises:
            KeyError: an object's class name is not present in ``class_to_ind``.
        """
        res = []
        for obj in target.iter('object'):
            # A missing or empty <difficult> tag is treated as "not difficult"
            # rather than crashing on ``None.text``.
            difficult_node = obj.find('difficult')
            difficult = (difficult_node is not None
                         and difficult_node.text is not None
                         and int(difficult_node.text) == 1)
            if difficult and not self.keep_difficult:
                continue

            name = obj.find('name').text.lower().strip()
            bbox = obj.find('bndbox')

            bndbox = []
            for i, pt in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
                # Shift by one pixel (VOC coordinates are conventionally
                # 1-based), then scale: x values by width, y values by height.
                cur_pt = int(float(bbox.find(pt).text)) - 1
                cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
                bndbox.append(cur_pt)

            bndbox.append(self.class_to_ind[name])
            res.append(bndbox)  # [xmin, ymin, xmax, ymax, label_ind]

        return res  # [[xmin, ymin, xmax, ymax, label_ind], ... ]

class VOCDetection(data.Dataset):
    """Detection dataset stored in a VOC-style directory layout.

    Files read below ``root``:
        ImageSets/Main/default.txt  - one image id per line
        Annotations/<id>.xml        - annotation for each id
        JPEGImages/<id>.jpg         - image for each id

    Arguments:
        root (str): dataset root directory.
        transform (callable, optional): called as
            ``transform(img, boxes, labels)`` and must return
            ``(img, boxes, labels)``.
        target_transform (callable, optional): called as
            ``target_transform(xml_root, width, height)``.
        dataset_name (str, optional): stored as ``self.name``.
    """

    def __init__(self, root, transform=None,
                 target_transform=VOCAnnotationTransform(),
                 dataset_name='line_dataset1'):
        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.name = dataset_name
        self._annopath = os.path.join('%s', 'Annotations', '%s.xml')
        self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg')

        # Read the id list with a context manager so the file handle is closed,
        # and skip blank lines, which would otherwise become ids of missing files.
        ids_file = os.path.join(self.root, 'ImageSets', 'Main', 'default.txt')
        with open(ids_file) as id_lines:
            self.ids = [(self.root, line.strip())
                        for line in id_lines if line.strip()]

    def __getitem__(self, index):
        """Return ``(image_tensor, target)`` for the sample at ``index``."""
        im, gt, h, w = self.pull_item(index)
        return im, gt

    def __len__(self):
        return len(self.ids)

    def pull_item(self, index):
        """Load, annotate and (optionally) augment one sample.

        Returns:
            tuple ``(image, target, height, width)``: ``image`` is a tensor
            permuted to channels-first, ``height`` / ``width`` are the size of
            the image as read from disk. With a ``transform`` set, ``target``
            is an array whose rows are the four box coordinates followed by
            the label; otherwise it is whatever ``target_transform`` returned.
        Raises:
            FileNotFoundError: the image file could not be read.
            ValueError: a ``transform`` is set but the annotation yielded no
                objects.
        """
        img_id = self.ids[index]

        target = ET.parse(self._annopath % img_id).getroot()
        img_path = self._imgpath % img_id
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('could not read image: %s' % img_path)
        height, width, channels = img.shape

        if self.target_transform is not None:
            target = self.target_transform(target, width, height)

        if self.transform is not None:
            target = np.array(target)
            if target.size == 0:
                # Fail with the offending id instead of an opaque IndexError
                # from the slicing below.
                raise ValueError(
                    'annotation %s contains no usable objects'
                    % (self._annopath % img_id))
            img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
            # cv2 loads BGR; reverse the channel axis to get RGB.
            img = img[:, :, (2, 1, 0)]
            # The boxes are already corner-form (xmin, ymin, xmax, ymax) as
            # produced by the annotation transform, so xmin/ymin must NOT be
            # added onto xmax/ymax here.
            # NOTE(review): assumes self.transform returns boxes in the same
            # corner form it receives - confirm for custom augmentations.
            target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
        return torch.from_numpy(img).permute(2, 0, 1), target, height, width

    def pull_image(self, index):
        """Return the unmodified image at ``index`` as read by ``cv2.imread``."""
        img_id = self.ids[index]
        return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)

    def pull_anno(self, index):
        """Return ``(image_id, annotation)`` with coordinates left unscaled.

        ``target_transform`` is called with width = height = 1, so the
        coordinates are not divided by the image size.
        """
        img_id = self.ids[index]
        anno = ET.parse(self._annopath % img_id).getroot()
        gt = self.target_transform(anno, 1, 1)
        return img_id[1], gt

    def pull_tensor(self, index):
        """Return the unmodified image at ``index`` as a tensor with a leading axis of size 1."""
        return torch.Tensor(self.pull_image(index)).unsqueeze_(0)


In [5]:
# Training set: images are augmented to size 300 with the given per-channel
# mean; annotations go through the default VOCAnnotationTransform.
dataset = VOCDetection(
    '/home/broiron/broiron/model_train/ssd-pytorch-custom/line_dataset2/',
    SSDAugmentation(size=300, mean=(104, 117, 123)),
    VOCAnnotationTransform(),
)

In [6]:
# Project directory that the pretrained-weights path is resolved against.
base_dir = "/home/broiron/broiron/model_train/ssd_pytorch/"

In [7]:
# Build the SSD model ('train' phase, size 300, 2 classes). `net` and `ssd_net`
# are two names for the same object.
net = ssd_net = build_ssd('train', 300, num_classes=2)

In [8]:
# Load the pretrained VGG weights onto the CPU first: without map_location a
# checkpoint that stores CUDA tensors cannot be loaded on a CPU-only machine,
# which would defeat the CPU fallback chosen for `device`.
vgg_weights = torch.load(os.path.join(base_dir, 'weights/vgg16_reducedfc.pth'),
                         map_location='cpu')
print("loading base network...")
ssd_net.vgg.load_state_dict(vgg_weights)

# Move the whole model to the selected device.
net = net.to(device)

loading base network...


In [9]:
# Optimizer with the default hyper-parameters.
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9,
                      weight_decay=5e-4)

# MultiBoxLoss takes its arguments positionally. Going by the keyword draft
# that used to sit here as a dead string literal, the order is:
#   num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining,
#   neg_pos, neg_overlap, encode_target, use_gpu
# NOTE(review): that draft used bkg_label=1 while the live call passes 0 -
# confirm 0 is intended.
# use_gpu follows actual CUDA availability instead of a hard-coded True so the
# loss also works when the notebook falls back to the CPU.
criterion = MultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, False,
                         torch.cuda.is_available())

In [10]:
def detection_collate(batch):
    """Collate samples whose annotation counts differ from image to image.

    Arguments:
        batch: sequence of samples; element 0 of a sample is the image tensor,
            element 1 its annotations.
    Return:
        A tuple of:
            1) (tensor) all images stacked along a new dimension 0
            2) (list of tensors) one FloatTensor of annotations per image,
               kept as a list because their lengths may differ
    """
    images = []
    annotations = []
    for sample in batch:
        image, boxes = sample[0], sample[1]
        images.append(image)
        annotations.append(torch.FloatTensor(boxes))
    stacked_images = torch.stack(images, 0)
    return stacked_images, annotations

In [11]:
# Switch the model to training mode before building the input pipeline.
net.train()

# Bookkeeping shared with the training loop.
batch_size = 32
epoch = 0
loc_loss = 0
conf_loss = 0

print('loading dataset...')

# Number of whole batches in one pass over the dataset.
epoch_size = len(dataset) // batch_size

print('Training SSD on: ', dataset.name)

step_index = 0

# Batches are drawn in dataset order (no shuffling) by 4 worker processes and
# assembled with the custom collate function.
data_loader = data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    collate_fn=detection_collate,
    pin_memory=True,
)
print('Data loader length...', len(data_loader))

loading dataset...
Training SSD on:  line_dataset1
Data loader length... 8


In [12]:
# Iteration-based training loop.
#
# The loader holds far fewer batches than there are iterations, so the iterator
# is rebuilt every time it runs dry; each rebuild marks the end of one epoch.

# Both checkpoints below are written into ./weights; create it up front so a
# missing directory cannot abort the run at the first save.
os.makedirs('weights', exist_ok=True)

batch_iterator = iter(data_loader)
for iteration in range(0, 120000):
    try:
        images, targets = next(batch_iterator)
    except StopIteration:
        # One full pass over the dataset is done: count the epoch, reset the
        # per-epoch loss accumulators and start a new pass.
        epoch += 1
        loc_loss = 0
        conf_loss = 0
        batch_iterator = iter(data_loader)
        images, targets = next(batch_iterator)

    # Inputs need no autograd wrapper; just move them to the training device.
    images = images.to(device)
    targets = [ann.to(device) for ann in targets]

    # forward
    t0 = time.time()
    out = net(images)

    # backward
    optimizer.zero_grad()
    loss_l, loss_c = criterion(out, targets)
    loss = loss_l + loss_c
    loss.backward()
    optimizer.step()
    t1 = time.time()

    # Running sums of the localisation / confidence losses for this epoch.
    loc_loss += loss_l.item()
    conf_loss += loss_c.item()

    print('timer: %.4f sec.' % (t1 - t0))
    print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

    # Periodic checkpoint.
    if iteration != 0 and iteration % 5000 == 0:
        print('Saving state, iter:', iteration)
        torch.save(ssd_net.state_dict(), 'weights/ssd300_line_' +
                   repr(iteration) + '.pth')

# Final weights after the last iteration.
torch.save(ssd_net.state_dict(), './weights/linedataset1' + '.pth')

  mode = random.choice(self.sample_options)
  mode = random.choice(self.sample_options)


IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/broiron/anaconda3/envs/ssd_train/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/broiron/anaconda3/envs/ssd_train/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/broiron/anaconda3/envs/ssd_train/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_56242/1694714558.py", line 55, in __getitem__
    im, boxes, labels, h, w = self.pull_item(index)
  File "/tmp/ipykernel_56242/1694714558.py", line 75, in pull_item
    img, boxes, labels = self.transform(img, target[:4], target[4])
IndexError: index 4 is out of bounds for axis 0 with size 1


In [None]:
# Epoch-based variant of the training loop (work in progress).
# The epoch count has its own name so the loop variable `epoch` no longer
# overwrites the bound it iterates up to.
num_epochs = 100
for epoch in range(num_epochs):
    # The per-batch tuple is called `batch_data`, not `data`, so it does not
    # shadow the `torch.utils.data as data` module alias used by other cells.
    for batch, batch_data in enumerate(data_loader, 1):
        loc_loss = 0
        conf_loss = 0

        images, targets = batch_data[0], batch_data[1]

        # Device transfer, currently disabled:
        # with torch.no_grad():
        #     images = Variable(images.to(device))
        #     targets = [Variable(ann.to(device)) for ann in targets]
        