In [102]:
import os
import sys
import cv2
import torch

import pybboxes as pbx

In [103]:
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))
# sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src/data')))
# sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src/utils')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src/vision')))

In [104]:
# import cv_utils
# import file_utils as futils
# import data_loader as dload
# import vision
from vision.engine import train_one_epoch, evaluate
from vision.v_utils import collate_fn

In [105]:
classes_list = ["Drone", "Background"]

In [106]:
torch.manual_seed(42)

<torch._C.Generator at 0x7f551be579f0>

In [107]:
class YoloDroneTorchDataset(torch.utils.data.Dataset):
    """A class to construct a PyTorch dataset from a Drone Yolo dataset.
    
    Args:
        split: train, test or val
        transforms (None): a list of PyTorch transforms to apply to images and targets when loading
    """

    def __init__(
        self,
        dataset_path='.',
        split='test',
        transforms=None,
        classes=classes_list,
    ):
        self.split = split
        self.transforms = transforms
        self.classes = classes

        self.images_path = []
        self.labels_path = []

        if self.classes[0] != "background":
            self.classes = ["background"] + self.classes

        self.labels_map_rev = {c: i for i, c in enumerate(self.classes)}

        images_folder = os.path.join(dataset_path, 'images',split)
        labels_folder = os.path.join(dataset_path, 'labels', split)
        for image in os.listdir(images_folder):
          img_path = os.path.join(images_folder, image)
        
          label = image.split('.')[0] + '.txt'
          label_path = os.path.join(labels_folder, label)
          with open(label_path, 'r') as f:
            label_lines = f.readlines()
          if len(label_lines) != 0:   
            self.labels_path.append(label_path)
            self.images_path.append(img_path)

    def __getitem__(self, idx):
        # reading the images and converting them to correct color  
        img_path = self.images_path[idx]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # # prepairing target
        label_path = self.labels_path[idx] 
        with open(label_path, 'r') as f:
            label_lines = f.readlines() 
        
        # cv2 image gives size as height x width    
        wt = img.shape[1]
        ht = img.shape[0]

        boxes = []
        labels = []

        # detections = sample[self.gt_field].detections
        for label in label_lines:
            class_id, x_center, y_center, bbox_width, bbox_height = map(float, label.split())
            yolo_bbox = (x_center, y_center, bbox_width, bbox_height)
            voc_bbox = pbx.convert_bbox(yolo_bbox, from_type="yolo", to_type="voc", image_size=(wt, ht))

            boxes.append([voc_bbox[0], voc_bbox[1], voc_bbox[2], voc_bbox[3]])
            # boxes.append([x_center * wt, y_center * ht, (x_center + bbox_width) * wt, (x_center + bbox_height) * ht])
            labels.append(1)  # drone class

        
        # applying augmentations
        if self.transforms is not None:
            transformed = self.transforms(image=img,bboxes=boxes, category_ids=labels)
            img = transformed["image"]
            boxes = transformed["bboxes"]
            labels = transformed["category_ids"]

        # convert boxes into a torch.Tensor                
        boxes = torch.as_tensor(boxes, dtype=torch.float32)                
            
        target = {}
        target["boxes"] = boxes
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        target["image_id"] = torch.as_tensor([idx])

        if len(boxes) != 0:
            # getting the areas of the boxes
            target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        return img, target

    def __len__(self):
        return len(self.images_path)

    def get_classes(self):
        return self.classes
    


In [108]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [109]:
bbox_format = 'pascal_voc'

train_transform = A.Compose(
    [
        A.LongestMaxSize(320),
        # A.PadIfNeeded(min_height=320, min_width=320, border_mode=0),
        A.RandomSizedBBoxSafeCrop(width=300, height=300, erosion_rate=0.1),
     
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.3),
        A.ToFloat(max_value=255, p=1, always_apply=True),

        ToTensorV2(p=1.0)
    ],
    bbox_params=A.BboxParams(format=bbox_format, label_fields=['category_ids']),
)

test_transform = A.Compose(
    [
        A.LongestMaxSize(300),
        # A.PadIfNeeded(min_height=300, min_width=300, border_mode=0),
        A.ToFloat(max_value=255, p=1, always_apply=True),

        ToTensorV2(p=1.0)
    ],
    bbox_params=A.BboxParams(format=bbox_format, label_fields=['category_ids']),
)

In [110]:
yolo_train_dataset = YoloDroneTorchDataset('../dataset/yolo', 'train', test_transform) 
yolo_test_dataset = YoloDroneTorchDataset('../dataset/yolo', 'test', test_transform)
yolo_val_dataset = YoloDroneTorchDataset('../dataset/yolo', 'val')

In [111]:
print(len(yolo_train_dataset), len(yolo_test_dataset), len(yolo_val_dataset))

12630 2706 2708


In [112]:
import torchvision
model = torchvision.models.detection.ssd300_vgg16(pretrained=True)

In [113]:
from torchvision.models.detection.ssd import SSDHead

head = SSDHead(in_channels=[512, 1024, 512, 256, 256, 256] , num_anchors=[4,6,6,6,4,4] , num_classes=2)
model.head = head

In [114]:
bs = 2
test_bs = 1
num_epochs = 10
learning_rate = 0.0005

In [115]:
data_loader_train = torch.utils.data.DataLoader(
    yolo_train_dataset, batch_size=bs, shuffle=True, num_workers=2,
    collate_fn=collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    yolo_test_dataset, batch_size=test_bs, shuffle=False, num_workers=2,
    collate_fn=collate_fn)

# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using device %s" % device)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=learning_rate,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

Using device cuda


In [122]:
for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=10)

    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

  target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
  target["labels"] = torch.as_tensor(labels, dtype=torch.int64)


Epoch: [0]  [   0/6315]  eta: 2:22:24  lr: 0.000000  loss: 3.8915 (3.8915)  bbox_regression: 1.5395 (1.5395)  classification: 2.3520 (2.3520)  time: 1.3531  data: 0.1175  max mem: 1412
Epoch: [0]  [  10/6315]  eta: 1:21:37  lr: 0.000000  loss: 3.7060 (3.6307)  bbox_regression: 1.4655 (1.3730)  classification: 2.2405 (2.2577)  time: 0.7767  data: 0.0195  max mem: 1412
Epoch: [0]  [  20/6315]  eta: 1:18:20  lr: 0.000000  loss: 3.5191 (3.6557)  bbox_regression: 1.2150 (1.3795)  classification: 2.2391 (2.2762)  time: 0.7163  data: 0.0087  max mem: 1412
Epoch: [0]  [  30/6315]  eta: 1:17:05  lr: 0.000000  loss: 3.6292 (3.9853)  bbox_regression: 1.3169 (1.6614)  classification: 2.2849 (2.3239)  time: 0.7135  data: 0.0080  max mem: 1412
Epoch: [0]  [  40/6315]  eta: 1:16:23  lr: 0.000001  loss: 3.5922 (3.9404)  bbox_regression: 1.3169 (1.6294)  classification: 2.2705 (2.3110)  time: 0.7134  data: 0.0079  max mem: 1412
Epoch: [0]  [  50/6315]  eta: 1:15:56  lr: 0.000001  loss: 3.1169 (3.7961) 

ValueError: Caught ValueError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_1073/955831022.py", line 72, in __getitem__
    transformed = self.transforms(image=img,bboxes=boxes, category_ids=labels)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/core/composition.py", line 346, in __call__
    self.preprocess(data)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/core/composition.py", line 380, in preprocess
    p.preprocess(data)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/core/utils.py", line 160, in preprocess
    data[data_name] = self.check_and_convert(data[data_name], image_shape, direction="to")
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/core/utils.py", line 174, in check_and_convert
    return process_func(data, image_shape)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/core/bbox_utils.py", line 155, in convert_to_albumentations
    return convert_bboxes_to_albumentations(data, self.params.format, image_shape, check_validity=True)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/augmentations/utils.py", line 180, in wrapper
    return func(array, *args, **kwargs)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/core/bbox_utils.py", line 294, in convert_bboxes_to_albumentations
    check_bboxes(converted_bboxes)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/augmentations/utils.py", line 180, in wrapper
    return func(array, *args, **kwargs)
  File "/home/jacksonrr3/hse/hse_dl_project/.venv/lib/python3.8/site-packages/albumentations/core/bbox_utils.py", line 371, in check_bboxes
    raise ValueError(
ValueError: Expected y_min for bbox [ 0.8979167  -0.02708333  0.9625      0.025       1.        ] to be in the range [0.0, 1.0], got -0.02708333358168602.


In [123]:
torch.save(model.state_dict(), "../models/ssd/ssd300_drone_10epochs.pth")

In [None]:
counter = 0
for s in ['train', 'test', 'val']:
    labels_folder = os.path.join('../dataset/yolo/labels/',  s)
    for label_file in os.listdir(labels_folder):
        label_path = os.path.join(labels_folder, label_file)
        with open(label_path, 'r') as f:
            label_lines = f.readlines() 

        for label in label_lines:
            class_id, x_center, y_center, bbox_width, bbox_height = map(float, label.split())
            yolo_bbox = (x_center, y_center, bbox_width, bbox_height)
            voc_bbox = pbx.convert_bbox(yolo_bbox, from_type="yolo", to_type="voc", image_size=(480, 480))
            for b in voc_bbox:
                if b > 0 and b < 1:
                    print(label_path)
                    print(voc_bbox)
                    counter += 1        



print(counter)

0
