In [1]:
# Thanks to: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
# Google colab implemntation: https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=6-NFR--2fXV3


from torch.utils.data import Dataset
import torch
import torchvision.transforms as T
from torch.utils.data import DataLoader
import pandas as pd
from PIL import Image, ImageDraw

In [2]:
class RipCurrentDataset(Dataset):
    """ Rip current detector dataset. """

    def __init__(self, dframe, image_dir, transform=None):
        """

        :param dframe: Dataframe object of csv file "aug_data_label.csv" generated by fix_size_and_aug
        :param image_dir: path where all fixed size and augment images are saved
        :param transform: the transform to be operated converting PIL image to torch tensor
        """
        super().__init__()

        self.df = dframe
        self.images_ids = self.df['Name'].unique()
        data_dict = {}
        
        for d in dframe.to_dict(orient='records'):
            data_dict[d['Name']] = {k: v if k=='Name' else int(v) for k, v in d.items()}
        
        self.data_dict = data_dict
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return self.images_ids.shape[0]

    def __getitem__(self, item):
        """
        Parameters
        ----------
        item : int
            id number to get one image from the dataset

        Returns
        -------

        img_tensor: torch.tensor
            Image as torch tensor object ready to be inserted into the Deep Neural Network

        target: dictionary
            Python dictionary contains image id, bounding box location (x1, y1, x2, y2) and label 0 - no rip, 1 - rip

        """
        img_name = self.images_ids[item]
        img_data = self.data_dict[img_name]
        
        box = [img_data[key] for key in ['x1', 'y1', 'x2', 'y2']]
        boxes = [box]
        
        area = abs(box[0] - box[2])*abs(box[1]-box[3])
        areas = [area]
        
        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        areas = torch.as_tensor(areas, dtype=torch.float32)
        
        label = torch.tensor(img_data['label'], dtype=torch.int64)

        img = Image.open(self.image_dir + img_name).convert("RGB")
        img_tensor = self.transform(img) 
        
        target = {
            'image_id': torch.tensor([item]),
            'boxes': boxes, 
#            0 labels for pics without the rip current
            'labels': torch.tensor((img_data['label'],), dtype=torch.int64),
            'iscrowd': torch.tensor((0,), dtype=torch.uint8),   
            'area': areas
            
            
        }
        
        
        
        
#         x1, y1, x2, y2 = [torch.tensor(img_data[key], dtype=torch.int64) for key in ['x1', 'y1', 'x2', 'y2']]
        
        
        
        
#         img_data = self.df[self.df['Name'] == img_name]



#         x1, y1, x2, y2 = torch.tensor(img_data['x1'].values, dtype=torch.int64), torch.tensor(img_data['y1'].values, dtype=torch.int64), \
#                          torch.tensor(img_data['x2'].values, dtype=torch.int64), torch.tensor(img_data['y2'].values, dtype=torch.int64)
#         label = torch.tensor(img_data['label'].values, dtype=torch.int64)


#         target['image_id'] = torch.tensor(item)

#         if label == 1:
#             target['box'] = torch.cat((x1.unsqueeze(0), y1.unsqueeze(0), x2.unsqueeze(0), y2.unsqueeze(0)), dim=1)[0]
#         else:
#             target['box'] = torch.zeros((0, 4), dtype=torch.int64)

#         target['labels'] = label[0]

        return img_tensor, target


In [3]:
import torchvision
torchvision.__version__

'0.13.0+cu102'

## 1 - Finetuning from a pretrained model

In [4]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor,FasterRCNN_ResNet50_FPN_Weights

# load a model pre-trained on COCO
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

#This line is different from the toturial because of a deprecation warning
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (rip current) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

## 2 - Modifying the model to add a different backbone

In [5]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features

# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))


# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.

# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)



## Putting everything together

In [6]:
# import transforms as T

# def get_transform(train):
# #     transforms = []
#     transforms.append(T.ToTensor())
#     if train:
#         transforms.append(T.RandomHorizontalFlip(0.5))
#     return T.Compose(transforms)

### Testing forward() method

In [7]:
from pathlib import Path
from torch.utils.data import DataLoader
from vision_utils import utils
import torchvision.transforms as T

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
# data_loader = torch.utils.data.DataLoader(
#  dataset, batch_size=2, shuffle=True, num_workers=4,
#  collate_fn=utils.collate_fn)

# data_path = Path(r'..\Data')
data_path = Path('/home/giora/rip_current_detector/augmanted_training_data')

# df = pd.read_csv(data_path/'aug_data_labels.csv')
df = pd.read_csv('aug_data_labels.csv')
df = df[df['label'] == 1]

# img_dir = str(data_path/'fixed_data') + '\\'
img_dir = str(data_path) + '/'
trans = T.ToTensor()

train_ds = RipCurrentDataset(df, img_dir, trans)
data_loader = DataLoader(train_ds, batch_size=5, shuffle=True, collate_fn=utils.collate_fn)







In [8]:
data_path


PosixPath('/home/giora/rip_current_detector/augmanted_training_data')

In [9]:

# For Training
images,targets = next(iter(data_loader))

In [10]:

images = list(image for image in images)

targets = [{k: v for k, v in t.items()} for t in targets]

output = model(images,targets)   # Returns losses and detections

# For inference
model.eval()
# x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]




FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [11]:
x = [images[1], images[2]]
predictions= model(x)
# predictions

In [12]:
%matplotlib notebook

In [13]:
import torchvision.transforms as T
from PIL import Image, ImageDraw
from matplotlib.pyplot import imshow
import  matplotlib.pyplot as plt


def draw_rect(image, box):
    """Present image with bounding box on top

    Parameters
    ----------
    image : numpy.ndarray
        numpy image

    box: numpy.ndarray
        Numpy array containing bounding boxes of shape `1 X 4` and the bounding boxes are represented in the
        format `x1 y1 x2 y2`

    Returns
    -------

    open new figure with image and bounding box

    """
    box = box[0]
    x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
    
    img1 = ImageDraw.Draw(image)
    shape = [(x1, y1), (x2, y2)]
    img1.rectangle(shape, outline="red", width=4)
    
    display(image)




idx = 1
img = T.ToPILImage()(x[idx].squeeze_(0))

# draw_rect(img, new_targets[0]['boxes'])
for box, label in zip(predictions[idx]['boxes'], predictions[idx]['labels']):
    if label == 1:
        draw_rect(img, [box])


##  Putting everything together

In [25]:
dataset = RipCurrentDataset(df, img_dir, trans)


# split the dataset in train and test set
torch.manual_seed(1)

indices = torch.randperm(len(dataset)).tolist()

dataset = torch.utils.data.Subset(dataset, indices[:-200])

dataset_test = torch.utils.data.Subset(dataset, indices[-200:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)






##  instantiate the model and the optimizer

In [15]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and person
num_classes = 2

# get the model using our helper function
# model = get_instance_segmentation_model(num_classes)


# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

device

device(type='cuda')

In [16]:
import sys
if 'vision_utils' not in sys.path:
    sys.path.append('vision_utils')


In [None]:
# https://stackoverflow.com/questions/48111847/tensorflow-object-detection-api-what-do-the-losses-mean-in-the-object-detectio

In [26]:

# let's train it for 10 epochs
from torch.optim.lr_scheduler import StepLR
from engine import train_one_epoch, evaluate
num_epochs = 10
torch.cuda.empty_cache()

for epoch in range(2):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

Epoch: [0]  [   0/4453]  eta: 0:20:49  lr: 0.000010  loss: 0.1615 (0.1615)  loss_classifier: 0.0394 (0.0394)  loss_box_reg: 0.1207 (0.1207)  loss_objectness: 0.0010 (0.0010)  loss_rpn_box_reg: 0.0004 (0.0004)  time: 0.2807  data: 0.1428  max mem: 6724
Epoch: [0]  [  10/4453]  eta: 0:08:36  lr: 0.000060  loss: 0.1615 (0.1535)  loss_classifier: 0.0412 (0.0406)  loss_box_reg: 0.1051 (0.1004)  loss_objectness: 0.0012 (0.0082)  loss_rpn_box_reg: 0.0042 (0.0043)  time: 0.1162  data: 0.0150  max mem: 6724
Epoch: [0]  [  20/4453]  eta: 0:07:57  lr: 0.000110  loss: 0.1452 (0.1592)  loss_classifier: 0.0403 (0.0422)  loss_box_reg: 0.1039 (0.1061)  loss_objectness: 0.0029 (0.0072)  loss_rpn_box_reg: 0.0031 (0.0037)  time: 0.0990  data: 0.0021  max mem: 6724
Epoch: [0]  [  30/4453]  eta: 0:07:41  lr: 0.000160  loss: 0.1434 (0.1559)  loss_classifier: 0.0403 (0.0437)  loss_box_reg: 0.0965 (0.1034)  loss_objectness: 0.0029 (0.0057)  loss_rpn_box_reg: 0.0016 (0.0031)  time: 0.0980  data: 0.0020  max me

Epoch: [0]  [ 330/4453]  eta: 0:06:48  lr: 0.001658  loss: 0.1261 (0.1616)  loss_classifier: 0.0334 (0.0485)  loss_box_reg: 0.0889 (0.1049)  loss_objectness: 0.0013 (0.0050)  loss_rpn_box_reg: 0.0021 (0.0032)  time: 0.0997  data: 0.0020  max mem: 6724
Epoch: [0]  [ 340/4453]  eta: 0:06:47  lr: 0.001708  loss: 0.1576 (0.1628)  loss_classifier: 0.0420 (0.0489)  loss_box_reg: 0.0889 (0.1057)  loss_objectness: 0.0016 (0.0050)  loss_rpn_box_reg: 0.0024 (0.0033)  time: 0.0990  data: 0.0019  max mem: 6724
Epoch: [0]  [ 350/4453]  eta: 0:06:46  lr: 0.001758  loss: 0.1607 (0.1628)  loss_classifier: 0.0551 (0.0489)  loss_box_reg: 0.1017 (0.1056)  loss_objectness: 0.0023 (0.0050)  loss_rpn_box_reg: 0.0025 (0.0033)  time: 0.0977  data: 0.0016  max mem: 6724
Epoch: [0]  [ 360/4453]  eta: 0:06:45  lr: 0.001808  loss: 0.1401 (0.1629)  loss_classifier: 0.0398 (0.0491)  loss_box_reg: 0.0786 (0.1055)  loss_objectness: 0.0023 (0.0050)  loss_rpn_box_reg: 0.0025 (0.0033)  time: 0.0973  data: 0.0016  max me

Epoch: [0]  [ 660/4453]  eta: 0:06:14  lr: 0.003307  loss: 0.1760 (0.1689)  loss_classifier: 0.0581 (0.0523)  loss_box_reg: 0.1095 (0.1080)  loss_objectness: 0.0036 (0.0052)  loss_rpn_box_reg: 0.0023 (0.0034)  time: 0.0989  data: 0.0017  max mem: 6724
Epoch: [0]  [ 670/4453]  eta: 0:06:13  lr: 0.003357  loss: 0.1836 (0.1690)  loss_classifier: 0.0531 (0.0523)  loss_box_reg: 0.1115 (0.1081)  loss_objectness: 0.0013 (0.0052)  loss_rpn_box_reg: 0.0022 (0.0034)  time: 0.0985  data: 0.0016  max mem: 6724
Epoch: [0]  [ 680/4453]  eta: 0:06:12  lr: 0.003407  loss: 0.1788 (0.1693)  loss_classifier: 0.0531 (0.0525)  loss_box_reg: 0.1169 (0.1083)  loss_objectness: 0.0010 (0.0052)  loss_rpn_box_reg: 0.0017 (0.0034)  time: 0.0985  data: 0.0016  max mem: 6724
Epoch: [0]  [ 690/4453]  eta: 0:06:11  lr: 0.003457  loss: 0.1874 (0.1697)  loss_classifier: 0.0569 (0.0526)  loss_box_reg: 0.1299 (0.1087)  loss_objectness: 0.0014 (0.0052)  loss_rpn_box_reg: 0.0015 (0.0033)  time: 0.0986  data: 0.0016  max me

Epoch: [0]  [ 990/4453]  eta: 0:05:44  lr: 0.004955  loss: 0.1588 (0.1760)  loss_classifier: 0.0484 (0.0554)  loss_box_reg: 0.0971 (0.1119)  loss_objectness: 0.0021 (0.0054)  loss_rpn_box_reg: 0.0019 (0.0033)  time: 0.1010  data: 0.0019  max mem: 6724
Epoch: [0]  [1000/4453]  eta: 0:05:43  lr: 0.005000  loss: 0.1703 (0.1763)  loss_classifier: 0.0582 (0.0556)  loss_box_reg: 0.1021 (0.1119)  loss_objectness: 0.0017 (0.0055)  loss_rpn_box_reg: 0.0030 (0.0034)  time: 0.1010  data: 0.0019  max mem: 6724
Epoch: [0]  [1010/4453]  eta: 0:05:42  lr: 0.005000  loss: 0.1508 (0.1762)  loss_classifier: 0.0499 (0.0557)  loss_box_reg: 0.0924 (0.1117)  loss_objectness: 0.0012 (0.0055)  loss_rpn_box_reg: 0.0027 (0.0034)  time: 0.1011  data: 0.0019  max mem: 6724
Epoch: [0]  [1020/4453]  eta: 0:05:41  lr: 0.005000  loss: 0.1445 (0.1759)  loss_classifier: 0.0420 (0.0556)  loss_box_reg: 0.0924 (0.1115)  loss_objectness: 0.0017 (0.0055)  loss_rpn_box_reg: 0.0023 (0.0034)  time: 0.1011  data: 0.0019  max me

Epoch: [0]  [1320/4453]  eta: 0:05:11  lr: 0.005000  loss: 0.1556 (0.1761)  loss_classifier: 0.0449 (0.0561)  loss_box_reg: 0.0989 (0.1104)  loss_objectness: 0.0025 (0.0060)  loss_rpn_box_reg: 0.0017 (0.0035)  time: 0.0992  data: 0.0017  max mem: 6724
Epoch: [0]  [1330/4453]  eta: 0:05:10  lr: 0.005000  loss: 0.1625 (0.1761)  loss_classifier: 0.0411 (0.0561)  loss_box_reg: 0.1056 (0.1104)  loss_objectness: 0.0053 (0.0060)  loss_rpn_box_reg: 0.0028 (0.0035)  time: 0.0987  data: 0.0017  max mem: 6724
Epoch: [0]  [1340/4453]  eta: 0:05:09  lr: 0.005000  loss: 0.1563 (0.1760)  loss_classifier: 0.0422 (0.0560)  loss_box_reg: 0.1056 (0.1104)  loss_objectness: 0.0053 (0.0060)  loss_rpn_box_reg: 0.0025 (0.0035)  time: 0.0985  data: 0.0016  max mem: 6724
Epoch: [0]  [1350/4453]  eta: 0:05:08  lr: 0.005000  loss: 0.1505 (0.1758)  loss_classifier: 0.0443 (0.0560)  loss_box_reg: 0.0988 (0.1103)  loss_objectness: 0.0019 (0.0060)  loss_rpn_box_reg: 0.0023 (0.0035)  time: 0.0985  data: 0.0016  max me

Epoch: [0]  [1650/4453]  eta: 0:04:38  lr: 0.005000  loss: 0.1333 (0.1746)  loss_classifier: 0.0365 (0.0554)  loss_box_reg: 0.1008 (0.1098)  loss_objectness: 0.0011 (0.0059)  loss_rpn_box_reg: 0.0026 (0.0035)  time: 0.0995  data: 0.0018  max mem: 6724
Epoch: [0]  [1660/4453]  eta: 0:04:37  lr: 0.005000  loss: 0.1328 (0.1747)  loss_classifier: 0.0323 (0.0553)  loss_box_reg: 0.0950 (0.1099)  loss_objectness: 0.0020 (0.0060)  loss_rpn_box_reg: 0.0034 (0.0035)  time: 0.0992  data: 0.0017  max mem: 6724
Epoch: [0]  [1670/4453]  eta: 0:04:36  lr: 0.005000  loss: 0.1624 (0.1746)  loss_classifier: 0.0328 (0.0553)  loss_box_reg: 0.1052 (0.1099)  loss_objectness: 0.0026 (0.0059)  loss_rpn_box_reg: 0.0033 (0.0035)  time: 0.0992  data: 0.0018  max mem: 6724
Epoch: [0]  [1680/4453]  eta: 0:04:35  lr: 0.005000  loss: 0.1519 (0.1745)  loss_classifier: 0.0328 (0.0552)  loss_box_reg: 0.1024 (0.1098)  loss_objectness: 0.0014 (0.0059)  loss_rpn_box_reg: 0.0020 (0.0035)  time: 0.0994  data: 0.0018  max me

Epoch: [0]  [1980/4453]  eta: 0:04:06  lr: 0.005000  loss: 0.1560 (0.1739)  loss_classifier: 0.0480 (0.0553)  loss_box_reg: 0.1034 (0.1094)  loss_objectness: 0.0014 (0.0058)  loss_rpn_box_reg: 0.0018 (0.0034)  time: 0.1016  data: 0.0020  max mem: 6724
Epoch: [0]  [1990/4453]  eta: 0:04:05  lr: 0.005000  loss: 0.1756 (0.1740)  loss_classifier: 0.0484 (0.0553)  loss_box_reg: 0.1111 (0.1095)  loss_objectness: 0.0024 (0.0058)  loss_rpn_box_reg: 0.0024 (0.0035)  time: 0.1018  data: 0.0021  max mem: 6724
Epoch: [0]  [2000/4453]  eta: 0:04:04  lr: 0.005000  loss: 0.1756 (0.1740)  loss_classifier: 0.0479 (0.0553)  loss_box_reg: 0.1184 (0.1095)  loss_objectness: 0.0025 (0.0058)  loss_rpn_box_reg: 0.0031 (0.0035)  time: 0.1015  data: 0.0021  max mem: 6724
Epoch: [0]  [2010/4453]  eta: 0:04:03  lr: 0.005000  loss: 0.1729 (0.1741)  loss_classifier: 0.0461 (0.0553)  loss_box_reg: 0.1193 (0.1096)  loss_objectness: 0.0034 (0.0057)  loss_rpn_box_reg: 0.0031 (0.0035)  time: 0.1011  data: 0.0020  max me

Epoch: [0]  [2310/4453]  eta: 0:03:33  lr: 0.005000  loss: 0.1625 (0.1737)  loss_classifier: 0.0451 (0.0549)  loss_box_reg: 0.1070 (0.1097)  loss_objectness: 0.0010 (0.0057)  loss_rpn_box_reg: 0.0019 (0.0034)  time: 0.1010  data: 0.0019  max mem: 6724
Epoch: [0]  [2320/4453]  eta: 0:03:32  lr: 0.005000  loss: 0.1559 (0.1736)  loss_classifier: 0.0406 (0.0549)  loss_box_reg: 0.1070 (0.1097)  loss_objectness: 0.0010 (0.0057)  loss_rpn_box_reg: 0.0019 (0.0034)  time: 0.1012  data: 0.0020  max mem: 6724
Epoch: [0]  [2330/4453]  eta: 0:03:31  lr: 0.005000  loss: 0.1559 (0.1736)  loss_classifier: 0.0440 (0.0548)  loss_box_reg: 0.1097 (0.1097)  loss_objectness: 0.0018 (0.0057)  loss_rpn_box_reg: 0.0024 (0.0034)  time: 0.1012  data: 0.0020  max mem: 6724
Epoch: [0]  [2340/4453]  eta: 0:03:30  lr: 0.005000  loss: 0.1879 (0.1736)  loss_classifier: 0.0484 (0.0548)  loss_box_reg: 0.1198 (0.1097)  loss_objectness: 0.0012 (0.0057)  loss_rpn_box_reg: 0.0032 (0.0034)  time: 0.1012  data: 0.0020  max me

Epoch: [0]  [2640/4453]  eta: 0:03:01  lr: 0.005000  loss: 0.1654 (0.1726)  loss_classifier: 0.0470 (0.0544)  loss_box_reg: 0.1072 (0.1093)  loss_objectness: 0.0008 (0.0056)  loss_rpn_box_reg: 0.0031 (0.0034)  time: 0.0991  data: 0.0016  max mem: 6724
Epoch: [0]  [2650/4453]  eta: 0:03:00  lr: 0.005000  loss: 0.1591 (0.1725)  loss_classifier: 0.0466 (0.0544)  loss_box_reg: 0.1006 (0.1092)  loss_objectness: 0.0023 (0.0056)  loss_rpn_box_reg: 0.0031 (0.0034)  time: 0.0992  data: 0.0016  max mem: 6724
Epoch: [0]  [2660/4453]  eta: 0:02:59  lr: 0.005000  loss: 0.1482 (0.1725)  loss_classifier: 0.0466 (0.0543)  loss_box_reg: 0.0961 (0.1092)  loss_objectness: 0.0016 (0.0056)  loss_rpn_box_reg: 0.0026 (0.0034)  time: 0.0991  data: 0.0017  max mem: 6724
Epoch: [0]  [2670/4453]  eta: 0:02:58  lr: 0.005000  loss: 0.1591 (0.1724)  loss_classifier: 0.0481 (0.0543)  loss_box_reg: 0.1098 (0.1092)  loss_objectness: 0.0013 (0.0056)  loss_rpn_box_reg: 0.0013 (0.0034)  time: 0.0989  data: 0.0016  max me

Epoch: [0]  [2970/4453]  eta: 0:02:28  lr: 0.005000  loss: 0.1206 (0.1722)  loss_classifier: 0.0371 (0.0542)  loss_box_reg: 0.0782 (0.1091)  loss_objectness: 0.0012 (0.0055)  loss_rpn_box_reg: 0.0011 (0.0034)  time: 0.1013  data: 0.0020  max mem: 6724
Epoch: [0]  [2980/4453]  eta: 0:02:27  lr: 0.005000  loss: 0.1299 (0.1721)  loss_classifier: 0.0360 (0.0542)  loss_box_reg: 0.0861 (0.1091)  loss_objectness: 0.0005 (0.0055)  loss_rpn_box_reg: 0.0009 (0.0034)  time: 0.1014  data: 0.0020  max mem: 6724
Epoch: [0]  [2990/4453]  eta: 0:02:26  lr: 0.005000  loss: 0.1433 (0.1720)  loss_classifier: 0.0380 (0.0541)  loss_box_reg: 0.1000 (0.1091)  loss_objectness: 0.0009 (0.0055)  loss_rpn_box_reg: 0.0017 (0.0034)  time: 0.1014  data: 0.0020  max mem: 6724
Epoch: [0]  [3000/4453]  eta: 0:02:25  lr: 0.005000  loss: 0.1504 (0.1720)  loss_classifier: 0.0422 (0.0541)  loss_box_reg: 0.1067 (0.1091)  loss_objectness: 0.0010 (0.0055)  loss_rpn_box_reg: 0.0017 (0.0034)  time: 0.1015  data: 0.0020  max me

Epoch: [0]  [3300/4453]  eta: 0:01:55  lr: 0.005000  loss: 0.1657 (0.1716)  loss_classifier: 0.0460 (0.0538)  loss_box_reg: 0.1003 (0.1090)  loss_objectness: 0.0007 (0.0054)  loss_rpn_box_reg: 0.0023 (0.0033)  time: 0.0988  data: 0.0016  max mem: 6724
Epoch: [0]  [3310/4453]  eta: 0:01:54  lr: 0.005000  loss: 0.1688 (0.1716)  loss_classifier: 0.0491 (0.0538)  loss_box_reg: 0.1069 (0.1091)  loss_objectness: 0.0008 (0.0054)  loss_rpn_box_reg: 0.0024 (0.0033)  time: 0.0988  data: 0.0016  max mem: 6724
Epoch: [0]  [3320/4453]  eta: 0:01:53  lr: 0.005000  loss: 0.1765 (0.1716)  loss_classifier: 0.0480 (0.0538)  loss_box_reg: 0.1000 (0.1090)  loss_objectness: 0.0015 (0.0054)  loss_rpn_box_reg: 0.0026 (0.0033)  time: 0.0995  data: 0.0017  max mem: 6724
Epoch: [0]  [3330/4453]  eta: 0:01:52  lr: 0.005000  loss: 0.1781 (0.1715)  loss_classifier: 0.0484 (0.0538)  loss_box_reg: 0.1017 (0.1090)  loss_objectness: 0.0016 (0.0054)  loss_rpn_box_reg: 0.0038 (0.0033)  time: 0.0996  data: 0.0018  max me

Epoch: [0]  [3630/4453]  eta: 0:01:22  lr: 0.005000  loss: 0.1588 (0.1712)  loss_classifier: 0.0372 (0.0536)  loss_box_reg: 0.1056 (0.1089)  loss_objectness: 0.0027 (0.0054)  loss_rpn_box_reg: 0.0021 (0.0033)  time: 0.1022  data: 0.0021  max mem: 6724
Epoch: [0]  [3640/4453]  eta: 0:01:21  lr: 0.005000  loss: 0.1895 (0.1713)  loss_classifier: 0.0554 (0.0536)  loss_box_reg: 0.1149 (0.1089)  loss_objectness: 0.0014 (0.0054)  loss_rpn_box_reg: 0.0030 (0.0034)  time: 0.1030  data: 0.0022  max mem: 6724
Epoch: [0]  [3650/4453]  eta: 0:01:20  lr: 0.005000  loss: 0.1693 (0.1712)  loss_classifier: 0.0436 (0.0536)  loss_box_reg: 0.1116 (0.1089)  loss_objectness: 0.0014 (0.0054)  loss_rpn_box_reg: 0.0022 (0.0034)  time: 0.1030  data: 0.0022  max mem: 6724
Epoch: [0]  [3660/4453]  eta: 0:01:19  lr: 0.005000  loss: 0.1481 (0.1712)  loss_classifier: 0.0381 (0.0535)  loss_box_reg: 0.0989 (0.1089)  loss_objectness: 0.0014 (0.0054)  loss_rpn_box_reg: 0.0023 (0.0034)  time: 0.1021  data: 0.0021  max me

Epoch: [0]  [3960/4453]  eta: 0:00:49  lr: 0.005000  loss: 0.1694 (0.1712)  loss_classifier: 0.0409 (0.0533)  loss_box_reg: 0.1204 (0.1093)  loss_objectness: 0.0027 (0.0052)  loss_rpn_box_reg: 0.0020 (0.0033)  time: 0.1013  data: 0.0020  max mem: 6724
Epoch: [0]  [3970/4453]  eta: 0:00:48  lr: 0.005000  loss: 0.1437 (0.1711)  loss_classifier: 0.0377 (0.0533)  loss_box_reg: 0.0939 (0.1093)  loss_objectness: 0.0016 (0.0052)  loss_rpn_box_reg: 0.0022 (0.0033)  time: 0.1014  data: 0.0020  max mem: 6724
Epoch: [0]  [3980/4453]  eta: 0:00:47  lr: 0.005000  loss: 0.1500 (0.1712)  loss_classifier: 0.0426 (0.0534)  loss_box_reg: 0.0939 (0.1093)  loss_objectness: 0.0024 (0.0053)  loss_rpn_box_reg: 0.0022 (0.0033)  time: 0.1011  data: 0.0020  max mem: 6724
Epoch: [0]  [3990/4453]  eta: 0:00:46  lr: 0.005000  loss: 0.1780 (0.1712)  loss_classifier: 0.0449 (0.0533)  loss_box_reg: 0.1181 (0.1093)  loss_objectness: 0.0020 (0.0053)  loss_rpn_box_reg: 0.0025 (0.0033)  time: 0.1009  data: 0.0020  max me

Epoch: [0]  [4290/4453]  eta: 0:00:16  lr: 0.005000  loss: 0.1591 (0.1711)  loss_classifier: 0.0464 (0.0533)  loss_box_reg: 0.1042 (0.1093)  loss_objectness: 0.0018 (0.0052)  loss_rpn_box_reg: 0.0032 (0.0033)  time: 0.1023  data: 0.0022  max mem: 6724
Epoch: [0]  [4300/4453]  eta: 0:00:15  lr: 0.005000  loss: 0.1388 (0.1710)  loss_classifier: 0.0371 (0.0532)  loss_box_reg: 0.0925 (0.1093)  loss_objectness: 0.0014 (0.0052)  loss_rpn_box_reg: 0.0032 (0.0033)  time: 0.1026  data: 0.0022  max mem: 6724
Epoch: [0]  [4310/4453]  eta: 0:00:14  lr: 0.005000  loss: 0.1293 (0.1710)  loss_classifier: 0.0323 (0.0532)  loss_box_reg: 0.0917 (0.1093)  loss_objectness: 0.0016 (0.0052)  loss_rpn_box_reg: 0.0026 (0.0033)  time: 0.1016  data: 0.0021  max mem: 6724
Epoch: [0]  [4320/4453]  eta: 0:00:13  lr: 0.005000  loss: 0.1497 (0.1710)  loss_classifier: 0.0311 (0.0532)  loss_box_reg: 0.1036 (0.1093)  loss_objectness: 0.0016 (0.0052)  loss_rpn_box_reg: 0.0018 (0.0033)  time: 0.1011  data: 0.0020  max me

IndexError: Caught IndexError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "/home/giora/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/giora/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/giora/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/giora/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataset.py", line 290, in __getitem__
    return self.dataset[self.indices[idx]]
  File "/home/giora/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataset.py", line 290, in __getitem__
    return self.dataset[self.indices[idx]]
IndexError: list index out of range


In [None]:
torch.cuda.max_memory_allocated(device=device)

In [None]:
torch.cuda.memory_summary(device=None, abbreviated=False)

In [None]:
model.eval()
x = [images[1], images[2]]
predictions= model(x)
predictions

In [None]:
idx = 1
img = T.ToPILImage()(x[idx].squeeze_(0))

# draw_rect(img, new_targets[0]['boxes'])
for box, label, score in zip(predictions[idx]['boxes'], predictions[idx]['labels'], predictions[idx]['scores']):
    if label == 1 and score > 0.6:
        draw_rect(img, [box])

In [22]:
# data_loader.dataset.dataset.images_ids.size4653
4653/9


517.0