In [1]:
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import cv2

In [3]:
import pandas as pd

# Load the training annotations. The preview below shows one row per labelled
# box, so an image with several boxes appears on several rows.
df = pd.read_csv('flowers/train_labels.csv')

# Bare last expression: display the first rows to check the parsed columns
# (filename, width, height, class, xmin, ymin, xmax, ymax).
df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,_IGP1402.jpg,4928,3264,flower,3444,1790,4151,2783
1,_IGP1403.jpg,4928,3264,flower,2419,912,3094,1622
2,_IGP1403.jpg,4928,3264,flower,4062,1515,4537,2183
3,_IGP1404.jpg,4928,3264,flower,2708,1608,2958,1854
4,_IGP1405.jpg,4928,3264,flower,3301,401,4140,1129


In [8]:
import os
import numpy as np
import json
from detectron2.structures import BoxMode
import itertools
import cv2

def get_tomato_dicts(csv_file, img_dir):
    """Build Detectron2-style dataset records from a bounding-box label CSV.

    Args:
        csv_file: Path to a CSV with at least the columns ``filename``,
            ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``; one row per
            labelled box.
        img_dir: Directory holding the images named in the ``filename``
            column. A trailing path separator is optional.

    Returns:
        A list with one dict per distinct image, in order of first appearance
        in the CSV. Each dict has ``file_name``, ``height`` and ``width``
        (taken from the decoded image, not from the CSV) and ``annotations``,
        a list with one entry per CSV row holding ``bbox``
        (``[xmin, ymin, xmax, ymax]``), ``bbox_mode``, ``category_id`` and
        ``iscrowd``.

    Raises:
        FileNotFoundError: If an image listed in the CSV cannot be read.
        ValueError: If a row's ``class`` is not one of the known classes.
    """
    classes = ['flower']

    df = pd.read_csv(csv_file)
    # os.path.join accepts img_dir with or without a trailing separator.
    df['filename'] = df['filename'].map(lambda name: os.path.join(img_dir, name))
    df['class_int'] = df['class'].map(lambda label: classes.index(label))

    annotation_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'class_int']

    dataset_dicts = []
    # Group once instead of filtering the whole frame for every image;
    # sort=False keeps images in order of first appearance in the CSV.
    for filename, rows in df.groupby('filename', sort=False):
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread signals a missing/undecodable file by returning None
            # rather than raising, so fail here with a message naming the file.
            raise FileNotFoundError(
                "Could not read image {!r} referenced in {!r}".format(filename, csv_file)
            )
        height, width = image.shape[:2]

        # itertuples yields plain Python scalars per column, which keeps the
        # records JSON-serialisable.
        annotations = [
            {
                'bbox': [xmin, ymin, xmax, ymax],
                'bbox_mode': BoxMode.XYXY_ABS,
                'category_id': int(category),
                "iscrowd": 0,
            }
            for xmin, ymin, xmax, ymax, category
            in rows[annotation_columns].itertuples(index=False, name=None)
        ]

        dataset_dicts.append({
            "file_name": filename,
            "height": height,
            "width": width,
            "annotations": annotations,
        })
    return dataset_dicts

In [10]:
from detectron2.data import DatasetCatalog, MetadataCatalog

# Category names, in category_id order, attached to each dataset's metadata.
classes = ['flower']

for d in ["train", "test"]:
  dataset_name = 'flower2_data/' + d
  # DatasetCatalog.register rejects a name that is already registered, so
  # re-running this cell in a live kernel would fail. Skipping is safe: the
  # lambda looks up get_tomato_dicts at call time, so a redefined function is
  # still picked up.
  if dataset_name not in DatasetCatalog.list():
    # d=d binds the current split name; a plain closure would see only the
    # last value of d by the time the loader is invoked.
    DatasetCatalog.register(dataset_name, lambda d=d: get_tomato_dicts('flowers/' + d + '_labels.csv', 'flowers/' + d+'/'))
  MetadataCatalog.get(dataset_name).set(thing_classes=classes)
tomato_metadata = MetadataCatalog.get('flower2_data/train')

In [11]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.logger import setup_logger
setup_logger()

# --- Model: RetinaNet R101-FPN (3x schedule) config plus a local checkpoint ---
cfg = get_cfg()
cfg.merge_from_file("./detectron2/model_zoo/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = "model_final_59f53c.pkl"
cfg.MODEL.DEVICE = "cuda:0"
# Single foreground class ('flower').
cfg.MODEL.RETINANET.NUM_CLASSES = 1

# --- Data: train on the registered flower split; no evaluation set ---
cfg.DATASETS.TRAIN = ('flower2_data/train',)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2

# --- Solver ---
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.0001
cfg.SOLVER.MAX_ITER = 10000

# --- Output: the directory must exist before the trainer is constructed ---
cfg.OUTPUT_DIR = 'output_flower'
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# resume=False: start from cfg.MODEL.WEIGHTS rather than resuming a previous
# run found in OUTPUT_DIR.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

Loading config ./detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


[32m[01/03 21:56:56 d2.engine.defaults]: [0mModel:
RetinaNet(
  (backbone): FPN(
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelP6P7(
      (p6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (p7): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    )
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
   

[32m[01/03 21:57:20 d2.data.build]: [0mRemoved 0 images with no usable annotations. 90 images left.
[32m[01/03 21:57:20 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   flower   | 104          |
|            |              |[0m
[32m[01/03 21:57:20 d2.data.detection_utils]: [0mTransformGens used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[01/03 21:57:20 d2.data.build]: [0mUsing training sampler TrainingSampler


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
'head.cls_score.weight' has shape (720, 256, 3, 3) in the checkpoint but (9, 256, 3, 3) in the model! Skipped.
'head.cls_score.bias' has shape (720,) in the checkpoint but (9,) in the model! Skipped.


[32m[01/03 21:57:20 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[01/03 21:57:50 d2.utils.events]: [0meta: 3:26:01  iter: 19  total_loss: 1.646  loss_cls: 1.166  loss_box_reg: 0.470  time: 1.4149  data_time: 0.6478  lr: 0.000002  max_mem: 5584M
[32m[01/03 21:58:19 d2.utils.events]: [0meta: 3:25:36  iter: 39  total_loss: 1.636  loss_cls: 1.165  loss_box_reg: 0.486  time: 1.4198  data_time: 0.5627  lr: 0.000004  max_mem: 5584M
[32m[01/03 21:58:47 d2.utils.events]: [0meta: 3:25:25  iter: 59  total_loss: 1.589  loss_cls: 1.153  loss_box_reg: 0.442  time: 1.4110  data_time: 0.5613  lr: 0.000006  max_mem: 5584M
[32m[01/03 21:59:15 d2.utils.events]: [0meta: 3:25:10  iter: 79  total_loss: 1.550  loss_cls: 1.138  loss_box_reg: 0.420  time: 1.4149  data_time: 0.5703  lr: 0.000008  max_mem: 5584M
[32m[01/03 21:59:43 d2.utils.events]: [0meta: 3:26:58  iter: 99  total_loss: 1.521  loss_cls: 1.117  loss_box_reg: 0.389  time: 1.4088  data_time: 0.5834  lr: 0.000010  ma

[32m[01/03 22:18:37 d2.utils.events]: [0meta: 3:37:33  iter: 899  total_loss: 0.106  loss_cls: 0.064  loss_box_reg: 0.048  time: 1.4159  data_time: 0.6224  lr: 0.000090  max_mem: 5584M
[32m[01/03 22:19:06 d2.utils.events]: [0meta: 3:37:05  iter: 919  total_loss: 0.111  loss_cls: 0.065  loss_box_reg: 0.052  time: 1.4161  data_time: 0.5779  lr: 0.000092  max_mem: 5584M
[32m[01/03 22:19:33 d2.utils.events]: [0meta: 3:36:36  iter: 939  total_loss: 0.100  loss_cls: 0.062  loss_box_reg: 0.035  time: 1.4156  data_time: 0.5523  lr: 0.000094  max_mem: 5584M
[32m[01/03 22:20:02 d2.utils.events]: [0meta: 3:36:07  iter: 959  total_loss: 0.097  loss_cls: 0.059  loss_box_reg: 0.041  time: 1.4154  data_time: 0.5733  lr: 0.000096  max_mem: 5584M
[32m[01/03 22:20:30 d2.utils.events]: [0meta: 3:35:38  iter: 979  total_loss: 0.093  loss_cls: 0.058  loss_box_reg: 0.038  time: 1.4157  data_time: 0.5856  lr: 0.000098  max_mem: 5584M
[32m[01/03 22:20:59 d2.utils.events]: [0meta: 3:35:10  iter: 99

[32m[01/03 22:39:59 d2.utils.events]: [0meta: 2:53:00  iter: 1779  total_loss: 0.032  loss_cls: 0.013  loss_box_reg: 0.015  time: 1.4354  data_time: 0.5978  lr: 0.000100  max_mem: 5584M
[32m[01/03 22:40:27 d2.utils.events]: [0meta: 2:52:35  iter: 1799  total_loss: 0.027  loss_cls: 0.013  loss_box_reg: 0.015  time: 1.4354  data_time: 0.6363  lr: 0.000100  max_mem: 5584M
[32m[01/03 22:40:56 d2.utils.events]: [0meta: 2:53:06  iter: 1819  total_loss: 0.026  loss_cls: 0.012  loss_box_reg: 0.013  time: 1.4355  data_time: 0.5998  lr: 0.000100  max_mem: 5584M
[32m[01/03 22:41:25 d2.utils.events]: [0meta: 2:52:40  iter: 1839  total_loss: 0.027  loss_cls: 0.014  loss_box_reg: 0.013  time: 1.4356  data_time: 0.6103  lr: 0.000100  max_mem: 5584M
[32m[01/03 22:41:54 d2.utils.events]: [0meta: 3:01:09  iter: 1859  total_loss: 0.023  loss_cls: 0.010  loss_box_reg: 0.014  time: 1.4358  data_time: 0.6337  lr: 0.000100  max_mem: 5584M
[32m[01/03 22:42:23 d2.utils.events]: [0meta: 3:16:07  ite

[32m[01/03 23:01:27 d2.utils.events]: [0meta: 2:57:41  iter: 2659  total_loss: 0.014  loss_cls: 0.006  loss_box_reg: 0.008  time: 1.4445  data_time: 0.6852  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:01:56 d2.utils.events]: [0meta: 2:57:12  iter: 2679  total_loss: 0.013  loss_cls: 0.005  loss_box_reg: 0.008  time: 1.4448  data_time: 0.6156  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:02:25 d2.utils.events]: [0meta: 2:56:43  iter: 2699  total_loss: 0.016  loss_cls: 0.005  loss_box_reg: 0.010  time: 1.4448  data_time: 0.6083  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:02:55 d2.utils.events]: [0meta: 2:56:14  iter: 2719  total_loss: 0.015  loss_cls: 0.005  loss_box_reg: 0.009  time: 1.4449  data_time: 0.5983  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:03:24 d2.utils.events]: [0meta: 2:55:45  iter: 2739  total_loss: 0.013  loss_cls: 0.005  loss_box_reg: 0.007  time: 1.4451  data_time: 0.6070  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:03:53 d2.utils.events]: [0meta: 2:55:15  ite

[32m[01/03 23:22:57 d2.utils.events]: [0meta: 2:17:10  iter: 3539  total_loss: 0.010  loss_cls: 0.003  loss_box_reg: 0.007  time: 1.4495  data_time: 0.6657  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:23:27 d2.utils.events]: [0meta: 2:16:45  iter: 3559  total_loss: 0.010  loss_cls: 0.003  loss_box_reg: 0.006  time: 1.4496  data_time: 0.6132  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:23:56 d2.utils.events]: [0meta: 2:16:19  iter: 3579  total_loss: 0.009  loss_cls: 0.003  loss_box_reg: 0.006  time: 1.4497  data_time: 0.6302  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:24:25 d2.utils.events]: [0meta: 2:15:54  iter: 3599  total_loss: 0.010  loss_cls: 0.003  loss_box_reg: 0.006  time: 1.4498  data_time: 0.6138  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:24:55 d2.utils.events]: [0meta: 2:15:28  iter: 3619  total_loss: 0.009  loss_cls: 0.003  loss_box_reg: 0.006  time: 1.4499  data_time: 0.6350  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:25:24 d2.utils.events]: [0meta: 2:15:03  ite

[32m[01/03 23:44:18 d2.utils.events]: [0meta: 2:15:47  iter: 4419  total_loss: 0.007  loss_cls: 0.002  loss_box_reg: 0.005  time: 1.4506  data_time: 0.6948  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:44:48 d2.utils.events]: [0meta: 2:15:18  iter: 4439  total_loss: 0.009  loss_cls: 0.003  loss_box_reg: 0.006  time: 1.4507  data_time: 0.6140  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:45:17 d2.utils.events]: [0meta: 2:14:49  iter: 4459  total_loss: 0.007  loss_cls: 0.002  loss_box_reg: 0.005  time: 1.4508  data_time: 0.6644  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:45:47 d2.utils.events]: [0meta: 2:14:19  iter: 4479  total_loss: 0.007  loss_cls: 0.002  loss_box_reg: 0.005  time: 1.4508  data_time: 0.6200  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:46:15 d2.utils.events]: [0meta: 2:13:50  iter: 4499  total_loss: 0.008  loss_cls: 0.002  loss_box_reg: 0.005  time: 1.4508  data_time: 0.5999  lr: 0.000100  max_mem: 5584M
[32m[01/03 23:46:44 d2.utils.events]: [0meta: 2:13:21  ite

[32m[01/04 00:05:36 d2.utils.events]: [0meta: 1:54:13  iter: 5299  total_loss: 0.006  loss_cls: 0.001  loss_box_reg: 0.004  time: 1.4506  data_time: 0.6800  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:06:05 d2.utils.events]: [0meta: 1:53:44  iter: 5319  total_loss: 0.005  loss_cls: 0.002  loss_box_reg: 0.004  time: 1.4506  data_time: 0.6233  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:06:35 d2.utils.events]: [0meta: 1:53:15  iter: 5339  total_loss: 0.005  loss_cls: 0.002  loss_box_reg: 0.004  time: 1.4506  data_time: 0.6444  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:07:03 d2.utils.events]: [0meta: 1:52:46  iter: 5359  total_loss: 0.006  loss_cls: 0.002  loss_box_reg: 0.004  time: 1.4506  data_time: 0.6487  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:07:33 d2.utils.events]: [0meta: 1:52:17  iter: 5379  total_loss: 0.005  loss_cls: 0.001  loss_box_reg: 0.004  time: 1.4507  data_time: 0.6376  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:08:02 d2.utils.events]: [0meta: 1:51:48  ite

[32m[01/04 00:27:01 d2.utils.events]: [0meta: 1:32:21  iter: 6179  total_loss: 0.005  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4518  data_time: 0.5820  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:27:30 d2.utils.events]: [0meta: 1:33:12  iter: 6199  total_loss: 0.005  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4517  data_time: 0.6019  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:28:00 d2.utils.events]: [0meta: 1:32:26  iter: 6219  total_loss: 0.005  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4519  data_time: 0.6472  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:28:29 d2.utils.events]: [0meta: 1:31:19  iter: 6239  total_loss: 0.005  loss_cls: 0.001  loss_box_reg: 0.004  time: 1.4519  data_time: 0.6016  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:28:58 d2.utils.events]: [0meta: 1:30:48  iter: 6259  total_loss: 0.005  loss_cls: 0.001  loss_box_reg: 0.004  time: 1.4518  data_time: 0.6012  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:29:27 d2.utils.events]: [0meta: 1:30:19  ite

[32m[01/04 00:48:19 d2.utils.events]: [0meta: 1:11:45  iter: 7059  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4517  data_time: 0.6113  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:48:48 d2.utils.events]: [0meta: 1:11:16  iter: 7079  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4518  data_time: 0.6335  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:49:18 d2.utils.events]: [0meta: 1:10:47  iter: 7099  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4518  data_time: 0.6401  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:49:47 d2.utils.events]: [0meta: 1:10:11  iter: 7119  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4518  data_time: 0.7056  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:50:16 d2.utils.events]: [0meta: 1:09:48  iter: 7139  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4519  data_time: 0.7018  lr: 0.000100  max_mem: 5584M
[32m[01/04 00:50:45 d2.utils.events]: [0meta: 1:09:19  ite

[32m[01/04 01:09:41 d2.utils.events]: [0meta: 0:49:28  iter: 7939  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4522  data_time: 0.6081  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:10:10 d2.utils.events]: [0meta: 0:49:00  iter: 7959  total_loss: 0.003  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4522  data_time: 0.6175  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:10:40 d2.utils.events]: [0meta: 0:48:31  iter: 7979  total_loss: 0.003  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4523  data_time: 0.6297  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:11:08 d2.utils.events]: [0meta: 0:48:02  iter: 7999  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4522  data_time: 0.5637  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:11:37 d2.utils.events]: [0meta: 0:47:33  iter: 8019  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4521  data_time: 0.5721  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:12:06 d2.utils.events]: [0meta: 0:47:04  ite

[32m[01/04 01:31:02 d2.utils.events]: [0meta: 0:28:00  iter: 8819  total_loss: 0.004  loss_cls: 0.001  loss_box_reg: 0.003  time: 1.4524  data_time: 0.6207  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:31:30 d2.utils.events]: [0meta: 0:27:07  iter: 8839  total_loss: 0.003  loss_cls: 0.001  loss_box_reg: 0.002  time: 1.4523  data_time: 0.5547  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:31:59 d2.utils.events]: [0meta: 0:26:39  iter: 8859  total_loss: 0.003  loss_cls: 0.000  loss_box_reg: 0.002  time: 1.4523  data_time: 0.6170  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:32:28 d2.utils.events]: [0meta: 0:26:11  iter: 8879  total_loss: 0.003  loss_cls: 0.001  loss_box_reg: 0.002  time: 1.4523  data_time: 0.6099  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:32:58 d2.utils.events]: [0meta: 0:25:43  iter: 8899  total_loss: 0.003  loss_cls: 0.001  loss_box_reg: 0.002  time: 1.4524  data_time: 0.6117  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:33:27 d2.utils.events]: [0meta: 0:25:15  ite

[32m[01/04 01:52:20 d2.utils.events]: [0meta: 0:06:26  iter: 9699  total_loss: 0.003  loss_cls: 0.001  loss_box_reg: 0.002  time: 1.4524  data_time: 0.6294  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:52:49 d2.utils.events]: [0meta: 0:06:00  iter: 9719  total_loss: 0.003  loss_cls: 0.000  loss_box_reg: 0.002  time: 1.4523  data_time: 0.6330  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:53:18 d2.utils.events]: [0meta: 0:05:34  iter: 9739  total_loss: 0.003  loss_cls: 0.001  loss_box_reg: 0.002  time: 1.4523  data_time: 0.6159  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:53:47 d2.utils.events]: [0meta: 0:05:48  iter: 9759  total_loss: 0.002  loss_cls: 0.000  loss_box_reg: 0.002  time: 1.4523  data_time: 0.7198  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:54:16 d2.utils.events]: [0meta: 0:05:19  iter: 9779  total_loss: 0.003  loss_cls: 0.000  loss_box_reg: 0.002  time: 1.4523  data_time: 0.6854  lr: 0.000100  max_mem: 5584M
[32m[01/04 01:54:46 d2.utils.events]: [0meta: 0:04:50  ite

OrderedDict()