# Phân tích dữ liệu là một yếu tố rất quan trọng để xây dựng một mô hình tốt
![Image](https://rochemamabolo.files.wordpress.com/2014/11/garbage-in-garbage-out.jpg)

# Install venv

In [None]:
!pip install cython pyyaml==5.1
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html

In [None]:
!git clone https://github.com/facebookresearch/detectron2

Cloning into 'detectron2'...
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 9604 (delta 0), reused 1 (delta 0), pack-reused 9597[K
Receiving objects: 100% (9604/9604), 3.83 MiB | 2.88 MiB/s, done.
Resolving deltas: 100% (7055/7055), done.


# Tải dữ liệu sử dụng gdown 

In [None]:
!gdown --id 1BlZ8t-SgcHh26wrNXau76Yg0Aq5Fd9V4

Downloading...
From: https://drive.google.com/uc?id=1BlZ8t-SgcHh26wrNXau76Yg0Aq5Fd9V4
To: /content/TRAIN.zip
469MB [00:04, 102MB/s] 

In [None]:
!unzip TRAIN.zip 

# Khai báo thư viện

In [None]:
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import matplotlib.patches as patches
import seaborn as sns
import glob2
from tqdm import tqdm
import pandas as pd
import imutils

from google.colab.patches import cv2_imshow

from bokeh.plotting import figure
from bokeh.io import output_notebook, show, output_file
from bokeh.models import ColumnDataSource, HoverTool, Panel
from bokeh.models.widgets import Tabs

# Lấy bounding box và vẽ ảnh

In [None]:
def get_all_bboxes(annot_path):
    """Read every annotated bounding box from one annotation text file.

    Each non-blank line must hold at least five whitespace-separated
    integers, read as ``class_id x1 y1 x2 y2``.

    Args:
        annot_path: Path of the annotation ``.txt`` file.

    Returns:
        A list of ``[class_id, [x1, y1, x2, y2]]`` entries in file order.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If a field is not an integer.
    """
    list_info = []
    with open(annot_path) as f:
        for line in f:
            fields = line.split()
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no annotation; skipping them avoids an IndexError.
            if not fields:
                continue
            class_id = int(fields[0])
            bbox = [int(fields[1]), int(fields[2]), int(fields[3]), int(fields[4])]
            list_info.append([class_id, bbox])

    return list_info

In [None]:
list_info = get_all_bboxes("TRAIN/TRAIN_0.txt")
list_info

[[0, [989, 66, 1029, 101]],
 [1, [949, 67, 983, 100]],
 [1, [528, 342, 541, 356]],
 [1, [876, 387, 893, 400]],
 [3, [1033, 69, 1067, 103]]]

In [None]:
def draw_image(image_name):
    """Load an image and draw its annotated boxes and class ids on it.

    The annotation file is looked up next to the image (same path, ``.txt``
    extension). Previously the directory was hard-coded to ``TRAIN``, so
    previewing an image from another directory such as ``new_train`` drew
    the boxes of the original TRAIN annotation instead of its own.

    Args:
        image_name: Path of the image file.

    Returns:
        The image array with one rectangle and one class-id label per box.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(image_name))

    annot_path = os.path.splitext(image_name)[0] + '.txt'

    for class_id, bbox in get_all_bboxes(annot_path):
        # draw rectangle
        image = cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2)
        # write class_id just above the top-left corner of the box
        image = cv2.putText(image, str(class_id), (bbox[0], bbox[1]-3), cv2.FONT_HERSHEY_SIMPLEX , 1, (255, 0, 0), 2, cv2.LINE_AA)

    return image

In [None]:
image = draw_image("TRAIN/TRAIN_0.jpg")
cv2_imshow(image)

# Số lượng ảnh

In [None]:
list_image = glob2.glob("TRAIN/*.jpg")
list_annot = glob2.glob("TRAIN/*.txt")

print("number image: ", len(list_image))
print("number annot: ", len(list_annot))

number image:  2850
number annot:  3000


In [None]:
list_image_png = glob2.glob("TRAIN/*.png")

print("number image png: ", len(list_image_png))

number image png:  150


# Xem thử các hình PNG

In [None]:
small_list = list_image_png[:20]
for image_name in small_list:
  image = draw_image(image_name)
  image = imutils.resize(image)
  cv2_imshow(image)

=> không loại bỏ được các hình PNG

# Gộp PNG và JPG

In [None]:
# Append every PNG path to the JPG list, in place, keeping the PNG order.
list_image.extend(list_image_png)

In [None]:
print("number image: ", len(list_image))

number image:  3000


In [None]:
# One annotation path per image, in the same order as list_image:
# "TRAIN/<stem>.<ext>" -> "TRAIN/<stem>.txt".
list_annot = [
    os.path.join("TRAIN", image_path.split("/")[1].split(".")[0] + ".txt")
    for image_path in list_image
]

print("len list_annot: ", len(list_annot))

len list_annot:  3000


In [None]:
small_list = list_image[50:70]
for image_name in small_list:
  image = draw_image(image_name)
  image = imutils.resize(image)
  cv2_imshow(image)

# Kiểm tra số lượng bbox và kích thước mỗi hình

In [None]:
def get_number_box(annot_path, image_path, list_widths_bbox, list_heights_bbox):
    """Collect the image size and per-box sizes for one annotated image.

    Args:
        annot_path: Path of the annotation ``.txt`` file.
        image_path: Path of the matching image file.
        list_widths_bbox: Accumulator list; the width of every box is
            appended to it in place.
        list_heights_bbox: Accumulator list; the height of every box is
            appended to it in place.

    Returns:
        A tuple ``(file_name, number_bb, width, height, list_widths_bbox,
        list_heights_bbox)`` where the two lists are the same objects that
        were passed in.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(image_path))

    # An image array is indexed (row, column, ...), i.e. (height, width, ...).
    # The previous unpacking order stored the height as "width" and vice versa.
    height, width = image.shape[:2]

    file_name = os.path.basename(image_path)

    # Parse the annotation file once; one entry per annotated box.
    list_info = get_all_bboxes(annot_path)
    number_bb = len(list_info)

    for _, bbox in list_info:
        list_widths_bbox.append(bbox[2] - bbox[0])
        list_heights_bbox.append(bbox[3] - bbox[1])

    return file_name, number_bb, width, height, list_widths_bbox, list_heights_bbox

In [None]:
# Per-image statistics (one entry per image).
list_file_names, list_number_bb = [], []
list_widths, list_heights = [], []

# Per-box statistics, accumulated over every image.
list_widths_bbox, list_heights_bbox = [], []

len_anno = len(list_annot)
with tqdm(total=len_anno) as pbar:
    for i in range(len_anno):
        annot_path, image_path = list_annot[i], list_image[i]

        # get_number_box appends to the two bbox lists and hands them back.
        stats = get_number_box(annot_path, image_path, list_widths_bbox, list_heights_bbox)
        file_name, number_bb, width, height, list_widths_bbox, list_heights_bbox = stats

        list_file_names.append(file_name)
        list_number_bb.append(number_bb)
        list_widths.append(width)
        list_heights.append(height)

        pbar.update(1)


100%|██████████| 3000/3000 [02:12<00:00, 22.64it/s]


In [None]:
count_bbox = {'file_name': list_file_names, 'width': list_widths, 'height': list_heights, 'number_bbox': list_number_bb}
train_count_bbox = pd.DataFrame(data=count_bbox)

In [None]:
train_count_bbox

In [None]:
image = draw_image("TRAIN/TRAIN_41.jpg")
cv2_imshow(image)

In [None]:
size_distribute = {'w_bbox': list_widths_bbox, 'h_bbox': list_heights_bbox}
size_distribute_df = pd.DataFrame(data=size_distribute)

In [None]:
size_distribute_df

# Kiểm tra phân phối dữ liệu

In [None]:
def hist_hover(dataframe, column, colors=("#94c8d8",), bins=19, title=''):
    """Show an interactive Bokeh histogram of one dataframe column.

    Args:
        dataframe: Source pandas DataFrame.
        column: Name of the column whose values are binned.
        colors: Sequence of colors; only the first one is used, as fill
            color. The default is a tuple so it cannot be mutated between
            calls.
        bins: Number of histogram bins passed to ``np.histogram``.
        title: Plot title.

    Returns:
        None. The plot is rendered inline in the notebook.
    """
    hist, edges = np.histogram(dataframe[column], bins=bins)

    hist_df = pd.DataFrame({column: hist,
                            "left": edges[:-1],
                            "right": edges[1:]})
    # Hover text for a bar: the left edge of its bin, truncated to an int.
    hist_df["interval"] = ["%d" % left for left in hist_df["left"]]

    src = ColumnDataSource(hist_df)
    plot = figure(plot_height=400, plot_width=800,
                  title=title,
                  x_axis_label=column,
                  y_axis_label="number of image")
    plot.quad(bottom=0, top=column, left="left",
              right="right", source=src, fill_color=colors[0],
              line_color="#35838d", fill_alpha=0.7,
              hover_fill_alpha=0.7, hover_fill_color=colors[0])

    # Label the bin value with the plotted column's name; it used to read
    # "Number of bbox" even for width / height / area histograms.
    hover = HoverTool(tooltips=[(column, '@interval'),
                                ('Count', "@" + column)])
    plot.add_tools(hover)

    output_notebook()
    show(plot)

In [None]:
hist_hover(train_count_bbox, 'number_bbox', title='Number of bbox per image')

In [None]:
hist_hover(train_count_bbox, 'width', title='Width of image')

In [None]:
hist_hover(train_count_bbox, 'height', title='height of image')

- Phân phối của width [600, 2000]
- Phân phối của height [100, 2800]
- => kết hợp lại thì điều kiện width, height sẽ trong khoảng là [600, 2100]

# Kiểm tra phân phối của bounding box 


In [None]:
hist_hover(size_distribute_df, 'w_bbox', title='width of bbox')

In [None]:
hist_hover(size_distribute_df, 'h_bbox', title='height of bbox')

Phần lớn logo có chiều dài là 150 => chọn width height trong vùng 150

# Loại bỏ bbox có kích thước lớn hơn 150

In [None]:
def get_small_bbox(annot_path, image_path, max_size=150):
    """Keep only the boxes whose width and height are both within a limit.

    Args:
        annot_path: Path of the annotation ``.txt`` file.
        image_path: Path of the matching image file.
        max_size: Largest accepted box width and height, inclusive.
            Defaults to 150, the limit this function used to hard-code.

    Returns:
        A tuple ``(list_new_info, image, image_name)``: the kept
        ``[class_id, bbox]`` entries, the loaded image array, and the image
        file name up to its first dot.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(image_path))

    # basename keeps the stem correct however deep the path is.
    image_name = os.path.basename(image_path).split(".")[0]

    list_new_info = []
    for class_id, bbox in get_all_bboxes(annot_path):
        w_bbox = bbox[2] - bbox[0]
        h_bbox = bbox[3] - bbox[1]
        if w_bbox <= max_size and h_bbox <= max_size:
            list_new_info.append([class_id, bbox])

    return list_new_info, image, image_name

In [None]:
# Create new_train: a copy of the dataset that keeps only the small boxes.
new_train_dir = "new_train"
os.makedirs(new_train_dir, exist_ok=True)

len_anno = len(list_annot)
with tqdm(total=len_anno) as pbar:
    for annot_path, image_path in zip(list_annot, list_image):
        list_new_info, image, image_name = get_small_bbox(annot_path, image_path)

        # Images left without any box are dropped from the new dataset.
        if list_new_info:
            new_image_path = os.path.join(new_train_dir, image_name + '.jpg')
            new_annot_path = os.path.join(new_train_dir, image_name + '.txt')
            cv2.imwrite(new_image_path, image)

            # "w", not "a+": re-running this cell must overwrite the
            # annotation instead of appending a duplicate set of boxes.
            with open(new_annot_path, "w") as f:
                for class_id, bbox in list_new_info:
                    f.write("{} {} {} {} {}\n".format(class_id, bbox[0], bbox[1], bbox[2], bbox[3]))

        pbar.update(1)

100%|██████████| 3000/3000 [02:52<00:00, 17.36it/s]


In [None]:
new_list_image = glob2.glob("new_train/*.jpg")
new_list_annot = glob2.glob("new_train/*.txt")

print("number new list image: ", len(new_list_image))
print("number new list annot: ", len(new_list_annot))

number new list image:  993
number new list annot:  993


In [None]:
small_list = new_list_image[40:60]
for image_name in small_list:
  image = draw_image(image_name)
  image = imutils.resize(image)
  cv2_imshow(image)

# Lưu lại dữ liệu đã xử lý

In [None]:
!zip -r new_train.zip new_train/

# Thống kê về diện tích

In [None]:
def get_area_box(annot_path, image_path, list_widths_bbox, list_heights_bbox, list_area_bbox):
    """Collect the image size and per-box width, height and area.

    Args:
        annot_path: Path of the annotation ``.txt`` file.
        image_path: Path of the matching image file.
        list_widths_bbox: Accumulator list; box widths are appended in place.
        list_heights_bbox: Accumulator list; box heights are appended in place.
        list_area_bbox: Accumulator list; box areas (width * height) are
            appended in place.

    Returns:
        A tuple ``(file_name, number_bb, width, height, list_widths_bbox,
        list_heights_bbox, list_area_bbox)`` where the three lists are the
        same objects that were passed in.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(image_path))

    # An image array is indexed (row, column, ...), i.e. (height, width, ...).
    # The previous unpacking order stored the height as "width" and vice versa.
    height, width = image.shape[:2]

    file_name = os.path.basename(image_path)

    # Parse the annotation file once; one entry per annotated box.
    list_info = get_all_bboxes(annot_path)
    number_bb = len(list_info)

    for _, bbox in list_info:
        w_bbox = bbox[2] - bbox[0]
        h_bbox = bbox[3] - bbox[1]
        list_widths_bbox.append(w_bbox)
        list_heights_bbox.append(h_bbox)
        list_area_bbox.append(w_bbox * h_bbox)

    return file_name, number_bb, width, height, list_widths_bbox, list_heights_bbox, list_area_bbox

In [None]:
# Per-image statistics (one entry per image).
list_file_names = []
list_number_bb = []
list_widths = []
list_heights = []

# Per-box statistics, accumulated over every image.
list_area_bbox = []
list_widths_bbox = []
list_heights_bbox = []

len_anno = len(new_list_annot)
with tqdm(total=len_anno) as pbar:
    for annot_path in new_list_annot:
        # Pair each annotation with its image by file stem. new_list_image and
        # new_list_annot come from two independent glob calls, so the same
        # index is not guaranteed to refer to the same sample. Every image in
        # new_train was written as "<stem>.jpg" next to "<stem>.txt".
        image_path = os.path.splitext(annot_path)[0] + ".jpg"

        file_name, number_bb, width, height, list_widths_bbox, list_heights_bbox, list_area_bbox = get_area_box(
            annot_path, image_path, list_widths_bbox, list_heights_bbox, list_area_bbox)

        list_file_names.append(file_name)
        list_number_bb.append(number_bb)
        list_widths.append(width)
        list_heights.append(height)

        pbar.update(1)

100%|██████████| 993/993 [00:31<00:00, 31.06it/s]


In [None]:
new_train = {'w_bbox': list_widths_bbox, 'h_bbox': list_heights_bbox, 'area': list_area_bbox}
new_train_df = pd.DataFrame(data=new_train)

In [None]:
new_train_df

In [None]:
hist_hover(new_train_df, 'area', title='area of bbox')

Đến đây cơ bản là đã xong về phân tích dữ liệu rồi. Từ cell dưới sẽ là huấn luyện mô hình faster-RCNN trên detectron2 và submit kết quả.

# Package detectron2

In [None]:
import os
import cv2
import json
import random
import glob2
import itertools
import torch
import numpy as np
from tqdm import tqdm
import xml.etree.ElementTree as ET

import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader, build_detection_train_loader
from detectron2.structures import BoxMode
from google.colab.patches import cv2_imshow
from detectron2.data import detection_utils 

import detectron2.data.transforms as T

import copy

import json

** fvcore version of PathManager will be deprecated soon. **
** Please migrate to the version in iopath repo. **
https://github.com/facebookresearch/iopath 

** fvcore version of PathManager will be deprecated soon. **
** Please migrate to the version in iopath repo. **
https://github.com/facebookresearch/iopath 



In [None]:
# glob returns files in filesystem order, which differs between machines and
# runs. Sort first, then shuffle with a fixed seed, so the 80/20 split is both
# reproducible and free of any ordering present in the file names.
anno_files = sorted(glob2.glob(os.path.join('new_train', "*.txt")))
random.Random(42).shuffle(anno_files)

number = 80 * len(anno_files) // 100
train_anno_files = anno_files[:number]
val_anno_files = anno_files[number:]
print(len(train_anno_files))
print(len(val_anno_files))

794
199


In [None]:
def get_data_dicts_txt(anno_files, image_dir='new_train'):
    """Convert annotation text files into a list of dataset dicts.

    Each annotation line is read as ``class_id x1 y1 x2 y2``. The matching
    image is ``<image_dir>/<stem>.jpg`` and is opened only to read its size.
    A file that cannot be converted (unreadable image, malformed line, class
    id out of range) is reported and skipped; the other files are still
    processed.

    Args:
        anno_files: Iterable of annotation ``.txt`` paths.
        image_dir: Directory holding the images. Defaults to ``'new_train'``,
            the directory this function used to hard-code.

    Returns:
        A list of dicts with the keys ``file_name``, ``image_id``,
        ``height``, ``width`` and ``annotations``. ``image_id`` counts the
        successfully converted files from 0.
    """
    classes = ['Logo_UIT', 'Logo_HSV', 'Logo_CS', 'Logo_CE', 'Logo_SE', 'Logo_ISE']
    dataset_dicts = []
    count = 0
    len_anno = len(anno_files)
    print('len_anno: ', len_anno)
    with tqdm(total=len_anno) as pbar:
        for file_path in anno_files:
            filename = file_path.split("/")[-1].split(".")[0]
            img_path = os.path.join(image_dir, filename + '.jpg')
            try:
                image = cv2.imread(img_path)
                if image is None:
                    # cv2.imread signals failure by returning None.
                    raise ValueError("cannot read image {}".format(img_path))
                height, width = image.shape[:2]

                objs = []
                # Context manager so the file handle is always closed.
                with open(file_path, 'r') as annotations:
                    for line in annotations:
                        fields = line.split()
                        if not fields:
                            # A blank line is not an annotation; do not let
                            # it discard the whole record.
                            continue
                        class_id, x1, y1, x2, y2 = fields
                        category_id = int(class_id)
                        if not 0 <= category_id < len(classes):
                            raise ValueError("class id {} out of range".format(category_id))
                        objs.append({
                            'bbox': [int(x1), int(y1), int(x2), int(y2)],
                            'bbox_mode': BoxMode.XYXY_ABS,
                            'category_id': category_id,
                            "iscrowd": 0
                        })
            except (OSError, ValueError) as e:
                # Best effort: report which file failed and move on.
                print("skip {}: {}".format(file_path, e))
            else:
                dataset_dicts.append({
                    "file_name": img_path,
                    "image_id": count,
                    "height": height,
                    "width": width,
                    "annotations": objs,
                })
                count += 1
            finally:
                # Advance for skipped files too, so the bar reaches 100%.
                pbar.update(1)

    return dataset_dicts

# get convert

In [None]:
train_dicts = get_data_dicts_txt(train_anno_files)

  1%|          | 6/794 [00:00<00:16, 47.27it/s]

len_anno:  794


100%|██████████| 794/794 [00:29<00:00, 27.31it/s]


In [None]:
val_dicts = get_data_dicts_txt(val_anno_files)

  1%|          | 2/199 [00:00<00:12, 15.91it/s]

len_anno:  199


100%|██████████| 199/199 [00:07<00:00, 28.39it/s]


In [None]:
with open('train.json', 'w') as fp:
    json.dump(train_dicts, fp)

with open('val.json', 'w') as fp:
    json.dump(val_dicts, fp)

# Load json

In [None]:
# with open('val_receipt.json', 'r') as fp:
#     val_dicts = json.load(fp)

# with open('train_receipt.json', 'r') as fp:
#     train_dicts = json.load(fp)

# View data

In [None]:
# Re-assign the box mode on every annotation of both splits (validation
# first, then train), replacing whatever value each annotation held.
for split_dicts in (val_dicts, train_dicts):
    for record in split_dicts:
        for annotation in record["annotations"]:
            annotation['bbox_mode'] = BoxMode.XYXY_ABS

In [None]:
classes = ['Logo_UIT', 'Logo_HSV', 'Logo_CS', 'Logo_CE', 'Logo_SE', 'Logo_ISE']
data = [train_dicts, val_dicts]

# Register "logouit_data/train" and "logouit_data/val" with the same class list.
for index, split in enumerate(("train", "val")):
    dataset_name = "logouit_data/" + split
    # index is bound as a default argument so each loader keeps its own split
    # instead of sharing the loop variable's final value.
    DatasetCatalog.register(dataset_name, lambda index=index: data[index])
    MetadataCatalog.get(dataset_name).set(thing_classes=classes)

logo_metadata = MetadataCatalog.get("logouit_data/train")

# Custom mapper

In [None]:
class CustomTrainer(DefaultTrainer):
    """DefaultTrainer subclass whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        Args:
            cfg: Config object forwarded to the evaluator.
            dataset_name: Name of the registered dataset to evaluate.
            output_folder: Directory passed to the evaluator. When None,
                ``detectron_eval`` is created if needed and used instead.
        """
        if output_folder is not None:
            return COCOEvaluator(dataset_name, cfg, False, output_folder)

        default_folder = "detectron_eval"
        os.makedirs(default_folder, exist_ok=True)
        return COCOEvaluator(dataset_name, cfg, False, default_folder)

In [None]:
import random
import matplotlib.pyplot as plt

# Draw the ground-truth annotations of two random validation samples.
for d in random.sample(val_dicts, 2):
    img = cv2.imread(d["file_name"])
    # Reverse the channel axis before handing the image to the Visualizer.
    v = Visualizer(img[:, :, ::-1], metadata=logo_metadata, scale=0.5)
    v = v.draw_dataset_dict(d)

    # Reverse the channels again and let OpenCV convert BGR -> RGB for display.
    drawn = v.get_image()[:, :, ::-1]
    plt.figure(figsize=(14, 10))
    plt.imshow(cv2.cvtColor(drawn, cv2.COLOR_BGR2RGB))
    plt.show()

# Train

In [None]:
# Build the training configuration: start from the Faster R-CNN R101-FPN 3x
# config file of the cloned detectron2 repository, then override the settings
# below for the logo dataset.
cfg = get_cfg()
cfg.merge_from_file("./detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("logouit_data/train",)
cfg.DATASETS.TEST = ("logouit_data/val",)   # validation split registered in this notebook
cfg.DATALOADER.NUM_WORKERS = 4

# Pre-trained weights, see https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md
cfg.MODEL.WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl"  # initialize from model zoo
# cfg.MODEL.WEIGHTS = "faster_rcnn_R_101_FPN_3x_model/model_final.pth"
print("CHECK WEIGHTS: ", cfg.MODEL.WEIGHTS)
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0001
cfg.SOLVER.MAX_ITER = 3000 # 3k iterations
# NOTE(review): 4 is a very small value for this setting - confirm it is intended.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 4
# Six logo classes, matching the six names in the `classes` list of this notebook.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 6
cfg.OUTPUT_DIR = "./faster_rcnn_R_101_FPN_3x_model"

# Presumably runs the evaluator built by CustomTrainer every 1000 iterations - confirm.
cfg.TEST.EVAL_PERIOD = 1000

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CustomTrainer(cfg)
# resume=False: do not resume from a previous run in OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()

CHECK WEIGHTS:  faster_rcnn_R_101_FPN_3x_model/model_final.pth
[32m[12/20 13:26:55 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): Fr

# predict

In [None]:
import os
import cv2
import json
import random
import itertools
import numpy as np

from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, evaluator
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.structures import BoxMode
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt

import json

def predict (path_weigths, path_config, confidence_threshold, num_of_class, path_img):
    """Run a detector on a single image file.

    NOTE: a later cell of this notebook defines another ``predict`` with a
    different signature; whichever cell ran last is the one in effect.

    Args:
        path_weigths: Path (or URL) of the model weights.
        path_config: Path of the config file to merge.
        confidence_threshold: Value assigned to
            ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``.
        num_of_class: Value assigned to ``MODEL.ROI_HEADS.NUM_CLASSES``.
        path_img: Path of the image to run on.

    Returns:
        Whatever the ``DefaultPredictor`` returns for the image.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    cfg = get_cfg()
    cfg.merge_from_file(path_config)
    cfg.MODEL.WEIGHTS = path_weigths

    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 8
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_of_class
    predictor = DefaultPredictor(cfg)

    im = cv2.imread(path_img)
    if im is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an obscure error inside the predictor.
        raise FileNotFoundError("cannot read image: {}".format(path_img))

    outputs = predictor(im)

    return outputs

# Inputs: out = output of predict, frame = the original image,
# classs = class names used for the labels.
def visualize (out, frame, classs):
    """Draw every detection scoring above 0.4 onto ``frame`` and return it.

    Each kept detection gets a rectangle and its class name, both in a
    partly random color, and its details are printed.
    """
    instances = out['instances']
    boxes = instances.pred_boxes
    scores = instances.scores
    classes = instances.pred_classes

    for i in range(len(classes)):
        if not (scores[i] > 0.4):
            continue

        # boxes[i] is iterated to reach the coordinates of detection i.
        for j in boxes[i]:
            start = (int(j[0]), int(j[1]))
            end = (int(j[2]), int(j[3]))
            print (start)
            print (end)
            width = end[0] - start[0]
            height = end[1] - start[1]
            print ('width:', width)
            print ('height:', height)
            print('class:', int(classes[i]))
            print('score:', float(scores[i]))
            print ('---------------------', start, end, scores[i], classes[i])

        color = int(classes[i])
        print (classes[i])

        box_color = (random.randint(0, 255), random.randint(0, 255), 255)
        cv2.rectangle(frame, start, end, box_color, 3)

        label = str(classs[color])
        text_color = (random.randint(0, 255), random.randint(0, 255), 255)
        cv2.putText(frame, label, start, cv2.FONT_HERSHEY_PLAIN, 1, text_color, 2)

    return frame


path_weigth = "faster_rcnn_R_101_FPN_3x_model/model_final.pth"
path_config = "./detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
confidences_threshold = 0.4
path_img = 'new_train/TRAIN_0.jpg'
classes = ['Logo_UIT', 'Logo_HSV', 'Logo_CS', 'Logo_CE', 'Logo_SE', 'Logo_ISE']
# The head size must match the checkpoint, which was trained with
# NUM_CLASSES = 6. Deriving it from the class list (it was hard-coded to 5)
# keeps the two in sync.
num_of_class = len(classes)

outputs = predict(path_weigth, path_config, confidences_threshold, num_of_class, path_img)
print(outputs)
_frame = cv2.imread(path_img)
frame = visualize (outputs, _frame, classes )
cv2_imshow(frame)

# Submit 

In [None]:
!gdown --id 1Ng8UWSg2XNMWS49qhchTz24wSfE80cfA

Downloading...
From: https://drive.google.com/uc?id=1Ng8UWSg2XNMWS49qhchTz24wSfE80cfA
To: /content/WARM_UP.zip
370MB [00:02, 182MB/s]


In [None]:
!unzip WARM_UP.zip

In [None]:
import os
import cv2
import json
import random
import itertools
import numpy as np
import argparse
import cv2

from time import gmtime, strftime

def predict(image, predictor, list_labels, score_threshold=0.4):
    """Run ``predictor`` on ``image`` and return the confident detections.

    Args:
        image: Image passed unchanged to ``predictor``.
        predictor: Callable returning a mapping whose ``'instances'`` entry
            exposes ``pred_boxes``, ``scores`` and ``pred_classes``.
        list_labels: Unused; kept so existing callers keep working.
        score_threshold: Detections must score strictly above this value.
            Defaults to 0.4, the value this function used to hard-code.

    Returns:
        A tuple ``(list_boxes, list_scores, list_classes)`` of equal length:
        ``[x1, y1, x2, y2]`` int lists, float scores and int class ids.
    """
    instances = predictor(image)['instances']

    list_boxes = []
    list_scores = []
    list_classes = []

    for box, score, label in zip(instances.pred_boxes, instances.scores, instances.pred_classes):
        score = float(score)
        if not score > score_threshold:
            continue

        list_boxes.append([int(box[0]), int(box[1]), int(box[2]), int(box[3])])
        list_scores.append(score)
        # Plain int, not a tensor element: callers format this value into
        # text, where a tensor would be written as "tensor(3, ...)".
        list_classes.append(int(label))

    return list_boxes, list_scores, list_classes


In [None]:
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg as config_detectron

# Set up the detectron2 predictor used to produce the submission.
# Weights written by the training cell of this notebook (same OUTPUT_DIR).
path_weigth = "faster_rcnn_R_101_FPN_3x_model/model_final.pth"
path_config = "./detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
confidences_threshold = 0.4
# Six classes, matching the six names in CLASSES below.
num_of_class = 6

detectron = config_detectron()
# NOTE(review): requires a GPU runtime - confirm before running on CPU.
detectron.MODEL.DEVICE= 'cuda'
detectron.merge_from_file(path_config)
detectron.MODEL.WEIGHTS = path_weigth

detectron.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidences_threshold
detectron.MODEL.ROI_HEADS.NUM_CLASSES = num_of_class

PREDICTOR = DefaultPredictor(detectron)

# Class names; the position in the list is the class id.
CLASSES = ['Logo_UIT', 'Logo_HSV', 'Logo_CS', 'Logo_CE', 'Logo_SE', 'Logo_ISE']

In [None]:
path = "TEST"
list_path_test = glob2.glob(os.path.join(path, "*.jpg"))
print(len(list_path_test))

955


In [None]:
len_list = len(list_path_test)

# Open the submission once, in "w" mode. The previous "a+" mode appended to
# any existing file, so re-running this cell duplicated every row.
with open("submission.txt", "w") as f, tqdm(total=len_list) as pbar:
    for image_path in list_path_test:
        image = cv2.imread(image_path)
        image_name = image_path.split("/")[-1]
        list_boxes, list_scores, list_classes = predict(image, PREDICTOR, CLASSES)

        for class_id, bbox in zip(list_classes, list_boxes):
            # int() makes sure a plain number is written even when the class
            # id comes back as a tensor element.
            f.write("{}, {}, {}, {}, {}, {}\n".format(image_name, int(class_id), bbox[0], bbox[1], bbox[2], bbox[3]))

        pbar.update(1)

100%|██████████| 955/955 [02:13<00:00,  7.13it/s]
