In [None]:
# Mount Google Drive at /content/drive so the project folder stored on Drive
# is reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Make the project folder the working directory; the relative paths used by
# the following cells (./models, ./annotations, ./test_images, ...) start here.
%cd /content/drive/MyDrive/CS117

# Cài và import thư viện

In [None]:
# Install the libraries. The commands run from inside the mmdetection checkout
# and the working directory is restored to the project root at the end.
# NOTE(review): the mmcv-full wheel index below targets cu101/torch1.7.0 while
# torch 1.7.1 is installed afterwards from the default index - confirm that the
# resulting CUDA builds match on the runtime in use.
%cd mmdetection
!pip install -r requirements/build.txt
!pip install "git+https://github.com/open-mmlab/cocoapi.git#subdirectory=pycocotools"
!pip install -v -e .  # or "python setup.py develop"
!pip install mmcv-full==1.3.8 -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
!pip install torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2
!pip install vietocr
%cd ..

In [None]:
# Import the libraries. The imports are executed from inside the mmdetection
# checkout and the working directory is restored to the project root afterwards.
%cd mmdetection
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
import mmcv
import numpy as np
import glob
import os
import json
from tqdm import notebook
from scipy.ndimage import interpolation as inter
import cv2
from vietocr.tool.predictor import Predictor
from vietocr.tool.config import Cfg
from PIL import Image
%cd ..

# Test hiệu suất phát hiện đối tượng

In [None]:
!python mmdetection/tools/test.py \
    ./models/detection/config_faster_rcnn_r50.py \
    ./models/detection/epoch_12.pth \
    --eval bbox --options "classwise=True" \
    # --options "jsonfile_prefix=./results"

In [None]:
# Install tidecv, which provides the TIDE class used by the evaluation cell below.
!pip3 install tidecv

In [None]:
# Extract only the "annotations" list of the test set, give every entry a
# score of 1, and save it as its own JSON file so it can be loaded with the
# same reader as the detection results for comparison.
import json

with open('./annotations/test.json', 'r') as source_file:
    annotations = json.load(source_file)

ground_truth_boxes = annotations['annotations']
for annotation in ground_truth_boxes:
    annotation['score'] = 1

with open('./annotations/test_annotations.json', 'w') as target_file:
    json.dump(ground_truth_boxes, target_file)

In [None]:
# Compare the exported test annotations with the detector output using TIDE
# in bounding-box mode, then report the result as tables and as a figure.
from tidecv import TIDE, datasets

tide = TIDE()
tide_ground_truth = datasets.COCOResult('./annotations/test_annotations.json')
tide_predictions = datasets.COCOResult('./results.bbox.json')

# Use TIDE.MASK instead of TIDE.BOX for masks.
tide.evaluate_range(tide_ground_truth, tide_predictions, mode=TIDE.BOX)

# Print the summary tables to the console.
tide.summarize()
# Show a summary figure. Specify a folder and it will output a png to that folder.
tide.plot()

# Baseline: xuất file results.csv (nén thành submission.zip) để nộp lên hệ thống AIHub

In [None]:
# Detection model: config file and trained checkpoint.
config_detection: str = './models/detection/config_faster_rcnn_r50.py'
check_point_detection: str = './models/detection/epoch_12.pth'

# Folder holding the images the submission pipeline is run on.
test_dir: str = './test_images'

In [None]:
def _load_recognizer(weights_path, device='cuda:0'):
    """Build a VietOCR predictor from the 'vgg_transformer' preset.

    Args:
        weights_path: Checkpoint path stored under config['weights'].
        device: Device string stored under config['device'].

    Returns:
        A (config, predictor) tuple: the config that was used and the
        Predictor created from it. Beam search is disabled in the config.
    """
    config = Cfg.load_config_from_name('vgg_transformer')
    config['weights'] = weights_path
    config['device'] = device
    config['predictor']['beamsearch'] = False
    return config, Predictor(config)


# Localization model.
model = init_detector(config_detection, check_point_detection, device='cuda:0')
# Threshold: only detections whose score is above this value are kept.
score_thr = 0.85

# Recognition models: one VietOCR predictor per field, each with its own weights.
config_seller, detector_seller = _load_recognizer('./models/recognition/seller.pth')
config_address, detector_address = _load_recognizer('./models/recognition/address.pth')
config_timestamp, detector_timestamp = _load_recognizer('./models/recognition/timestamp.pth')
config_totalcost, detector_totalcost = _load_recognizer('./models/recognition/totalcost.pth')

# Maps the detector's class id to the predictor used for boxes of that class.
dict_model_OCR = {
    0: detector_seller,
    1: detector_address,
    2: detector_timestamp,
    3: detector_totalcost,
}

In [None]:
# Download the data by running the project's download_data.sh script.
!bash download_data.sh

In [None]:
# Project-local helpers: correct_skew is applied to every cropped text box
# before OCR; Sort_address / Sort_timestamp post-process the per-class box
# lists (presumably ordering them - see utils for the exact rule).
from utils import correct_skew, Sort_address, Sort_timestamp

In [None]:
# Run the whole pipeline on every file in test_dir:
#   1. localization - detect text boxes and keep those scoring above score_thr
#   2. rule-based   - group the boxes by class and post-process their order
#   3. recognition  - crop each box, deskew it and run the OCR model of its class
# Outputs:
#   dict_detection[file] -> list of [cls, x1, y1, x2, y2, score] (as strings)
#   dict_per_cls[file]   -> {class id: list of float boxes}
#   dict_text[file]      -> {class id: list of recognized strings}
imgs = [os.path.basename(path) for path in glob.glob(os.path.join(test_dir, '*'))]
dict_detection = {}

print('Localization...')
for file_name in notebook.tqdm(imgs):
    result = inference_detector(model, os.path.join(test_dir, file_name))

    # Some detectors return (bbox_result, segm_result); only the boxes are used.
    bbox_result = result[0] if isinstance(result, tuple) else result

    # bbox_result holds one array per class: stack them and build the matching
    # class id for every row.
    bboxes = np.vstack(bbox_result)
    labels = np.concatenate([
        np.full(cls_bboxes.shape[0], cls_id, dtype=np.int32)
        for cls_id, cls_bboxes in enumerate(bbox_result)
    ])

    # The last column is the confidence score.
    keep = bboxes[:, -1] > score_thr
    dict_detection[file_name] = [
        [str(cls_id)] + [str(value) for value in bbox[:5]]
        for cls_id, bbox in zip(labels[keep], bboxes[keep, :])
    ]

print('Rule-based...')

# Group the boxes of every image by class and reorder them.
dict_per_cls = {}
for img, str_bboxes in dict_detection.items():
    per_cls = {0: [], 1: [], 2: [], 3: []}
    for str_bbox in str_bboxes:
        bbox = [float(value) for value in str_bbox]  # convert back to float
        cls_id = int(bbox[0])
        # Boxes of a class without a bucket are still dropped, but explicitly,
        # so unrelated errors are no longer hidden by a bare except.
        if cls_id in per_cls:
            per_cls[cls_id].append(bbox)

    per_cls[1] = Sort_address(per_cls[1])
    per_cls[2] = Sort_timestamp(per_cls[2])
    per_cls[3] = Sort_timestamp(per_cls[3])
    dict_per_cls[img] = per_cls


print('Recognition...')
dict_text = {}
# Not filled in by this cell; the name is kept in case other cells rely on it.
result_visualize = {}
for img in notebook.tqdm(dict_per_cls):
    # NOTE(review): cv2.imread returns BGR data while PIL assumes RGB - confirm
    # whether correct_skew / the OCR models expect a colour conversion here.
    image = cv2.imread(os.path.join(test_dir, img))
    dict_cls = {0: [], 1: [], 2: [], 3: []}
    for cls, cls_bboxes in dict_per_cls[img].items():
        for bbox in cls_bboxes:
            x1, y1, x2, y2 = (int(coord) for coord in bbox[1:5])
            # A negative start would be read as an index from the end of the axis.
            x1, y1 = max(x1, 0), max(y1, 0)
            crop_img = image[y1:y2, x1:x2]
            if crop_img.size == 0:
                # Degenerate box: nothing to rotate or recognize.
                continue
            h, w = crop_img.shape[:2]
            if w <= h:
                # Crops that are not wider than tall are rotated before OCR.
                crop_img = cv2.rotate(crop_img, cv2.ROTATE_90_CLOCKWISE)
            _, skewd = correct_skew(crop_img)
            pil_image = Image.fromarray(skewd)
            dict_cls[cls].append(dict_model_OCR[cls].predict(pil_image))
    dict_text[img] = dict_cls

In [None]:
# Load the image ids listed in the sample CSV; `imgs` is replaced by this list
# (column 'img_id'), which the export cell iterates over when writing results.csv.
import pandas as pd
df = pd.read_csv('./mcocr_test_samples_df.csv')
imgs = df['img_id'].tolist()

In [None]:
# Write results.csv: one row per image id in `imgs` with the columns
# img_id, anno_image_quality and anno_texts. anno_texts joins the recognized
# strings of each class with spaces and the four classes (0..3) with '|||'.
# The constant 0.5 is written as anno_image_quality for every image.
import csv

# newline='' is what the csv module expects for files it writes; the encoding
# is fixed so the recognized text is stored the same way on every platform.
with open('results.csv', 'w', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(['img_id', 'anno_image_quality', 'anno_texts'])
    for img in imgs:
        # Image ids without recognition output get an empty text instead of
        # raising KeyError.
        texts_per_cls = dict_text.get(img)
        if texts_per_cls:
            text = '|||'.join(' '.join(texts_per_cls[cls]) for cls in (0, 1, 2, 3))
        else:
            text = ''
        csv_writer.writerow([img, 0.5, text])

In [None]:
# Package results.csv into submission.zip.
!zip submission.zip results.csv