In [None]:
import os, torch

# Restrict this process to one GPU. The variable is set here, before any
# torch.cuda call is made in this notebook.
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "1"
# os.environ['NVIDIA_VISIBLE_DEVICES'] = "0"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('Device:', device)
# torch.cuda.current_device() raises when no CUDA device is usable, so only
# query it when CUDA is actually available (the cell otherwise falls back to CPU).
if torch.cuda.is_available():
    print('Current cuda device:', torch.cuda.current_device())
else:
    print('Current cuda device:', None)
print('Count of using GPUs:', torch.cuda.device_count())

# detect

In [None]:
class Args():
    """Plain settings container for the CRAFT text-detection cells.

    Stands in for an ``argparse`` namespace: every constructor argument is
    stored unchanged as an instance attribute of the same name.
    """

    def __init__(self,cuda=True, trained_model='weights/craft_mlt_25k.pth', text_threshold=0.7, low_text=0.4, link_threshold=0.4, canvas_size =1100, mag_ratio=1.5, poly=False, show_time=False,test_folder='/data/',refine=False, refiner_model='weights/craft_refiner_CTW1500.pth'):
        self.cuda = cuda
        self.trained_model = trained_model
        self.text_threshold = text_threshold
        self.low_text = low_text
        self.link_threshold = link_threshold
        self.canvas_size = canvas_size
        self.mag_ratio = mag_ratio
        self.poly = poly
        self.show_time = show_time
        self.test_folder = test_folder
        self.refine = refine
        self.refiner_model = refiner_model
        
def img_show(img, size =(15,15)):
    """Display an image inline with matplotlib.

    Args:
        img: Image array that is converted with ``cv2.COLOR_BGR2RGB`` before
            being shown, i.e. it is treated as BGR.
        size: Figure size used for this plot only.
    """
    # Scope the figure size to this call instead of permanently overwriting
    # the global rcParams for every later plot in the notebook.
    with plt.rc_context({"figure.figsize": size}):
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.show()
    

In [None]:
# Detection settings with all defaults; read by the detection cells below.
args = Args()

In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import cv2
import math

""" auxilary functions """
# unwarp coordinates
def warpCoord(Minv, pt):
    """Map the 2-D point ``pt`` through the matrix ``Minv`` in homogeneous form.

    The point is extended with a third coordinate of 1, multiplied by
    ``Minv``, and the first two results are divided by the third.

    Returns:
        A length-2 ``np.ndarray`` holding the transformed (x, y).
    """
    homogeneous = (pt[0], pt[1], 1)
    projected = np.matmul(Minv, homogeneous)
    scale = projected[2]
    return np.array([projected[0] / scale, projected[1] / scale])
""" end of auxilary functions """


def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
    """Extract one quadrilateral box per connected text region of the score maps.

    Args:
        textmap: 2-D text score map (its shape is unpacked as height, width).
        linkmap: 2-D link score map, combined element-wise with ``textmap``.
        text_threshold: A region is kept only if its maximum text score
            reaches this value.
        link_threshold: Threshold applied to ``linkmap``.
        low_text: Threshold applied to ``textmap``.

    Returns:
        (det, labels, mapper): ``det`` is a list of 4x2 corner arrays,
        ``labels`` is the connected-component label image, and ``mapper[i]``
        is the label id that produced ``det[i]``.
    """
    # prepare data
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape
    """ labeling method """
    # threshold both maps (output value 1), then label the clipped sum of the two
    ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
    ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)

    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8), connectivity=4)

    det = []
    mapper = []
    # label 0 is skipped; every other label is a candidate region
    for k in range(1,nLabels):
        # size filtering: drop components smaller than 10 pixels
        size = stats[k, cv2.CC_STAT_AREA]
        if size < 10: continue

        # thresholding: the strongest text score inside the component must reach text_threshold
        if np.max(textmap[labels==k]) < text_threshold: continue

        # make segmentation map
        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[labels==k] = 255
        segmap[np.logical_and(link_score==1, text_score==0)] = 0   # remove link area
        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
        # dilation amount derived from the component area and its bounding-box shape
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
        sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
        # boundary check: clamp the padded window to the image
        if sx < 0 : sx = 0
        if sy < 0 : sy = 0
        if ex >= img_w: ex = img_w
        if ey >= img_h: ey = img_h
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1 + niter, 1 + niter))
        # dilate only inside the padded window around the component
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

        # make box: np.where yields (row, col) index arrays; the roll swaps them so each point is (col, row)
        np_contours = np.roll(np.array(np.where(segmap!=0)),1,axis=0).transpose().reshape(-1,2)
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        # align diamond-shape: for a near-square box use the axis-aligned extent of the points instead
        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            l, r = min(np_contours[:,0]), max(np_contours[:,0])
            t, b = min(np_contours[:,1]), max(np_contours[:,1])
            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)

        # make clock-wise order: rotate the corners so the one with the smallest x + y comes first
        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4-startidx, 0)
        box = np.array(box)

        det.append(box)
        mapper.append(k)

    return det, labels, mapper

def getPoly_core(boxes, labels, mapper, linkmap):
    """Try to build a polygon outline for every detected box.

    Each box is rectified with a perspective warp of ``labels``; pivot points
    are then sampled along the rectified region and mapped back to the
    original coordinates with the inverse transform.

    Args:
        boxes: Sequence of 4x2 corner arrays.
        labels: Label image that is warped for each box.
        mapper: ``mapper[k]`` is the label id belonging to ``boxes[k]``.
        linkmap: Accepted for interface compatibility; not read here.

    Returns:
        A list with one entry per box: an ``np.ndarray`` of polygon points,
        or ``None`` when no polygon could be built for that box.
    """
    # configs
    num_cp = 5
    max_len_ratio = 0.7
    expand_ratio = 1.45
    max_r = 2.0
    step_r = 0.2

    polys = []
    for k, box in enumerate(boxes):
        # size filter for small instance
        w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
        if w < 10 or h < 10:
            polys.append(None)
            continue

        # warp image
        tar = np.float32([[0,0],[w,0],[w,h],[0,h]])
        M = cv2.getPerspectiveTransform(box, tar)
        word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
        try:
            Minv = np.linalg.inv(M)
        except np.linalg.LinAlgError:
            # Only a singular transform is an expected failure; anything else
            # (including KeyboardInterrupt) must not be swallowed.
            polys.append(None)
            continue

        # binarization for selected label
        cur_label = mapper[k]
        word_label[word_label != cur_label] = 0
        word_label[word_label > 0] = 1

        """ Polygon generation """
        # find top/bottom contours: for each column keep (x, first row, last row)
        cp = []
        max_len = -1
        for i in range(w):
            region = np.where(word_label[:,i] != 0)[0]
            if len(region) < 2:
                continue
            cp.append((i, region[0], region[-1]))
            length = region[-1] - region[0] + 1
            if length > max_len:
                max_len = length

        # pass if max_len is similar to h
        if h * max_len_ratio < max_len:
            polys.append(None)
            continue

        # get pivot points with fixed length
        tot_seg = num_cp * 2 + 1
        seg_w = w / tot_seg     # segment width
        pp = [None] * num_cp    # init pivot points
        # entries are replaced (never mutated in place), so the shared inner list is harmless
        cp_section = [[0, 0]] * tot_seg
        seg_height = [0] * num_cp
        seg_num = 0
        num_sec = 0
        prev_h = -1
        for i in range(0,len(cp)):
            (x, sy, ey) = cp[i]
            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
                # average previous segment
                if num_sec == 0:
                    break
                cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
                num_sec = 0

                # reset variables
                seg_num += 1
                prev_h = -1

            # accumulate center points
            cy = (sy + ey) * 0.5
            cur_h = ey - sy + 1
            cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
            num_sec += 1

            if seg_num % 2 == 0:
                continue # No polygon area

            # within an odd segment keep the column with the tallest extent as the pivot
            if prev_h < cur_h:
                pp[int((seg_num - 1)/2)] = (x, cy)
                seg_height[int((seg_num - 1)/2)] = cur_h
                prev_h = cur_h

        # processing last segment
        if num_sec != 0:
            cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]

        # pass if num of pivots is not sufficient or segment width is smaller than character height
        if None in pp or seg_w < np.max(seg_height) * 0.25:
            polys.append(None)
            continue

        # calc median maximum of pivot points
        half_char_h = np.median(seg_height) * expand_ratio / 2

        # calc gradient and apply to make horizontal pivots
        new_pp = []
        for i, (x, cy) in enumerate(pp):
            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
            if dx == 0:     # gradient is zero
                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
                continue
            rad = - math.atan2(dy, dx)
            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
            new_pp.append([x - s, cy - c, x + s, cy + c])

        # get edge points to cover character heatmaps
        isSppFound, isEppFound = False, False
        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
        for r in np.arange(0.5, max_r, step_r):
            dx = 2 * half_char_h * r
            if not isSppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_s * dx
                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                # accept once the probe line no longer touches the region, or near the end of the search range
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    spp = p
                    isSppFound = True
            if not isEppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_e * dx
                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    epp = p
                    isEppFound = True
            if isSppFound and isEppFound:
                break

        # pass if boundary of polygon is not found
        if not (isSppFound and isEppFound):
            polys.append(None)
            continue

        # make final polygon: upper side left-to-right, then lower side right-to-left
        poly = []
        poly.append(warpCoord(Minv, (spp[0], spp[1])))
        for p in new_pp:
            poly.append(warpCoord(Minv, (p[0], p[1])))
        poly.append(warpCoord(Minv, (epp[0], epp[1])))
        poly.append(warpCoord(Minv, (epp[2], epp[3])))
        for p in reversed(new_pp):
            poly.append(warpCoord(Minv, (p[2], p[3])))
        poly.append(warpCoord(Minv, (spp[2], spp[3])))

        # add to final result
        polys.append(np.array(poly))

    return polys

def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
    """Detect text boxes and, when ``poly`` is true, polygons for them.

    Returns:
        (boxes, polys): ``boxes`` comes from ``getDetBoxes_core``; ``polys``
        has one entry per box and is all ``None`` when ``poly`` is false.
    """
    boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)

    # Without polygon mode, hand back a None placeholder for every box.
    if not poly:
        return boxes, [None] * len(boxes)

    return boxes, getPoly_core(boxes, labels, mapper, linkmap)

def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2):
    """Scale detected point sets by ``ratio * ratio_net`` per axis.

    Args:
        polys: Sequence whose items are point arrays of shape (N, 2) or
            ``None``. Items may differ in length.
        ratio_w: Scale factor applied to the first coordinate of each point.
        ratio_h: Scale factor applied to the second coordinate of each point.
        ratio_net: Extra multiplier applied to both axes.

    Returns:
        ``polys`` unchanged when it is empty; otherwise an ``np.ndarray``
        holding the scaled items, with ``None`` entries left as ``None``.
    """
    if len(polys) == 0:
        return polys

    scale = (ratio_w * ratio_net, ratio_h * ratio_net)
    try:
        polys = np.array(polys)
    except ValueError:
        # Recent NumPy refuses to build an array from ragged input (entries of
        # different lengths, or None mixed with arrays) unless an object array
        # is requested explicitly, so fill one element by element.
        ragged = np.empty(len(polys), dtype=object)
        for idx, item in enumerate(polys):
            ragged[idx] = item
        polys = ragged

    for k in range(len(polys)):
        if polys[k] is not None:
            polys[k] *= scale
    return polys

In [None]:
import sys
import os
import time
import argparse
from pathlib import Path
from glob import glob

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

from PIL import Image

import cv2
from skimage import io
import numpy as np
# import craft_utils
import imgproc
import file_utils
import json
import zipfile

from collections import OrderedDict

from torch.autograd import Variable

import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

from icecream import ic
import matplotlib.pyplot as plt

# Module-level TensorRT logger; RTLayer._load_engine passes it to trt.Runtime.
TRT_LOGGER = trt.Logger()

class RTLayer():
    """CRAFT text detection that runs its forward pass through a TensorRT engine.

    The engine file ``<data_path>/detect.engine`` is deserialised once in the
    constructor. Calling the instance pre-processes one image, runs it through
    the engine and post-processes the score maps into boxes.
    """

    def __init__(self, config=None, model_path=None, data_path='./weights',
                 engine_path=None, cuda_ctx=None, input_shape=None):
        """Load TensorRT plugins and the serialized engine.

        Args:
            config: Accepted but not used.
            model_path: Accepted but not used.
            data_path: Directory that contains ``detect.engine``.
            engine_path: Accepted but not used; the engine path is always
                ``<data_path>/detect.engine``.
            cuda_ctx: Optional object with a ``push()`` method; pushed if truthy.
            input_shape: Initial (height, width); overwritten on every call.
        """
        super().__init__()
        data_path = os.path.abspath(data_path)
        self.engine_path = data_path + '/detect.engine'

        self.cuda_ctx = cuda_ctx
        if self.cuda_ctx:
            self.cuda_ctx.push()

        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self._load_plugins()
        self.engine = self._load_engine()
        self.input_shape = input_shape

    def __call__(self, args, image, arg_cuda):
        """Detect text in ``image``.

        Args:
            args: Settings object; ``canvas_size``, ``mag_ratio``,
                ``text_threshold``, ``link_threshold``, ``low_text`` and
                ``poly`` are read from it.
            image: Input image passed to ``imgproc.resize_aspect_ratio``.
            arg_cuda: Accepted but not used.

        Returns:
            (boxes, polys, ret_score_text): scaled boxes, polygons (falling
            back to the box where no polygon exists) and the value returned by
            ``imgproc.cvt2HeatmapImg`` for the side-by-side score maps.

        Note:
            Reads the module-level name ``refine_net``; it must be defined
            (possibly as ``None``) before the instance is called.
        """
        trt_infer_befo_mem = torch.cuda.memory_allocated()/1024/1024
        print("current_memory:", trt_infer_befo_mem)

        # resize
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        img_resized = imgproc.normalizeMeanVariance(img_resized)
        img_resized = torch.from_numpy(img_resized).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
        img_resized = Variable(img_resized.unsqueeze(0))                # [c, h, w] to [b, c, h, w]

        # This timer brackets no work (the host-to-GPU tensor transfer it once
        # measured is disabled); the print is kept so the cell output is unchanged.
        t0 = time.time()
        t0 = time.time() - t0
        print(t0)

        # feed to engine and process output
        height, width = img_resized.shape[2:4]
        self.input_shape = (height,width)
        img_resized = img_resized.cpu().detach().numpy()

        segment_inputs, segment_outputs, segment_bindings = self._allocate_buffers()

        stream = cuda.Stream()
        with self.engine.create_execution_context() as context:
            context.active_optimization_profile = 0
            origin_inputshape = context.get_binding_shape(0)

            # A trailing -1 marks a dynamic input shape: fix it to this image's size.
            if (origin_inputshape[-1]==-1):
                origin_inputshape[-2],origin_inputshape[-1]=(self.input_shape)
                context.set_binding_shape(0,(origin_inputshape))

            input_img_array = np.array([img_resized] * self.engine.max_batch_size)

            img = torch.from_numpy(input_img_array).float().numpy()

            segment_inputs[0].host = img

            # host -> device copies, then wait for them to finish
            for inp in segment_inputs:
                cuda.memcpy_htod_async(inp.device, inp.host, stream)
            stream.synchronize()

            # run inference
            context.execute_async_v2(bindings=segment_bindings, stream_handle=stream.handle)
            stream.synchronize()

            # device -> host copies, then wait for them to finish
            for out in segment_outputs:
                cuda.memcpy_dtoh_async(out.host, out.device, stream)
            stream.synchronize()
            bs1, bs2 = context.get_binding_shape(2),context.get_binding_shape(1)

            y_out = segment_outputs[1].host
            feature_out = segment_outputs[0].host

        t1 = time.time()
        # The host buffers are flat; cut them to the binding volume and reshape.
        y1 =  y_out[0:np.array(bs1).prod()].reshape(bs1)
        feature1 = feature_out[0:np.array(bs2).prod()].reshape(bs2)

        y = torch.from_numpy(y1)
        feature = torch.from_numpy(feature1)

        # make score and link map
        score_text = y[0,:,:,0].cpu().data.numpy()
        score_link = y[0,:,:,1].cpu().data.numpy()

        # refine link
        if refine_net is not None:
            with torch.no_grad():
                y_refiner = refine_net(y, feature)
            score_link = y_refiner[0,:,:,0].cpu().data.numpy()

        t1 = time.time() - t1
        print(t1)
        # Post-processing
        boxes, polys = getDetBoxes(score_text, score_link, args.text_threshold, args.link_threshold, args.low_text, args.poly)

        # coordinate adjustment
        boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None: polys[k] = boxes[k]

        # render results (optional)
        render_img = score_text.copy()
        render_img = np.hstack((render_img, score_link))
        ret_score_text = imgproc.cvt2HeatmapImg(render_img)

        trt_infer_mem = torch.cuda.memory_allocated()/1024/1024
        print("trt infer memory: %fMB"%(trt_infer_mem-trt_infer_befo_mem))

        return boxes, polys, ret_score_text

    def _load_plugins(self):
        """Initialise TensorRT plugins (plus the legacy flatten-concat library on TensorRT < 7)."""
        # Compare the major version numerically: comparing the first character
        # as a string treats "10.x" as older than "7".
        if int(trt.__version__.split('.')[0]) < 7:
            import ctypes  # only needed for the legacy plugin library
            ctypes.CDLL("./libflattenconcat.so")
        trt.init_libnvinfer_plugins(self.trt_logger, '')
        print('success load pluginx')

    def _load_engine(self):
        """Deserialise the engine stored at ``self.engine_path`` and return it."""
        assert os.path.exists(self.engine_path), "engine file not found: {}".format(self.engine_path)
        print("Reading engine from file {}".format(self.engine_path))
        with open(self.engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            print('success load engine')
            return runtime.deserialize_cuda_engine(f.read())

    def _allocate_buffers(self):
        """Allocate one host/device buffer pair per engine binding.

        Buffer sizes are derived from ``self.input_shape``, so that attribute
        must be set before this is called.

        Returns:
            (inputs, outputs, bindings): lists of host/device pairs for the
            input and output bindings, and the device addresses of all
            bindings in engine order.
        """
        inputs = []
        outputs = []
        bindings = []
        class HostDeviceMem(object):
            """Pairs a host buffer with its device allocation."""
            def __init__(self, host_mem, device_mem):
                self.host = host_mem
                self.device = device_mem

            def __str__(self):
                return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

            def __repr__(self):
                return self.__str__()

        for binding in self.engine:

            dims = self.engine.get_binding_shape(binding)
            if dims[-1] == -1:
                # last dimension is dynamic: the final two dims take the input size
                assert(self.input_shape is not None)
                dims[-2],dims[-1] = self.input_shape
            else:
                # otherwise dims -3 and -2 are set to half the input size
                dims[-3],dims[-2] = int(self.input_shape[0]/2), int(self.input_shape[1]/2)
            size = trt.volume(dims) * self.engine.max_batch_size
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))

        print('success allocate_buffers')
        return inputs, outputs, bindings

    # def __del__(self):
    #     """Free CUDA memories and context."""
    #     del self.cuda_outputs
    #     del self.cuda_inputs
    #     del self.stream

In [None]:
import imgproc
import cv2
from torch.autograd import Variable
import numpy as np

import pycuda.driver as cuda
import pycuda.autoinit

# Path of the single test image used by both detection runs below.
image_path = './IMG_7602.jpg'
print("Test image :", image_path)

# `image` is consumed by the detection cells and later converted to grayscale for OCR.
image = imgproc.loadImage(image_path)


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
_____________________________________________________________________________
This file contains main inference pipeline to Tensor RT
_____________________________________________________________________________
"""
from icecream import ic
import os
import time
import argparse

from PIL import Image

import cv2
from skimage import io
import imgproc
import file_utils

# from trt_layer import RTLayer

def str2bool(v):
    """Return True when ``v`` (case-insensitive) is one of yes/y/true/t/1."""
    lowered = v.lower()
    return any(lowered == token for token in ("yes", "y", "true", "t", "1"))

# parser = argparse.ArgumentParser(description='TensorRT inference pipeline for CRAFT Text Detection')
# parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold')
# parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
# parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
# parser.add_argument('--canvas_size', default=1100, type=int, help='image size for inference')
# parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
# parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
# parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
# parser.add_argument('--test_folder', default='images/', type=str, help='folder path to input images')

# args = parser.parse_args()

""" For test images in a folder """

# Output directory for detection results; created on first run.
result_folder = './result/'
# exist_ok avoids the check-then-create race of isdir() followed by mkdir().
os.makedirs(result_folder, exist_ok=True)

def test_net(args, image):
    """Run one image through a freshly constructed ``RTLayer`` and time the call.

    Args:
        args: Settings object forwarded to the layer; ``args.cuda`` is passed
            as the layer's third argument.
        image: Input image forwarded to the layer.

    Returns:
        (boxes, polys, ret_score_text) exactly as returned by the layer.
    """
    layer = RTLayer()
    t = time.time()
    # RTLayer.__call__ requires three arguments; omitting the last one raised TypeError.
    boxes, polys, ret_score_text = layer(args, image, args.cuda)
    print(f'infer time: {time.time()-t}')
    return boxes, polys, ret_score_text


In [None]:
# LinkRefiner
# Optional link-refinement network. It stays None unless args.refine is set;
# RTLayer.__call__ reads this module-level name, so this cell must run before
# the layer is called.
refine_net = None
if args.refine:
    from refinenet import RefineNet
    refine_net = RefineNet()
    print('Loading weights of refiner from checkpoint (' + args.refiner_model + ')')
    # NOTE(review): copyStateDict is defined in a later cell of this notebook,
    # so this branch only works once that cell has been executed.
    if args.cuda:
        refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
        refine_net = refine_net.cuda()
        refine_net = torch.nn.DataParallel(refine_net)
    else:
        refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model, map_location='cpu')))

    refine_net.eval()
    # enabling the refiner also switches polygon output on
    args.poly = True

In [None]:
# Build the TensorRT layer once and report load time plus the change in
# torch.cuda.memory_allocated() (in MB) across the load.
# NOTE(review): that counter presumably only covers memory allocated through
# PyTorch; the engine is loaded via TensorRT/pycuda, so the delta may not
# reflect it - confirm before relying on this number.
trt_load_befo_mem = torch.cuda.memory_allocated()/1024/1024
print("current_memory:", trt_load_befo_mem)
t2 = time.time()
layer = RTLayer()
print(f'load time: {time.time()-t2}')

trt_load_mem = torch.cuda.memory_allocated()/1024/1024

print("trt_load_mem: %fMB"%(trt_load_mem-trt_load_befo_mem))

In [None]:
# TensorRT detection on the loaded image; `boxes` is reused by the OCR cells below.
boxes, polys, ret_score_text = layer(args, image, args.cuda)


In [None]:
# Draw the TensorRT boxes on a copy of the input image and display it.
# for idx,i in enumerate(image_boxes):
img_1 = image.copy()
# NOTE: shape[:2] is unpacked into (imageWidth, imageHeight), and cv2.resize is
# then called with (resizeHeight, resizeWidth); the two name swaps cancel out,
# and with a factor of 1 the target size equals the source size.
imageWidth, imageHeight = img_1.shape[:2]
resizeHeight = int(1 * imageHeight)
resizeWidth = int(1 * imageWidth)
resizeImageNDArray = cv2.resize(img_1, (resizeHeight, resizeWidth), interpolation = cv2.INTER_CUBIC)

for pts in boxes:

    # polylines needs integer (x, y) points
    pts = np.array(pts).reshape(-1,2).astype(np.int32)

    resizeImageNDArray = cv2.polylines(resizeImageNDArray, [pts], True, (0,0,255),2)

img_show(resizeImageNDArray)

In [None]:
import sys
import os
import time
import argparse

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

from PIL import Image

import cv2
from skimage import io
import numpy as np
from text_detection import craft_utils
from text_detection import imgproc
import text_detection.file_utils
import json
import zipfile

from text_detection.craft import CRAFT

from collections import OrderedDict

import matplotlib.pyplot as plt

def copyStateDict(state_dict):
    """Return a copy of ``state_dict`` without a leading ``module`` key component.

    If the first key starts with ``"module"``, the first dot-separated
    component is removed from every key; otherwise keys are copied unchanged.

    Args:
        state_dict: Mapping from parameter name to value.

    Returns:
        A new ``OrderedDict`` in the same order. An empty input yields an
        empty ``OrderedDict`` instead of raising ``IndexError``.
    """
    if not state_dict:
        return OrderedDict()

    first_key = next(iter(state_dict))
    start_idx = 1 if first_key.startswith("module") else 0
    return OrderedDict(
        (".".join(key.split(".")[start_idx:]), value)
        for key, value in state_dict.items()
    )

def str2bool(v):
    """Map a textual flag to bool: yes/y/true/t/1 (any case) give True, anything else False."""
    truthy = {"yes", "y", "true", "t", "1"}
    if v.lower() in truthy:
        return True
    return False

def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,refine_net=None):
    """Run CRAFT on one image with PyTorch and post-process the result.

    Args:
        net: Detection network; called as ``net(x)`` and expected to return
            ``(y, feature)``.
        image: Input image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text: Thresholds forwarded to
            ``craft_utils.getDetBoxes``.
        cuda: When true, the input tensor is moved to the module-level ``device``.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        refine_net: Optional link refiner called as ``refine_net(y, feature)``.

    Returns:
        (boxes, polys, ret_score_text): scaled boxes, polygons (falling back
        to the box where no polygon exists) and the value returned by
        ``imgproc.cvt2HeatmapImg`` for the side-by-side score maps.

    Note:
        ``canvas_size``, ``mag_ratio`` and ``show_time`` are read from the
        module-level ``args`` object, not from the parameters.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)

    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.to(device)

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0,:,:,0].cpu().data.numpy()
    score_link = y[0,:,:,1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0,:,:,0].cpu().data.numpy()

    # Close the inference timer and start the post-processing timer. Without
    # these, t0 held a start timestamp and t1 was undefined, so enabling
    # args.show_time raised NameError.
    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time : print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text

In [None]:
regist_img_num = 0
# test_img_num = 1

# load net
net = CRAFT()     # initialize

print('Loading weights from checkpoint (' + args.trained_model + ')')
# Follow args.cuda instead of forcing CUDA, so a CPU run does not leave the
# notebook-wide `device` pointing at a GPU. With args.cuda=True this is
# identical to the previous behaviour.
device = torch.device('cuda' if args.cuda else 'cpu')
net.load_state_dict(copyStateDict(torch.load(args.trained_model, map_location=device)))

if args.cuda:
    net = net.to(device)
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = False

net.eval()

In [None]:
# PyTorch detection on the same image; note this rebinds `polys` from the TensorRT run.
bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net)


In [None]:
# Draw the PyTorch boxes on a copy of the input image and display it.
# for idx,i in enumerate(image_boxes):
img_2 = image.copy()
# NOTE: shape[:2] is unpacked into (imageWidth, imageHeight), and cv2.resize is
# then called with (resizeHeight, resizeWidth); the two name swaps cancel out,
# and with a factor of 1 the target size equals the source size.
imageWidth, imageHeight = img_2.shape[:2]
resizeHeight = int(1 * imageHeight)
resizeWidth = int(1 * imageWidth)
resizeImageNDArray = cv2.resize(img_2, (resizeHeight, resizeWidth), interpolation = cv2.INTER_CUBIC)

for pts in bboxes:

    # polylines needs integer (x, y) points
    pts = np.array(pts).reshape(-1,2).astype(np.int32)

    resizeImageNDArray = cv2.polylines(resizeImageNDArray, [pts], True, (0,0,255),2)

img_show(resizeImageNDArray)

------------------------------------------------------------------------------------------------------------

# OCR

In [None]:
def cut_image(bboxes, image):
    """Cut one perspective-rectified crop out of ``image`` for every box.

    Args:
        bboxes: Iterable of 4x2 corner arrays, unpacked as top-left,
            top-right, bottom-right, bottom-left.
        image: Image handed to ``cv2.warpPerspective``.

    Returns:
        (images, data_list): the list of crops, and for each box the list
        ``[x of corner 0, y of corner 0, x of corner 1, y of corner 2]``.
    """
    crops = []
    corner_coords = []
    for quad in bboxes:
        quad = quad.astype(np.float32)
        corner_coords.append([quad[0][0], quad[0][1], quad[1][0], quad[2][1]])

        top_left, top_right, bottom_right, bottom_left = quad

        # Output size: the larger of the two opposite edge extents per axis.
        out_w = max(abs(bottom_right[0] - bottom_left[0]), abs(top_right[0] - top_left[0]))
        out_h = max(abs(top_right[1] - bottom_right[1]), abs(top_left[1] - bottom_left[1]))

        target = np.float32([[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]])
        transform = cv2.getPerspectiveTransform(quad, target)
        crops.append(cv2.warpPerspective(image, transform, (int(out_w), int(out_h))))

    return crops, corner_coords

In [None]:
class Args(object):
    """Minimal stand-in for ``argparse.ArgumentParser`` that stores options as attributes.

    ``add_argument`` records the default for an option directly on the
    instance; ``set_argument`` overwrites attributes from a dict.
    """

    def __init__(self):
        pass

    def add_argument(self, key, default=None, help=None,action=None,required=False,type=None):
        """Register an option by storing its default value as an attribute.

        Args:
            key: Option name; every ``-`` character is removed to form the
                attribute name.
            default: Default value to store.
            help: Description; printed for required options.
            action: ``'store_true'`` makes the option default to ``False``
                unless an explicit ``default`` is given.
            required: When true, the name and help text are printed as a
                reminder that the value has to be supplied.
            type: Accepted for argparse compatibility; not used.
        """
        key = key.replace('-','')
        if action == 'store_true':
            # An explicit default (e.g. default=True) wins; it was previously
            # discarded and the flag was always initialised to False.
            self.__dict__[key] = False if default is None else default
        else:
            self.__dict__[key] = default

        if required:
            print(key,'/',help)

    def set_argument(self, data_dict):
        """Set one attribute per entry of ``data_dict``, overwriting existing values."""
        for key in data_dict:
            self.__dict__[key] = data_dict[key]

In [None]:
# Declare the OCR options on the argparse stand-in defined above. Options
# marked required only print a reminder; their values are filled in later
# from opt.txt via set_argument.
parser = Args()

parser.add_argument('--image_folder', required=True, help='path to image_folder which contains text images')
parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
parser.add_argument('--batch_size', type=int, default=192, help='input batch size')
parser.add_argument('--saved_model', required=True, help="path to saved_model to evaluation")
""" Data processing """
parser.add_argument('--batch_max_length', type=int, default=25, help='maximum-label-length')
parser.add_argument('--imgH', type=int, default=32, help='the height of the input image')
parser.add_argument('--imgW', type=int, default=100, help='the width of the input image')
parser.add_argument('--rgb', action='store_true', help='use rgb input')
parser.add_argument('--character', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz', help='character label')
parser.add_argument('--sensitive', default=True, action='store_true', help='for sensitive character mode')
parser.add_argument('--PAD', action='store_true', help='whether to keep ratio then pad for image resize')
""" Model Architecture """
parser.add_argument('--Transformation', type=str, required=True, help='Transformation stage. None|TPS')
parser.add_argument('--FeatureExtraction', type=str, required=True, help='FeatureExtraction stage. VGG|RCNN|ResNet')
parser.add_argument('--SequenceModeling', type=str, required=True, help='SequenceModeling stage. None|BiLSTM')
parser.add_argument('--Prediction', type=str, required=True, help='Prediction stage. CTC|Attn')
parser.add_argument('--num_fiducial', type=int, default=20, help='number of fiducial points of TPS-STN')
parser.add_argument('--input_channel', type=int, default=1, help='the number of input channel of Feature extractor')
parser.add_argument('--output_channel', type=int, default=512,
                        help='the number of output channel of Feature extractor')
parser.add_argument('--hidden_size', type=int, default=256, help='the size of the LSTM hidden state')

In [None]:
# Read the training options file and keep only the last "Options" section.
with open('../opt.txt','r') as f:
    opt = f.read()

opt = opt.split('------------ Options -------------\n')
opt = opt[-1].split('\n')

# for test execution
opt_dict ={}
int_keys = ['manualSeed', 'workers', 'batch_size', 'num_iter', 'valInterval', 'batch_max_length', 'imgH', 'imgW', 'num_fiducial', 'input_channel', 'output_channel', 'hidden_size']
float_keys = ['lr', 'beta1', 'rho', 'eps', 'grad_clip']
bool_keys = ['FT', 'adam', 'rgb', 'sensitive', 'PAD', 'data_filtering_off']
str_keys = ['exp_name', 'train_data', 'valid_data', 'saved_model', 'select_data', 'batch_ratio', 'total_data_usage_ratio', 'character', 'Transformation', 'FeatureExtraction', 'SequenceModeling', 'Prediction']
save_keys = ['image_folder', 'workers', 'batch_size', 'saved_model', 'batch_max_length', 'imgH', 'imgW', 'rgb', 'character', 'sensitive', 'PAD', 'Transformation', 'FeatureExtraction', 'SequenceModeling', 'Prediction', 'num_fiducial', 'input_channel', 'output_channel', 'hidden_size','rgb']
for line in opt:
    # the dashed footer ends the options section
    if line == '---------------------------------------':
        break

    # Split on the FIRST ':' only, so values that contain ':' themselves
    # (paths, character sets) are not truncated.
    key, sep, data = line.partition(':')
    if not sep:
        # blank or malformed line: nothing to parse
        continue
    key, data = key.strip(), data[1:]   # drop the single space after ':'

    if key in int_keys:
        data = int(data)
    elif key in float_keys:
        data = float(data)
    elif key in bool_keys:
        # any other text is left as the original string
        if data == 'True':
            data = True
        elif data == 'False':
            data = False
    elif key in str_keys:
        data = str(data)

    # only options the OCR model needs at inference time are kept
    if key in save_keys:
        opt_dict[key] = data

In [None]:
# save_model_name = '../best_accuracy.pth'

In [None]:
# Overlay the values parsed from opt.txt on the declared defaults, then force
# case-sensitive mode regardless of what the file said.
parser.set_argument(opt_dict)
parser.sensitive = True
# parser.character = ' 가각간갇갈갉갊감갑값갓갔강갖갗같갚갛개객갠갤갬갭갯갰갱갸갹갼걀걋걍걔걘걜거걱건걷걸걺검겁것겄겅겆겉겊겋게겐겔겜겝겟겠겡겨격겪견겯결겸겹겻겼경곁계곈곌곕곗고곡곤곧골곪곬곯곰곱곳공곶과곽관괄괆괌괍괏광괘괜괠괩괬괭괴괵괸괼굄굅굇굉교굔굘굡굣구국군굳굴굵굶굻굼굽굿궁궂궈궉권궐궜궝궤궷귀귁귄귈귐귑귓규균귤그극근귿글긁금급긋긍긔기긱긴긷길긺김깁깃깅깆깊까깍깎깐깔깖깜깝깟깠깡깥깨깩깬깰깸깹깻깼깽꺄꺅꺌꺼꺽꺾껀껄껌껍껏껐껑께껙껜껨껫껭껴껸껼꼇꼈꼍꼐꼬꼭꼰꼲꼴꼼꼽꼿꽁꽂꽃꽈꽉꽐꽜꽝꽤꽥꽹꾀꾄꾈꾐꾑꾕꾜꾸꾹꾼꿀꿇꿈꿉꿋꿍꿎꿔꿜꿨꿩꿰꿱꿴꿸뀀뀁뀄뀌뀐뀔뀜뀝뀨끄끅끈끊끌끎끓끔끕끗끙끝끼끽낀낄낌낍낏낑나낙낚난낟날낡낢남납낫났낭낮낯낱낳내낵낸낼냄냅냇냈냉냐냑냔냘냠냥너넉넋넌널넒넓넘넙넛넜넝넣네넥넨넬넴넵넷넸넹녀녁년녈념녑녔녕녘녜녠노녹논놀놂놈놉놋농높놓놔놘놜놨뇌뇐뇔뇜뇝뇟뇨뇩뇬뇰뇹뇻뇽누눅눈눋눌눔눕눗눙눠눴눼뉘뉜뉠뉨뉩뉴뉵뉼늄늅늉느늑는늘늙늚늠늡늣능늦늪늬늰늴니닉닌닐닒님닙닛닝닢다닥닦단닫달닭닮닯닳담답닷닸당닺닻닿대댁댄댈댐댑댓댔댕댜더덕덖던덛덜덞덟덤덥덧덩덫덮데덱덴델뎀뎁뎃뎄뎅뎌뎐뎔뎠뎡뎨뎬도독돈돋돌돎돐돔돕돗동돛돝돠돤돨돼됐되된될됨됩됫됴두둑둔둘둠둡둣둥둬뒀뒈뒝뒤뒨뒬뒵뒷뒹듀듄듈듐듕드득든듣들듦듬듭듯등듸디딕딘딛딜딤딥딧딨딩딪따딱딴딸땀땁땃땄땅땋때땍땐땔땜땝땟땠땡떠떡떤떨떪떫떰떱떳떴떵떻떼떽뗀뗄뗌뗍뗏뗐뗑뗘뗬또똑똔똘똥똬똴뙈뙤뙨뚜뚝뚠뚤뚫뚬뚱뛔뛰뛴뛸뜀뜁뜅뜨뜩뜬뜯뜰뜸뜹뜻띄띈띌띔띕띠띤띨띰띱띳띵라락란랄람랍랏랐랑랒랖랗래랙랜랠램랩랫랬랭랴략랸럇량러럭런럴럼럽럿렀렁렇레렉렌렐렘렙렛렝려력련렬렴렵렷렸령례롄롑롓로록론롤롬롭롯롱롸롼뢍뢨뢰뢴뢸룀룁룃룅료룐룔룝룟룡루룩룬룰룸룹룻룽뤄뤘뤠뤼뤽륀륄륌륏륑류륙륜률륨륩륫륭르륵른를름릅릇릉릊릍릎리릭린릴림립릿링마막만많맏말맑맒맘맙맛망맞맡맣매맥맨맬맴맵맷맸맹맺먀먁먈먕머먹먼멀멂멈멉멋멍멎멓메멕멘멜멤멥멧멨멩며멱면멸몃몄명몇몌모목몫몬몰몲몸몹못몽뫄뫈뫘뫙뫼묀묄묍묏묑묘묜묠묩묫무묵묶문묻물묽묾뭄뭅뭇뭉뭍뭏뭐뭔뭘뭡뭣뭬뮈뮌뮐뮤뮨뮬뮴뮷므믄믈믐믓미믹민믿밀밂밈밉밋밌밍및밑바박밖밗반받발밝밞밟밤밥밧방밭배백밴밸뱀뱁뱃뱄뱅뱉뱌뱍뱐뱝버벅번벋벌벎범법벗벙벚베벡벤벧벨벰벱벳벴벵벼벽변별볍볏볐병볕볘볜보복볶본볼봄봅봇봉봐봔봤봬뵀뵈뵉뵌뵐뵘뵙뵤뵨부북분붇불붉붊붐붑붓붕붙붚붜붤붰붸뷔뷕뷘뷜뷩뷰뷴뷸븀븃븅브븍븐블븜븝븟비빅빈빌빎빔빕빗빙빚빛빠빡빤빨빪빰빱빳빴빵빻빼빽뺀뺄뺌뺍뺏뺐뺑뺘뺙뺨뻐뻑뻔뻗뻘뻠뻣뻤뻥뻬뼁뼈뼉뼘뼙뼛뼜뼝뽀뽁뽄뽈뽐뽑뽕뾔뾰뿅뿌뿍뿐뿔뿜뿟뿡쀼쁑쁘쁜쁠쁨쁩삐삑삔삘삠삡삣삥사삭삯산삳살삵삶삼삽삿샀상샅새색샌샐샘샙샛샜생샤샥샨샬샴샵샷샹섀섄섈섐섕서석섞섟선섣설섦섧섬섭섯섰성섶세섹센셀셈셉셋셌셍셔셕션셜셤셥셧셨셩셰셴셸솅소속솎손솔솖솜솝솟송솥솨솩솬솰솽쇄쇈쇌쇔쇗쇘쇠쇤쇨쇰쇱쇳쇼쇽숀숄숌숍숏숑수숙순숟술숨숩숫숭숯숱숲숴쉈쉐쉑쉔쉘쉠쉥쉬쉭쉰쉴쉼쉽쉿슁슈슉슐슘슛슝스슥슨슬슭슴습슷승시식신싣실싫심십싯싱싶싸싹싻싼쌀쌈쌉쌌쌍쌓쌔쌕쌘쌜쌤쌥쌨쌩썅써썩썬썰썲썸썹썼썽쎄쎈쎌쏀쏘쏙쏜쏟쏠쏢쏨쏩쏭쏴쏵쏸쐈쐐쐤쐬쐰쐴쐼쐽쑈쑤쑥쑨쑬쑴쑵쑹쒀쒔쒜쒸쒼쓩쓰쓱쓴쓸쓺쓿씀씁씌씐씔씜씨씩씬씰씸씹씻씽아악안앉않알앍앎앓암압앗았앙앝앞애액앤앨앰앱앳앴앵야약얀얄얇얌얍얏양얕얗얘얜얠얩어억언얹얻얼얽얾엄업없엇었엉엊엌엎에엑엔엘엠엡엣엥여역엮연열엶엷염엽엾엿였영옅옆옇예옌옐옘옙옛옜오옥온올옭옮옰옳옴옵옷옹옻와왁완왈왐왑왓왔왕왜왝왠왬왯왱외왹왼욀욈욉욋욍요욕욘욜욤욥욧용우욱운울욹욺움웁웃웅워웍원월웜웝웠웡웨웩웬웰웸웹웽위윅윈윌윔윕윗윙유육윤율윰윱윳융윷으윽은을읊음읍읏응읒읓읔읕읖읗의읜읠읨읫이익인일읽읾잃임입잇있잉잊잎자작잔잖잗잘잚잠잡잣잤장잦재잭잰잴잼잽잿쟀쟁쟈쟉쟌쟎쟐쟘쟝쟤쟨쟬저적전절젊점접젓정젖제젝젠젤젬젭젯젱져젼졀졈졉졌졍졔조족존졸졺좀좁좃종좆좇좋좌좍좔좝좟좡좨좼좽죄죈죌죔죕죗죙죠죡죤죵주죽준줄줅줆줌줍줏중줘줬줴쥐쥑쥔쥘쥠쥡쥣쥬쥰쥴쥼즈즉즌즐즘즙즛증지직진짇질짊짐집짓징짖짙짚짜짝짠짢짤짧짬짭짯짰짱째짹짼쨀쨈쨉쨋쨌쨍쨔쨘쨩쩌쩍쩐쩔쩜쩝쩟쩠쩡쩨쩽쪄쪘쪼쪽쫀쫄쫌쫍쫏쫑쫓쫘쫙쫠쫬쫴쬈쬐쬔쬘쬠쬡쭁쭈쭉쭌쭐쭘쭙쭝쭤쭸쭹쮜쮸쯔쯤쯧쯩찌찍찐찔찜찝찡찢찧차착찬찮찰참찹찻찼창찾채책챈챌챔챕챗챘챙챠챤챦챨챰챵처척천철첨첩첫첬청체첵첸첼쳄쳅쳇쳉쳐쳔쳤쳬쳰촁초촉촌촐촘촙촛총촤촨촬촹최쵠쵤쵬쵭쵯쵱쵸춈추축춘출춤춥춧충춰췄췌췐취췬췰췸췹췻췽츄츈츌츔츙츠측츤츨츰츱츳층치칙친칟칠칡침칩칫칭카칵칸칼캄캅캇캉캐캑캔캘캠캡캣캤캥캬캭컁커컥컨컫컬컴컵컷컸컹케켁켄켈켐켑켓켕켜켠켤켬켭켯켰켱켸코콕콘콜콤콥콧콩콰콱콴콸쾀쾅쾌쾡쾨쾰쿄쿠쿡쿤쿨쿰쿱쿳쿵쿼퀀퀄퀑퀘퀭퀴퀵퀸퀼큄큅큇
큉큐큔큘큠크큭큰클큼큽킁키킥킨킬킴킵킷킹타탁탄탈탉탐탑탓탔탕태택탠탤탬탭탯탰탱탸턍터턱턴털턺텀텁텃텄텅테텍텐텔템텝텟텡텨텬텼톄톈토톡톤톨톰톱톳통톺톼퇀퇘퇴퇸툇툉툐투툭툰툴툼툽툿퉁퉈퉜퉤튀튁튄튈튐튑튕튜튠튤튬튱트특튼튿틀틂틈틉틋틔틘틜틤틥티틱틴틸팀팁팃팅파팍팎판팔팖팜팝팟팠팡팥패팩팬팰팸팹팻팼팽퍄퍅퍼퍽펀펄펌펍펏펐펑페펙펜펠펨펩펫펭펴편펼폄폅폈평폐폘폡폣포폭폰폴폼폽폿퐁퐈퐝푀푄표푠푤푭푯푸푹푼푿풀풂품풉풋풍풔풩퓌퓐퓔퓜퓟퓨퓬퓰퓸퓻퓽프픈플픔픕픗피픽핀필핌핍핏핑하학한할핥함합핫항해핵핸핼햄햅햇했행햐향허헉헌헐헒험헙헛헝헤헥헨헬헴헵헷헹혀혁현혈혐협혓혔형혜혠혤혭호혹혼홀홅홈홉홋홍홑화확환활홧황홰홱홴횃횅회획횐횔횝횟횡효횬횰횹횻후훅훈훌훑훔훗훙훠훤훨훰훵훼훽휀휄휑휘휙휜휠휨휩휫휭휴휵휸휼흄흇흉흐흑흔흖흗흘흙흠흡흣흥흩희흰흴흼흽힁히힉힌힐힘힙힛힝!@#$%^&*《》()[]【】【】\"\'◐◑oㅇ⊙○◎◉◀▶⇒◆■□△★※☎☏;:/.?<>-_=+×\￦|₩~,.㎡㎥ℓ㎖㎘→「」『』·ㆍ1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ읩①②③④⑤月日軍 '
parser.character = ' !"%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\_abcdefghijklmnopqrstuvwxyz{}~→ㄱㄴㄹㅁㅅㅇㅔㅠㅣ가각간갇갈감갑값갓갔강갖같갚갛개객갤갬갯갱걀걔거걱건걷걸검겁것겅겉게겐겔겠겨격겪견결겸겹겼경곁계고곡곤곧골곰곱곳공곶과곽관괄괌광괘괜괭괴굉교구국군굳굴굵굶굽굿궁권궐궤귀귄귓규균귤그극근글긁금급긋긍기긴길김깁깃깅깆깊까깍깎깐깔깜깝깡깥깨깬깽꺠꺵꺼꺽꺾껌껍껏껑께껴꼬꼭꼴꼼꼽꼿꽁꽂꽃꽉꽝꽤꾀꾸꾹꾼꿀꿈꿍꿔꿨꿰뀌뀐뀝끄끈끊끌끓끔끗끙끝끼낄낌나낙낚난날낡남납낫났낭낮낯낱낳내낸낼냄냅냇냉냐냠냥너넉넌널넓넘넛넣네넥넨넬넷녀녁년념녕녘노녹논놀놈놉농높놓놔뇌뇨뇸누눈눌눔눕뉘뉴늄느늑는늘늙능늦늬니닉닌닐님닙닝다닥닦단닫달닭닮닳담답닷당닿대댁댄댐댓더덕던덜덟덤덥덧덩덫덮데덱덴델뎅뎌도독돈돌돔돕돗동돼됐되된될됨됩두둑둔둘둠둡둣둥뒤뒷듀드득든듣들듬듭듯등듸디딕딘딜딝딤딥딧딩딪따딱딴딸땀땅때땐땜땠땡떙떠떡떤떨떻떼또똑똘똥뚜뚝뚫뚱뛰뜀뜨뜩뜯뜰뜸뜻띄띠라락란랄람랍랐랑랗래랙랜램랫랬랭랲략량러럭런럴럼럽럿렀렁렇레렉렌렐렘렙렛려력련렬렴렵렷렸령례로록론롤롬롭롯롱뢰료룡루룩룬룰룸룹룻룽뤄뤼류륙륜률륭르륵른를름릅릇릉릎리릭린릴림립릿링마막만많말맑맘맙맛망맞맡맣매맥맨맵맹맺머먹먼멀멈멋멍멎메멘멜멤멩며면멸명몇모목몫몬몯몰몸몹못몽묘무묵묶문묻물뭄뭇뭉뭐뭔뭘뭣뮈뮌뮐뮤므믄믈미믹민믿밀밉밌밍및밑바박밖반받발밝밟밤밥방밭배백밴밸뱀뱃뱅뱉버벅번벌범법벗벙벚베벤벧벨벳벼벽변별볍병볕보복볶본볼봄봅봇봉봐봤뵈뵙부북분붇불붉붐붓붕붙뷔뷰브븐블븨비빅빈빌빔빗빙빚빛빠빡빨빱빵빼뺄뺏뺨뺴뻐뻔뻗뻬뼈뼉뼛뽀뽑뾰뿌뿍뿐뿔뿡쁘쁜쁨삐사삭산살삶삼삽삿샀상새색샌샐샘샛생샤샨샬샴샵샷샹섀서석섞선설섬섭섯성세섹센셀셈셉셋셔션셜셨셰셸소속손솔솜솝솟송솥쇄쇠쇼숍숏수숙순숟술숨숫숭숯숲숴쉐쉬쉰쉴쉼쉽쉿슈슐슛스슨슬슴습슷승시식신싣실싫심십싯싱싶싸싹싼쌀쌈쌌쌍쌓쌘쌤쌩써썩썬썰썹썼쎄쎈쎌쏘쏙쏟쑤쑥쓰쓴쓸씀씌씨씩씬씰씹씻씽아악안앉않알앓암압앗았앙앞애액앤앨앱앵야약얀얄얇양얕얗얘어억언얹얻얼엄업없엇었엉엊엌엎에엑엔엘엠엡엣엥여역엮연열엷염엽엿였영옆예옌옐옘옙옛오옥온올옮옳옵옷옹옻와왁완왓왔왕왜왠외왼요욕욘욜욤용우욱운울움웃웅워원월웠웨웩웬웰웹위윅윈윌윗윙유육윤율융윷으윽은을음읍응읗의이익인일읽잃임입잇있잉잊잎자작잔잖잘잠잡잣장잦재잭잰잼쟁쟤저적전절젊젋점접젓정젖제젝젠젤젯젱져졌조족존졸좀좁종좋좌죄죠주죽준줄줌줍중줘쥐쥘쥬쥴즈즉즌즐즘즙증지직진질짐집짓징짙짚짜짝짠짧짱째쨌쨰쩌쩍쩐쩔쩜쩰쪄쪽쫑쫓쬘쭈쭉쯔쯤찌찍찔찜찡찢찧차착찬찮찰참찻창찾채책챈챌챔챙챠처척천철첨첩첫청체첵첸첼쳐쳤초촉촌촘촛총촬최추축춘출춤춥춧충춰췌취츠측츰층치칙친칠침칩칫칭카칵칸칼캄캅캉캐캔캘캠캡캣캥커컥컨컬컴컵컷케켄켈켓켜켰코콕콘콜콤콥콧콩콰콸쾅쾌쿄쿠쿡쿤쿨쿵쿼퀀퀘퀴퀵퀸큐큘크큰클큼키킥킨킬킴킵킷킹타탁탄탈탉탐탑탓탕태택탠탤탬탭탱터턱턴털텀텃텅테텍텐텔템텝텨톈토톡톤톨톰톱톳통톺퇴투툭툰툴툼퉁튀튜트특튼튿틀틈티틱틴틸팀팁팃팅파팎판팔팜팝팟팡팥패팩팬팰팻팽퍼퍽펀펄펌페펙펜펠펩펫펭펴편펼평폐포폭폰폴폼퐁표푸푹푼풀품풋풍퓨퓰프픈플픔피픽핀필핌핏핑하학한할함합핫항핳해핵핸햄햇했행햐향허헌헐험헛헤헨헬헴헸혀혁현혈혐협혔형혜호혹혼홀홈홉홍화확환활황회획횐횟횡효후훈훌훔훤훨훼휀휘휙휠휴흄흉흐흑흔흘흙흠흡흥흩희흰히힌힐힘힝，．０１ＥＨＰＲＴａｋｌｍ'
parser.image_folder = 'deep-text-recognition-benchmark/demo_image'
# parser.saved_model = save_model_name
# NOTE(review): benchmark and deterministic are both enabled here; confirm
# that this combination gives the reproducibility that is intended.
cudnn.benchmark = True
cudnn.deterministic = True

# From here on `opt` is the populated options object (it previously held the
# list of lines read from opt.txt).
opt = parser


In [None]:
# Prepare OCR inputs: crop every box found by the TensorRT detector out of the
# grayscale image and build the label converter.
rec_load_befo_mem = torch.cuda.memory_allocated()/1024/1024
print("current_memory:", rec_load_befo_mem)

from utill import CTCLabelConverter, AttnLabelConverter
from ocrmodel import Model
from dataset import AlignCollate, RawDataset

#boxes, polys, ret_score_text

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# `boxes` comes from the TensorRT run; the second return value is not needed here
images, _ = cut_image(boxes, gray)

# parser.image_folder = images
# converter = AttnLabelConverter(opt.character)
converter = CTCLabelConverter(opt.character)
# the class count is taken from the converter's character list
opt.num_class = len(converter.character)
# if opt.rgb:
#     opt.input_channel = 3

# model = Model(opt)
# model.load_state_dict(torch.load(opt.saved_model, map_location=device),strict=False)
# model.to(device)

# model.eval()

# rec_load_mem = torch.cuda.memory_allocated()/1024/1024
# print("trt_load_mem: %fMB"%(rec_load_mem-rec_load_befo_mem))

In [None]:
# Display the class count that was derived from len(converter.character).
opt.num_class

In [None]:
# Disable cuDNN for all subsequent torch operations in this kernel.
# NOTE(review): the reason is not stated in the notebook — confirm it is still required.
torch.backends.cudnn.enabled = False

In [None]:
# Collate function and dataset for the recognizer; `images` is produced by the cropping cell.
AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
demo_data = RawDataset(root=images, opt=opt)  # use RawDataset

#     if device=='cuda':
demo_loader = torch.utils.data.DataLoader(
    demo_data, batch_size=opt.batch_size,
    shuffle=False,
    num_workers=int(opt.workers),
    collate_fn=AlignCollate_demo, pin_memory=True)

# with torch.no_grad():
# The loop body only assigns variables, so once it finishes `batch_size`, `image` and
# `length_for_pred` describe the LAST batch yielded by the loader; later cells read them.
# NOTE: `image` is rebound here — it no longer holds the BGR frame but a batched tensor on `device`.
for image_tensors, image_path_list in demo_loader:
    batch_size = image_tensors.size(0)
    image = image_tensors.to(device)

    # For max length prediction
    length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
#     text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)
    

In [None]:
# Checkpoint path that the model-loading cell passes to torch.load.
opt.saved_model = "./deep-text-recognition-benchmark/best_accuracy_noparell.pth"

In [None]:
# Instantiate the recognizer from the current contents of `opt`.
# NOTE(review): the following cell reassigns opt.Transformation / FeatureExtraction /
# SequenceModeling / Prediction, so on a fresh top-to-bottom run this model is built from
# whatever values `opt` held before that cell — confirm the intended cell order.
model = Model(opt)
print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
      opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
      opt.SequenceModeling, opt.Prediction)
# model = torch.nn.DataParallel(model).to(device)
# model2 = model.module.to(device)
# load model
print('loading pretrained model from %s' % opt.saved_model)
# Checkpoint tensors are mapped onto `device` while loading.
# NOTE(review): the model itself is not moved with .to(device) in this cell.
model.load_state_dict(torch.load(opt.saved_model, map_location=device))

In [None]:
# Architecture options read by Model(opt): no transformation stage, ResNet feature extractor,
# BiLSTM sequence model and a CTC prediction head. The commented lines are the alternatives.
# opt.Transformation = 'TPS' 
opt.Transformation = 'None' 
opt.FeatureExtraction = 'ResNet' 
opt.SequenceModeling = 'BiLSTM' 
opt.Prediction = 'CTC'
# opt.Prediction = 'Attn'
# opt.image_folder = 'demo_image/'

In [None]:
# Softmax helper used below; imported here so this cell does not depend on an import that
# only appears in a later cell.
import torch.nn.functional as F

# Build a second recognizer from the current `opt` and reuse the sub-modules of the
# already-loaded `model` instead of loading the checkpoint again.
model_c = Model(opt)

model_c.FeatureExtraction = model.FeatureExtraction

model_c.AdaptiveAvgPool = model.AdaptiveAvgPool
model_c.SequenceModeling = model.SequenceModeling
# NOTE(review): only the `generator` sub-module of model.Prediction is reused, which implies
# `model` was built with a prediction head that exposes `.generator` — confirm.
model_c.Prediction = model.Prediction.generator

# `demo_loader` (built in an earlier cell) is reused here.

# predict
model_c.eval()
with torch.no_grad():
    for image_tensors, image_path_list in demo_loader:
        batch_size = image_tensors.size(0)
        image = image_tensors.to(device)
        # For max length prediction
        length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

        if 'CTC' in opt.Prediction:
            preds = model_c(image, text_for_pred)

            # Select max probabilty (greedy decoding) then decode index to character
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            _, preds_index = preds.max(2)
            # preds_index = preds_index.view(-1)
            preds_str = converter.decode(preds_index, preds_size)

        else:
            preds = model_c(image, text_for_pred, is_train=False)

            # select max probabilty (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

        dashed_line = '-' * 80
        head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

        print(f'{dashed_line}\n{head}\n{dashed_line}')
        # Append the header to the log and close the handle right away; the original opened
        # the file once per batch and never closed it.
        with open('./log_demo_result.txt', 'a') as log:
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

        preds_prob = F.softmax(preds, dim=2)
        preds_max_prob, _ = preds_prob.max(dim=2)
        for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
            if 'Attn' in opt.Prediction:
                pred_EOS = pred.find('[s]')
                pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                pred_max_prob = pred_max_prob[:pred_EOS]

            # calculate confidence score (= multiply of pred_max_prob)
            # NOTE(review): the score is computed but neither printed nor stored in this cell.
            confidence_score = pred_max_prob.cumprod(dim=0)[-1]

In [None]:
import tensorrt as trt
import numpy as np
import os

import pycuda.driver as cuda
import pycuda.autoinit



class HostDeviceMem(object):
    """Pair of buffers: one on the host side and its counterpart on the device side."""

    def __init__(self, host_mem, device_mem):
        # Stored as given; no copy or validation is performed.
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # str() of each buffer under a "Host:" / "Device:" heading.
        return f"Host:\n{self.host!s}\nDevice:\n{self.device!s}"

    def __repr__(self):
        # repr and str are intentionally the same text.
        return self.__str__()


class TrtModel:
    """Inference wrapper around a serialized TensorRT engine.

    The engine is deserialized once, one pinned host buffer and one device buffer are
    allocated per binding (sized for ``batch_size`` samples), and calling the instance
    runs one inference and returns the contents of the last output binding.

    Args:
        engine_path: Path of the serialized engine file.
        max_batch_size: Stored on the instance; not used by the methods of this class.
        dtype: NumPy dtype the input array is cast to before it is copied to the host buffer.
        batch_size: Value written into dimension 0 of every binding shape when sizing buffers.
    """

    def __init__(self,engine_path,max_batch_size=1,dtype=np.float32, batch_size=1):

        self.engine_path = engine_path
        self.dtype = dtype
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.batch_size = batch_size
        self.runtime = trt.Runtime(self.logger)
        self.engine = self.load_engine(self.runtime, self.engine_path)
        self.max_batch_size = max_batch_size
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers()
        self.context = self.engine.create_execution_context()

    @staticmethod
    def load_engine(trt_runtime, engine_path):
        """Read ``engine_path`` and deserialize it with ``trt_runtime``.

        Raises:
            RuntimeError: If deserialization yields no engine.
        """
        trt.init_libnvinfer_plugins(None, "")
        with open(engine_path, 'rb') as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        if engine is None:
            # Fail here with a clear message instead of an AttributeError further down.
            raise RuntimeError("failed to deserialize TensorRT engine: %s" % engine_path)
        return engine

    def allocate_buffers(self):
        """Allocate a pinned host buffer and a device buffer for every engine binding.

        Returns:
            ``(inputs, outputs, bindings, stream)`` where ``inputs``/``outputs`` are lists of
            ``HostDeviceMem`` and ``bindings`` holds the device addresses in binding order.
        """
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()

        for binding in self.engine:
            dims = self.engine.get_binding_shape(binding)
            # Dimension 0 is replaced by the requested batch size.
            # NOTE(review): any other dynamic (-1) dimension would make the volume meaningless.
            dims[0] = self.batch_size

            size = trt.volume(dims)
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))

            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)

            bindings.append(int(device_mem))

            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))

        return inputs, outputs, bindings, stream

    def __call__(self, x:np.ndarray):
        """Run one inference on ``x`` and return the last output binding.

        Args:
            x: Input array; it is cast to ``self.dtype`` and its shape is set on binding 0.
                It may hold fewer elements than the input buffer (a smaller batch).

        Returns:
            A new array (not a view of the reusable host buffer) shaped like binding 1.

        Raises:
            ValueError: If ``x`` has more elements than the allocated input buffer.
        """
        x = x.astype(self.dtype)
        flat = x.ravel()

        host_in = self.inputs[0].host
        if flat.size > host_in.size:
            raise ValueError(
                "input has %d elements but the input buffer holds only %d"
                % (flat.size, host_in.size))
        # Copy the second argument into the first (a prefix of the pinned input buffer).
        np.copyto(host_in[:flat.size], flat)

        self.context.set_binding_shape(0, x.shape)

        for inp in self.inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)

        # The input shape is set explicitly above, so the explicit-batch entry point is used
        # (execute_async with a batch_size argument belongs to the implicit-batch API).
        self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)

        for out in self.outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)

        self.stream.synchronize()

        out_shape = tuple(self.context.get_binding_shape(1))
        n_out = int(np.prod(out_shape))

        # Copy out of the pinned buffer so the next call cannot overwrite this result.
        return self.outputs[-1].host[:n_out].reshape(out_shape).copy()


In [None]:
# NumPy copy of the batch tensor bound to `image` (moved to CPU first); shown shape is the
# shape that will be fed to the TensorRT wrapper.
data = np.array(image.detach().cpu())
data.shape

In [None]:
trt_engine_path = './deep-text-recognition-benchmark/weights/ocr.engine'
# Buffers are sized for exactly the batch held in `data`.
batch_size = data.shape[0]
# NOTE(review): `model` is rebound here from the PyTorch recognizer to the TensorRT wrapper,
# so the PyTorch cells above cannot be re-run after this cell without reloading the model.
model = TrtModel(trt_engine_path, batch_size=batch_size)
shape = model.engine.get_binding_shape(0) # binding 0 is the shape of input 1; binding 1 is the shape of output 1
print(shape)

In [None]:
# Display the batch size taken from data.shape[0].
batch_size

In [None]:
# Shape the engine reports for binding 1 (the binding TrtModel.__call__ reads its result shape from).
print(model.engine.get_binding_shape(1))

In [None]:
# Run the TensorRT wrapper on the NumPy batch.
result = model(data)

In [None]:
# Wrap the TensorRT output in a torch tensor so the decoding cells can use tensor methods.
# The later indexing (size(1), max(2)) treats it as 3-D.
preds = torch.tensor(result)
preds.shape

In [None]:
# Greedy CTC decoding of the TensorRT output through the label converter.
# One length entry (the size of dimension 1 of `preds`) per sample.
preds_size = torch.IntTensor([preds.size(1)] * batch_size)
_, preds_index = preds.max(2)
# NOTE(review): the indices are flattened before decode() here, whereas the PyTorch predict
# cell passes them unflattened (its view(-1) line is commented out) — confirm which layout
# converter.decode expects.
preds_index = preds_index.view(-1)
preds_str = converter.decode(preds_index.data, preds_size.data)
preds_str

In [None]:
import torch.nn.functional as F

# Hand-written greedy CTC decoding of `preds`, indexed as preds[sample, step, class].
preds_size = torch.IntTensor([preds.size(1)] * batch_size)
_, preds_index = preds.max(2)
preds_index = preds_index.view(-1)

# Class indices are defined over the converter's table (opt.num_class is
# len(converter.character)), so characters are looked up there, not in opt.character.
char_table = converter.character

preds_str = []
offset = 0
for l in preds_size.tolist():
    # Each sample owns `l` consecutive entries of the flattened index tensor; the previous
    # `preds_index[index:]` slice started sample k at offset k instead of k * l.
    t = preds_index[offset:offset + l].tolist()
    offset += l

    char_list = []
    for i in range(l):
        if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):  # removing repeated characters and blank.
            char_list.append(char_table[t[i]])
    text = ''.join(char_list)

    preds_str.append(text)


preds_prob = F.softmax(preds, dim=2)
preds_max_prob, _ = preds_prob.max(dim=2)

result_list = []
for pred, pred_max_prob in zip(preds_str, preds_max_prob):
    # calculate confidence score (= multiply of pred_max_prob)
    if pred_max_prob.numel() > 0:
        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
    else:
        # No time steps: keep the former fallback value, but without a bare `except`
        # that would also hide unrelated errors.
        confidence_score = 0
    result_list.append([pred, float(confidence_score)])

In [None]:
# Display the decoded strings, one per sample.
preds_str

In [None]:
import tensorrt as trt

# Logger created without a severity argument (library default).
TRT_LOGGER = trt.Logger()

# Deserialize the same engine file that the TrtModel cell loads, this time keeping the raw
# `engine` object at notebook level for the manual buffer/context cells that follow.
with open('./deep-text-recognition-benchmark/weights/ocr.engine', "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine =  runtime.deserialize_cuda_engine(f.read())

In [None]:
class HostDeviceMem(object):
    """Holds a host buffer together with the matching device buffer."""

    def __init__(self, host_mem, device_mem):
        # Both references are kept exactly as passed in.
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        # Four lines: "Host:", the host buffer, "Device:", the device buffer.
        parts = ("Host:", str(self.host), "Device:", str(self.device))
        return "\n".join(parts)

    def __repr__(self):
        # Same text as __str__.
        return self.__str__()

In [None]:
# Manual version of the buffer allocation: one pinned host buffer and one device buffer per
# engine binding. Relies on `cuda` (pycuda.driver) and `batch_size` from earlier cells.
inputs = []
outputs = []
bindings = []

for binding in engine:
    dims = engine.get_binding_shape(binding)
    # Dimension 0 is overwritten with the current batch size.
    dims[0] = batch_size
#     if dims[-1] == -1:
#         dims[-2],dims[-1] = input_shape
#     else:
#         dims[-3],dims[-2] = int(input_shape[0]/2), int(input_shape[1]/2)
        
    # NOTE(review): `dims` already includes the batch size, so the extra factor
    # engine.max_batch_size enlarges the buffers whenever that value is greater than 1.
    size = trt.volume(dims) * engine.max_batch_size
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    
    host_mem = cuda.pagelocked_empty(size, dtype)
    device_mem = cuda.mem_alloc(host_mem.nbytes)
    
    # Device addresses are collected in binding order for the execute call.
    bindings.append(int(device_mem))
    
    if engine.binding_is_input(binding):#Determine whether a binding is an input binding.
        inputs.append(HostDeviceMem(host_mem, device_mem))
    else:
        outputs.append(HostDeviceMem(host_mem, device_mem))

In [None]:
# Aliases (not copies) of the lists built by the allocation cell.
segment_inputs, segment_outputs, segment_bindings = inputs, outputs, bindings

# CUDA stream used for the copies and the execution in the next cell.
stream = cuda.Stream()

In [None]:
# Manual TensorRT inference on the batch bound to `image`, using the buffers, bindings and
# stream prepared in the previous cells.
# with engine.create_execution_context() as context:
context = engine.create_execution_context()
context.active_optimization_profile = 0
origin_inputshape=context.get_binding_shape(0)

if (origin_inputshape[-1]==-1):
    # Dynamic spatial size: fill in the last two dimensions.
    # NOTE(review): `input_shape` is not defined in the visible part of the notebook — confirm
    # it exists before relying on this branch.
    origin_inputshape[-2], origin_inputshape[-1]=(input_shape)
    context.set_binding_shape(0,(origin_inputshape))

else:
    origin_inputshape[0] = batch_size
    context.set_binding_shape(0,(origin_inputshape))

# NumPy copy of the input batch. The previous `tensor * engine.max_batch_size` multiplied the
# VALUES by the batch limit (a leftover of list repetition), which corrupts the input whenever
# that limit is not 1.
input_img_array = np.array(image.detach().cpu())

# Fill the existing page-locked buffer instead of replacing it with a pageable array: the
# asynchronous copy below expects pinned host memory.
pinned_input = segment_inputs[0].host.reshape(-1)
if input_img_array.size > pinned_input.size:
    raise ValueError(
        "input has %d elements but the input buffer holds only %d"
        % (input_img_array.size, pinned_input.size))
np.copyto(pinned_input[:input_img_array.size], input_img_array.ravel())

# Copies and execution are queued on the same stream, so they run in order; one
# synchronize at the end is sufficient.
for inp in segment_inputs:
    cuda.memcpy_htod_async(inp.device, inp.host, stream)

context.execute_async_v2(bindings=segment_bindings, stream_handle=stream.handle)

for out in segment_outputs:
    cuda.memcpy_dtoh_async(out.host, out.device, stream)
stream.synchronize()

# Shape of binding 1 for the input shape set above.
bs = context.get_binding_shape(1)

y_out = segment_outputs[-1].host

# Leading prod(bs) elements of the last output buffer, viewed with the binding's shape.
y1 =  y_out[0:np.array(bs).prod()].reshape(bs)

In [None]:
# Torch copy of the manual TensorRT output.
preds = torch.tensor(y1)

In [None]:
# Overrides the `preds` assigned in the previous cell with a version scaled by 255.
# NOTE(review): a positive scale does not change the argmax (decoded text) but it does change
# the softmax values used for the confidence score — confirm the factor is intended.
preds = torch.tensor(y1) * 255

In [None]:
import torch.nn.functional as F

# Hand-written greedy CTC decoding of `preds`, indexed as preds[sample, step, class].
preds_size = torch.IntTensor([preds.size(1)] * batch_size)
_, preds_index = preds.max(2)
preds_index = preds_index.view(-1)

# Class indices are defined over the converter's table (opt.num_class is
# len(converter.character)), so characters are looked up there, not in opt.character.
char_table = converter.character

preds_str = []
offset = 0
for l in preds_size.tolist():
    # Each sample owns `l` consecutive entries of the flattened index tensor; the previous
    # `preds_index[index:]` slice started sample k at offset k instead of k * l.
    t = preds_index[offset:offset + l].tolist()
    offset += l

    char_list = []
    for i in range(l):
        if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):  # removing repeated characters and blank.
            char_list.append(char_table[t[i]])
    text = ''.join(char_list)

    preds_str.append(text)


preds_prob = F.softmax(preds, dim=2)
preds_max_prob, _ = preds_prob.max(dim=2)

result_list = []
for pred, pred_max_prob in zip(preds_str, preds_max_prob):
    # calculate confidence score (= multiply of pred_max_prob)
    if pred_max_prob.numel() > 0:
        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
    else:
        # No time steps: keep the former fallback value, but without a bare `except`
        # that would also hide unrelated errors.
        confidence_score = 0
    result_list.append([pred, float(confidence_score)])

In [None]:
# Display the [text, confidence] pairs built by the decoding cell.
result_list